refactor(api): migrate console/service_api.dataset to BaseModel (#36480)

This commit is contained in:
chariri 2026-05-23 02:39:07 +09:00 committed by GitHub
parent 4d8b6c7dc0
commit 790ca72627
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 2850 additions and 839 deletions

View File

@ -1,15 +1,16 @@
from typing import Any, cast
from datetime import datetime
from typing import Any
from flask import request
from flask_restx import Resource, fields, marshal, marshal_with
from pydantic import BaseModel, Field, field_validator
from flask_restx import Resource
from pydantic import BaseModel, Field, field_validator, model_validator
from sqlalchemy import func, select
from werkzeug.exceptions import Forbidden, NotFound
import services
from configs import dify_config
from controllers.common.fields import ApiBaseUrlResponse, SimpleResultResponse, UsageCheckResponse
from controllers.common.schema import get_or_create_model, register_response_schema_models, register_schema_models
from controllers.common.schema import query_params_from_model, register_response_schema_models, register_schema_models
from controllers.console import console_ns
from controllers.console.apikey import ApiKeyItem, ApiKeyList
from controllers.console.app.error import ProviderNotInitializeError
@ -30,26 +31,10 @@ from core.rag.extractor.entity.extract_setting import ExtractSetting, NotionInfo
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from extensions.ext_database import db
from fields.app_fields import app_detail_kernel_fields, related_app_list
from fields.dataset_fields import (
content_fields,
dataset_detail_fields,
dataset_fields,
dataset_query_detail_fields,
dataset_retrieval_model_fields,
doc_metadata_fields,
external_knowledge_info_fields,
external_retrieval_model_fields,
file_info_fields,
icon_info_fields,
keyword_setting_fields,
reranking_model_fields,
tag_fields,
vector_setting_fields,
weighted_score_fields,
)
from fields.document_fields import document_status_fields
from fields.base import ResponseModel
from fields.dataset_fields import DatasetDetailResponse
from graphon.model_runtime.entities.model_entities import ModelType
from libs.helper import build_icon_url, dump_response, to_timestamp
from libs.login import current_account_with_tenant, login_required
from libs.url_utils import normalize_api_base_url
from models import ApiToken, Dataset, Document, DocumentSegment, UploadFile
@ -61,58 +46,6 @@ from services.dataset_service import DatasetPermissionService, DatasetService, D
register_response_schema_models(console_ns, ApiBaseUrlResponse, SimpleResultResponse, UsageCheckResponse)
# Register models for flask_restx to avoid dict type issues in Swagger
dataset_base_model = get_or_create_model("DatasetBase", dataset_fields)
tag_model = get_or_create_model("Tag", tag_fields)
keyword_setting_model = get_or_create_model("DatasetKeywordSetting", keyword_setting_fields)
vector_setting_model = get_or_create_model("DatasetVectorSetting", vector_setting_fields)
weighted_score_fields_copy = weighted_score_fields.copy()
weighted_score_fields_copy["keyword_setting"] = fields.Nested(keyword_setting_model)
weighted_score_fields_copy["vector_setting"] = fields.Nested(vector_setting_model)
weighted_score_model = get_or_create_model("DatasetWeightedScore", weighted_score_fields_copy)
reranking_model = get_or_create_model("DatasetRerankingModel", reranking_model_fields)
dataset_retrieval_model_fields_copy = dataset_retrieval_model_fields.copy()
dataset_retrieval_model_fields_copy["reranking_model"] = fields.Nested(reranking_model)
dataset_retrieval_model_fields_copy["weights"] = fields.Nested(weighted_score_model, allow_null=True)
dataset_retrieval_model = get_or_create_model("DatasetRetrievalModel", dataset_retrieval_model_fields_copy)
external_knowledge_info_model = get_or_create_model("ExternalKnowledgeInfo", external_knowledge_info_fields)
external_retrieval_model = get_or_create_model("ExternalRetrievalModel", external_retrieval_model_fields)
doc_metadata_model = get_or_create_model("DatasetDocMetadata", doc_metadata_fields)
icon_info_model = get_or_create_model("DatasetIconInfo", icon_info_fields)
dataset_detail_fields_copy = dataset_detail_fields.copy()
dataset_detail_fields_copy["retrieval_model_dict"] = fields.Nested(dataset_retrieval_model)
dataset_detail_fields_copy["tags"] = fields.List(fields.Nested(tag_model))
dataset_detail_fields_copy["external_knowledge_info"] = fields.Nested(external_knowledge_info_model)
dataset_detail_fields_copy["external_retrieval_model"] = fields.Nested(external_retrieval_model, allow_null=True)
dataset_detail_fields_copy["doc_metadata"] = fields.List(fields.Nested(doc_metadata_model))
dataset_detail_fields_copy["icon_info"] = fields.Nested(icon_info_model)
dataset_detail_model = get_or_create_model("DatasetDetail", dataset_detail_fields_copy)
file_info_model = get_or_create_model("DatasetFileInfo", file_info_fields)
content_fields_copy = content_fields.copy()
content_fields_copy["file_info"] = fields.Nested(file_info_model, allow_null=True)
content_model = get_or_create_model("DatasetContent", content_fields_copy)
dataset_query_detail_fields_copy = dataset_query_detail_fields.copy()
dataset_query_detail_fields_copy["queries"] = fields.Nested(content_model)
dataset_query_detail_model = get_or_create_model("DatasetQueryDetail", dataset_query_detail_fields_copy)
app_detail_kernel_model = get_or_create_model("AppDetailKernel", app_detail_kernel_fields)
related_app_list_copy = related_app_list.copy()
related_app_list_copy["data"] = fields.List(fields.Nested(app_detail_kernel_model))
related_app_list_model = get_or_create_model("RelatedAppList", related_app_list_copy)
def _validate_indexing_technique(value: str | None) -> str | None:
if value is None:
@ -208,9 +141,165 @@ class ConsoleDatasetListQuery(BaseModel):
tag_ids: list[str] = Field(default_factory=list, description="Filter by tag IDs")
# Dataset detail payload extended with a required `partial_member_list`.
# Used as the element type of `DatasetListResponse.data`.
# NOTE(review): entries are presumably member account IDs — confirm against DatasetPermissionService.
class DatasetListItemResponse(DatasetDetailResponse):
partial_member_list: list[str]
# Paginated envelope for the console dataset list endpoint.
# All five fields are required; `data` holds one DatasetListItemResponse per dataset.
class DatasetListResponse(ResponseModel):
data: list[DatasetListItemResponse]
has_more: bool
limit: int
total: int
page: int
# Dataset detail payload with an optional `partial_member_list` (defaults to None).
# Registered as the documented 200 response for the single-dataset GET and update endpoints.
class DatasetDetailWithPartialMembersResponse(DatasetDetailResponse):
partial_member_list: list[str] | None = None
# File metadata attached to a dataset query content entry
# (nested under `DatasetQueryContentResponse.file_info`). All fields are required.
# NOTE(review): `size` is presumably in bytes — confirm against the producer of this data.
class DatasetQueryFileInfoResponse(ResponseModel):
id: str
name: str
size: int
extension: str
mime_type: str
source_url: str
# One content entry of a dataset query: a content type, the content itself,
# and optional file metadata (`file_info` defaults to None when absent).
class DatasetQueryContentResponse(ResponseModel):
content_type: str
content: str
file_info: DatasetQueryFileInfoResponse | None = None
# A single dataset query history record.
# `source_app_id` is required but may be None; `created_at` is exposed as an int.
class DatasetQueryDetailResponse(ResponseModel):
id: str
queries: list[DatasetQueryContentResponse]
source: str
source_app_id: str | None
created_by_role: str
created_by: str
created_at: int
# Runs before field validation (mode="before"): the raw value is passed through
# `to_timestamp` so that datetime inputs can satisfy the `int` field.
# NOTE(review): `created_at` is declared as `int`, so a None result from
# `to_timestamp` would fail validation — confirm None never reaches this field.
@field_validator("created_at", mode="before")
@classmethod
def _normalize_created_at(cls, value: datetime | int | None) -> int | None:
return to_timestamp(value)
# Paginated envelope for the dataset query history endpoint.
# All five fields are required; `data` holds one DatasetQueryDetailResponse per record.
class DatasetQueryListResponse(ResponseModel):
data: list[DatasetQueryDetailResponse]
has_more: bool
limit: int
total: int
page: int
# Summary of an application related to a dataset.
class RelatedAppResponse(ResponseModel):
id: str
name: str
description: str
# Populated from the input's `mode_compatible_with_agent` value (validation alias),
# while the field itself is named `mode`.
mode: str = Field(validation_alias="mode_compatible_with_agent")
icon_type: str | None
icon: str | None
icon_background: str | None
icon_url: str | None = None
# After validation, fill `icon_url` from `build_icon_url(icon_type, icon)`
# unless a truthy `icon_url` was already supplied.
@model_validator(mode="after")
def _set_icon_url(self) -> "RelatedAppResponse":
self.icon_url = self.icon_url or build_icon_url(self.icon_type, self.icon)
return self
# Envelope for the related-apps endpoint: the app summaries plus a `total` count.
class RelatedAppListResponse(ResponseModel):
data: list[RelatedAppResponse]
total: int
# Indexing status of a single document.
# The timestamp fields and `error` are required but nullable;
# `completed_segments` and `total_segments` are optional and default to None.
class DocumentStatusResponse(ResponseModel):
id: str
indexing_status: str
processing_started_at: int | None
parsing_completed_at: int | None
cleaning_completed_at: int | None
splitting_completed_at: int | None
completed_at: int | None
paused_at: int | None
error: str | None
stopped_at: int | None
completed_segments: int | None = None
total_segments: int | None = None
# Runs before field validation (mode="before") on every timestamp field listed below:
# the raw value is passed through `to_timestamp` so datetime inputs fit the `int | None` fields.
@field_validator(
"processing_started_at",
"parsing_completed_at",
"cleaning_completed_at",
"splitting_completed_at",
"completed_at",
"paused_at",
"stopped_at",
mode="before",
)
@classmethod
def _normalize_timestamp(cls, value: datetime | int | None) -> int | None:
return to_timestamp(value)
# Envelope holding a list of per-document indexing statuses.
class DocumentStatusListResponse(ResponseModel):
data: list[DocumentStatusResponse]
# Document status list (inherits `data`) extended with a required `total` count.
class ErrorDocsResponse(DocumentStatusListResponse):
total: int
# One preview entry of an indexing estimate: required `content`,
# with optional `child_chunks` and `summary` (both default to None).
class IndexingEstimatePreviewItemResponse(ResponseModel):
content: str
child_chunks: list[str] | None = None
summary: str | None = None
# One question/answer preview entry of an indexing estimate; both fields are required.
class IndexingEstimateQaPreviewItemResponse(ResponseModel):
question: str
answer: str
# Result of an indexing estimate: segment count, preview entries,
# and an optional list of Q&A preview entries (`qa_preview` defaults to None).
class IndexingEstimateResponse(ResponseModel):
total_segments: int
preview: list[IndexingEstimatePreviewItemResponse]
qa_preview: list[IndexingEstimateQaPreviewItemResponse] | None = None
# Response for the retrieval-setting endpoints: a list of retrieval method names.
# The handlers build it from the result of `_get_retrieval_methods_by_vector_type`.
class RetrievalSettingResponse(ResponseModel):
retrieval_method: list[str]
# Envelope for the dataset partial-member list; `data` is a list of strings.
# NOTE(review): entries are presumably member account IDs — confirm against
# DatasetPermissionService.get_dataset_partial_member_list.
class PartialMemberListResponse(ResponseModel):
data: list[str]
# Response for the dataset auto-disable logs endpoint: document IDs and a count.
# NOTE(review): `count` presumably equals len(document_ids) — confirm in
# DatasetService.get_dataset_auto_disable_logs.
class AutoDisableLogsResponse(ResponseModel):
document_ids: list[str]
count: int
# Register the request payload and query models on the console namespace at import time.
# The route decorators further down look them up via `console_ns.models[<Model>.__name__]`,
# so this call must run before those classes are defined.
register_schema_models(
console_ns, DatasetCreatePayload, DatasetUpdatePayload, IndexingEstimatePayload, ConsoleDatasetListQuery
)
# Register the response models on the console namespace at import time.
# The `@console_ns.response(...)` decorators further down reference them via
# `console_ns.models[<Model>.__name__]`, so this call must run before those classes are defined.
register_response_schema_models(
console_ns,
DatasetDetailResponse,
DatasetDetailWithPartialMembersResponse,
DatasetListResponse,
DatasetQueryListResponse,
IndexingEstimateResponse,
RelatedAppListResponse,
DocumentStatusListResponse,
ErrorDocsResponse,
RetrievalSettingResponse,
PartialMemberListResponse,
AutoDisableLogsResponse,
)
def _get_retrieval_methods_by_vector_type(vector_type: str | None, is_mock: bool = False) -> dict[str, list[str]]:
@ -293,17 +382,8 @@ def _get_retrieval_methods_by_vector_type(vector_type: str | None, is_mock: bool
class DatasetListApi(Resource):
@console_ns.doc("get_datasets")
@console_ns.doc(description="Get list of datasets")
@console_ns.doc(
params={
"page": "Page number (default: 1)",
"limit": "Number of items per page (default: 20)",
"ids": "Filter by dataset IDs (list)",
"keyword": "Search keyword",
"tag_ids": "Filter by tag IDs (list)",
"include_all": "Include all datasets (default: false)",
}
)
@console_ns.response(200, "Datasets retrieved successfully")
@console_ns.doc(params=query_params_from_model(ConsoleDatasetListQuery))
@console_ns.response(200, "Datasets retrieved successfully", console_ns.models[DatasetListResponse.__name__])
@setup_required
@login_required
@account_initialization_required
@ -342,7 +422,7 @@ class DatasetListApi(Resource):
for embedding_model in embedding_models:
model_names.append(f"{embedding_model.model}:{embedding_model.provider.provider}")
data = cast(list[dict[str, Any]], marshal(datasets, dataset_detail_fields))
data = [dump_response(DatasetDetailResponse, dataset) for dataset in datasets]
dataset_ids = [item["id"] for item in data if item.get("permission") == "partial_members"]
partial_members_map: dict[str, list[str]] = {}
if dataset_ids:
@ -379,12 +459,12 @@ class DatasetListApi(Resource):
"total": total,
"page": query.page,
}
return response, 200
return dump_response(DatasetListResponse, response), 200
@console_ns.doc("create_dataset")
@console_ns.doc(description="Create a new dataset")
@console_ns.expect(console_ns.models[DatasetCreatePayload.__name__])
@console_ns.response(201, "Dataset created successfully")
@console_ns.response(201, "Dataset created successfully", console_ns.models[DatasetDetailResponse.__name__])
@console_ns.response(400, "Invalid request parameters")
@setup_required
@login_required
@ -413,7 +493,7 @@ class DatasetListApi(Resource):
except services.errors.dataset.DatasetNameDuplicateError:
raise DatasetNameDuplicateError()
return marshal(dataset, dataset_detail_fields), 201
return dump_response(DatasetDetailResponse, dataset), 201
@console_ns.route("/datasets/<uuid:dataset_id>")
@ -421,7 +501,11 @@ class DatasetApi(Resource):
@console_ns.doc("get_dataset")
@console_ns.doc(description="Get dataset details")
@console_ns.doc(params={"dataset_id": "Dataset ID"})
@console_ns.response(200, "Dataset retrieved successfully", dataset_detail_model)
@console_ns.response(
200,
"Dataset retrieved successfully",
console_ns.models[DatasetDetailWithPartialMembersResponse.__name__],
)
@console_ns.response(404, "Dataset not found")
@console_ns.response(403, "Permission denied")
@setup_required
@ -437,7 +521,7 @@ class DatasetApi(Resource):
DatasetService.check_dataset_permission(dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
data = cast(dict[str, Any], marshal(dataset, dataset_detail_fields))
data = dump_response(DatasetDetailResponse, dataset)
if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY:
if dataset.embedding_model_provider:
provider_id = ModelProviderID(dataset.embedding_model_provider)
@ -470,7 +554,11 @@ class DatasetApi(Resource):
@console_ns.doc("update_dataset")
@console_ns.doc(description="Update dataset details")
@console_ns.expect(console_ns.models[DatasetUpdatePayload.__name__])
@console_ns.response(200, "Dataset updated successfully", dataset_detail_model)
@console_ns.response(
200,
"Dataset updated successfully",
console_ns.models[DatasetDetailWithPartialMembersResponse.__name__],
)
@console_ns.response(404, "Dataset not found")
@console_ns.response(403, "Permission denied")
@setup_required
@ -506,7 +594,7 @@ class DatasetApi(Resource):
if dataset is None:
raise NotFound("Dataset not found.")
result_data = cast(dict[str, Any], marshal(dataset, dataset_detail_fields))
result_data = dump_response(DatasetDetailResponse, dataset)
tenant_id = current_tenant_id
if payload.partial_member_list is not None and payload.permission == DatasetPermissionEnum.PARTIAL_TEAM:
@ -567,7 +655,11 @@ class DatasetQueryApi(Resource):
@console_ns.doc("get_dataset_queries")
@console_ns.doc(description="Get dataset query history")
@console_ns.doc(params={"dataset_id": "Dataset ID"})
@console_ns.response(200, "Query history retrieved successfully", dataset_query_detail_model)
@console_ns.response(
200,
"Query history retrieved successfully",
console_ns.models[DatasetQueryListResponse.__name__],
)
@setup_required
@login_required
@account_initialization_required
@ -589,20 +681,24 @@ class DatasetQueryApi(Resource):
dataset_queries, total = DatasetService.get_dataset_queries(dataset_id=dataset.id, page=page, per_page=limit)
response = {
"data": marshal(dataset_queries, dataset_query_detail_model),
"data": dataset_queries,
"has_more": len(dataset_queries) == limit,
"limit": limit,
"total": total,
"page": page,
}
return response, 200
return dump_response(DatasetQueryListResponse, response), 200
@console_ns.route("/datasets/indexing-estimate")
class DatasetIndexingEstimateApi(Resource):
@console_ns.doc("estimate_dataset_indexing")
@console_ns.doc(description="Estimate dataset indexing cost")
@console_ns.response(200, "Indexing estimate calculated successfully")
@console_ns.response(
200,
"Indexing estimate calculated successfully",
console_ns.models[IndexingEstimateResponse.__name__],
)
@setup_required
@login_required
@account_initialization_required
@ -699,11 +795,14 @@ class DatasetRelatedAppListApi(Resource):
@console_ns.doc("get_dataset_related_apps")
@console_ns.doc(description="Get applications related to dataset")
@console_ns.doc(params={"dataset_id": "Dataset ID"})
@console_ns.response(200, "Related apps retrieved successfully", related_app_list_model)
@console_ns.response(
200,
"Related apps retrieved successfully",
console_ns.models[RelatedAppListResponse.__name__],
)
@setup_required
@login_required
@account_initialization_required
@marshal_with(related_app_list_model)
def get(self, dataset_id):
current_user, _ = current_account_with_tenant()
dataset_id_str = str(dataset_id)
@ -724,7 +823,7 @@ class DatasetRelatedAppListApi(Resource):
if app_model:
related_apps.append(app_model)
return {"data": related_apps, "total": len(related_apps)}, 200
return dump_response(RelatedAppListResponse, {"data": related_apps, "total": len(related_apps)}), 200
@console_ns.route("/datasets/<uuid:dataset_id>/indexing-status")
@ -732,7 +831,11 @@ class DatasetIndexingStatusApi(Resource):
@console_ns.doc("get_dataset_indexing_status")
@console_ns.doc(description="Get dataset indexing status")
@console_ns.doc(params={"dataset_id": "Dataset ID"})
@console_ns.response(200, "Indexing status retrieved successfully")
@console_ns.response(
200,
"Indexing status retrieved successfully",
console_ns.models[DocumentStatusListResponse.__name__],
)
@setup_required
@login_required
@account_initialization_required
@ -778,9 +881,8 @@ class DatasetIndexingStatusApi(Resource):
"completed_segments": completed_segments,
"total_segments": total_segments,
}
documents_status.append(marshal(document_dict, document_status_fields))
data = {"data": documents_status}
return data, 200
documents_status.append(document_dict)
return dump_response(DocumentStatusListResponse, {"data": documents_status}), 200
@console_ns.route("/datasets/api-keys")
@ -907,13 +1009,18 @@ class DatasetApiBaseUrlApi(Resource):
class DatasetRetrievalSettingApi(Resource):
@console_ns.doc("get_dataset_retrieval_setting")
@console_ns.doc(description="Get dataset retrieval settings")
@console_ns.response(200, "Retrieval settings retrieved successfully")
@console_ns.response(
200, "Retrieval settings retrieved successfully", console_ns.models[RetrievalSettingResponse.__name__]
)
@setup_required
@login_required
@account_initialization_required
def get(self):
vector_type = dify_config.VECTOR_STORE
return _get_retrieval_methods_by_vector_type(vector_type, is_mock=False)
return dump_response(
RetrievalSettingResponse,
_get_retrieval_methods_by_vector_type(vector_type, is_mock=False),
)
@console_ns.route("/datasets/retrieval-setting/<string:vector_type>")
@ -921,12 +1028,19 @@ class DatasetRetrievalSettingMockApi(Resource):
@console_ns.doc("get_dataset_retrieval_setting_mock")
@console_ns.doc(description="Get mock dataset retrieval settings by vector type")
@console_ns.doc(params={"vector_type": "Vector store type"})
@console_ns.response(200, "Mock retrieval settings retrieved successfully")
@console_ns.response(
200,
"Mock retrieval settings retrieved successfully",
console_ns.models[RetrievalSettingResponse.__name__],
)
@setup_required
@login_required
@account_initialization_required
def get(self, vector_type):
return _get_retrieval_methods_by_vector_type(vector_type, is_mock=True)
return dump_response(
RetrievalSettingResponse,
_get_retrieval_methods_by_vector_type(vector_type, is_mock=True),
)
@console_ns.route("/datasets/<uuid:dataset_id>/error-docs")
@ -934,7 +1048,7 @@ class DatasetErrorDocs(Resource):
@console_ns.doc("get_dataset_error_docs")
@console_ns.doc(description="Get dataset error documents")
@console_ns.doc(params={"dataset_id": "Dataset ID"})
@console_ns.response(200, "Error documents retrieved successfully")
@console_ns.response(200, "Error documents retrieved successfully", console_ns.models[ErrorDocsResponse.__name__])
@console_ns.response(404, "Dataset not found")
@setup_required
@login_required
@ -946,7 +1060,7 @@ class DatasetErrorDocs(Resource):
raise NotFound("Dataset not found.")
results = DocumentService.get_error_documents_by_dataset_id(dataset_id_str)
return {"data": [marshal(item, document_status_fields) for item in results], "total": len(results)}, 200
return dump_response(ErrorDocsResponse, {"data": results, "total": len(results)}), 200
@console_ns.route("/datasets/<uuid:dataset_id>/permission-part-users")
@ -954,7 +1068,11 @@ class DatasetPermissionUserListApi(Resource):
@console_ns.doc("get_dataset_permission_users")
@console_ns.doc(description="Get dataset permission user list")
@console_ns.doc(params={"dataset_id": "Dataset ID"})
@console_ns.response(200, "Permission users retrieved successfully")
@console_ns.response(
200,
"Permission users retrieved successfully",
console_ns.models[PartialMemberListResponse.__name__],
)
@console_ns.response(404, "Dataset not found")
@console_ns.response(403, "Permission denied")
@setup_required
@ -973,9 +1091,7 @@ class DatasetPermissionUserListApi(Resource):
partial_members_list = DatasetPermissionService.get_dataset_partial_member_list(dataset_id_str)
return {
"data": partial_members_list,
}, 200
return dump_response(PartialMemberListResponse, {"data": partial_members_list}), 200
@console_ns.route("/datasets/<uuid:dataset_id>/auto-disable-logs")
@ -983,7 +1099,11 @@ class DatasetAutoDisableLogApi(Resource):
@console_ns.doc("get_dataset_auto_disable_logs")
@console_ns.doc(description="Get dataset auto disable logs")
@console_ns.doc(params={"dataset_id": "Dataset ID"})
@console_ns.response(200, "Auto disable logs retrieved successfully")
@console_ns.response(
200,
"Auto disable logs retrieved successfully",
console_ns.models[AutoDisableLogsResponse.__name__],
)
@console_ns.response(404, "Dataset not found")
@setup_required
@login_required
@ -993,4 +1113,4 @@ class DatasetAutoDisableLogApi(Resource):
dataset = DatasetService.get_dataset(dataset_id_str)
if dataset is None:
raise NotFound("Dataset not found.")
return DatasetService.get_dataset_auto_disable_logs(dataset_id_str), 200
return dump_response(AutoDisableLogsResponse, DatasetService.get_dataset_auto_disable_logs(dataset_id_str)), 200

View File

@ -1,13 +1,17 @@
from typing import Any, Literal, cast
from typing import Any, Literal
from flask import request
from flask_restx import marshal
from pydantic import BaseModel, Field, TypeAdapter, field_validator, model_validator
from pydantic import BaseModel, ConfigDict, Field, RootModel, field_validator, model_validator
from werkzeug.exceptions import Forbidden, NotFound
import services
from controllers.common.fields import SimpleResultResponse
from controllers.common.schema import register_enum_models, register_response_schema_models, register_schema_models
from controllers.common.schema import (
query_params_from_model,
register_enum_models,
register_response_schema_models,
register_schema_models,
)
from controllers.console.wraps import edit_permission_required
from controllers.service_api import service_api_ns
from controllers.service_api.dataset.error import DatasetInUseError, DatasetNameDuplicateError, InvalidActionError
@ -17,9 +21,10 @@ from controllers.service_api.wraps import (
)
from core.plugin.impl.model_runtime_factory import create_plugin_provider_manager
from core.rag.index_processor.constant.index_type import IndexTechniqueType
from fields.dataset_fields import dataset_detail_fields
from fields.tag_fields import DataSetTag
from fields.base import ResponseModel
from fields.dataset_fields import DatasetDetailResponse
from graphon.model_runtime.entities.model_entities import ModelType
from libs.helper import dump_response
from libs.login import current_user
from models.account import Account
from models.dataset import DatasetPermissionEnum
@ -119,6 +124,21 @@ class TagUnbindingPayload(BaseModel):
return self
# A knowledge tag as exposed by the Service API.
class KnowledgeTagResponse(ResponseModel):
# Lets numeric inputs validate against `str` fields; the tag handlers pass
# `binding_count` as a number (e.g. 0 on creation) and it is emitted as a string.
model_config = ConfigDict(coerce_numbers_to_str=True)
id: str
name: str
type: str
# TODO: The public Service API docs expose binding_count as string|null.
# Keep matching the old RESTX fields.String coercion until that contract is intentionally migrated.
binding_count: str | None = None
# Root model whose value is a list of KnowledgeTagResponse, so the payload is a
# top-level list rather than an object with a `data` key.
class KnowledgeTagListResponse(RootModel[list[KnowledgeTagResponse]]):
pass
class DatasetListQuery(BaseModel):
page: int = Field(default=1, description="Page number")
limit: int = Field(default=20, description="Number of items per page")
@ -127,6 +147,29 @@ class DatasetListQuery(BaseModel):
tag_ids: list[str] = Field(default_factory=list, description="Filter by tag IDs")
# Dataset detail payload with an optional `partial_member_list` (defaults to None).
# Used by the Service API single-dataset GET and PATCH handlers.
class DatasetDetailWithPartialMembersResponse(DatasetDetailResponse):
partial_member_list: list[str] | None = None
# TODO: This duplicates code elsewhere, but partial_member_list has different nullability.
# Paginated envelope for the Service API dataset list endpoint.
# All five fields are required; items are plain DatasetDetailResponse objects
# (no `partial_member_list` field is declared on them here).
class DatasetListResponse(ResponseModel):
data: list[DatasetDetailResponse]
has_more: bool
limit: int
total: int
page: int
# Minimal view of a tag bound to a dataset: only its id and name.
class DatasetBoundTagResponse(ResponseModel):
id: str
name: str
# Envelope for the tags bound to a dataset: the tag list plus a `total` count.
class DatasetBoundTagListResponse(ResponseModel):
data: list[DatasetBoundTagResponse]
total: int
register_schema_models(
service_api_ns,
DatasetCreatePayload,
@ -137,9 +180,17 @@ register_schema_models(
TagBindingPayload,
TagUnbindingPayload,
DatasetListQuery,
DataSetTag,
)
register_response_schema_models(service_api_ns, SimpleResultResponse)
# Register the response models on the Service API namespace at import time.
# The `@service_api_ns.response(...)` decorators further down reference them via
# `service_api_ns.models[<Model>.__name__]`, so this call must run before those classes are defined.
register_response_schema_models(
service_api_ns,
SimpleResultResponse,
KnowledgeTagResponse,
KnowledgeTagListResponse,
DatasetDetailResponse,
DatasetDetailWithPartialMembersResponse,
DatasetListResponse,
DatasetBoundTagListResponse,
)
@service_api_ns.route("/datasets")
@ -154,9 +205,18 @@ class DatasetListApi(DatasetApiResource):
401: "Unauthorized - invalid API token",
}
)
@service_api_ns.doc(params=query_params_from_model(DatasetListQuery))
@service_api_ns.response(
200,
"Datasets retrieved successfully",
service_api_ns.models[DatasetListResponse.__name__],
)
def get(self, tenant_id):
"""Resource for getting datasets."""
query = DatasetListQuery.model_validate(request.args.to_dict())
query_params: dict[str, str | list[str]] = dict(request.args.to_dict())
if "tag_ids" in request.args:
query_params["tag_ids"] = request.args.getlist("tag_ids")
query = DatasetListQuery.model_validate(query_params)
# provider = request.args.get("provider", default="vendor")
datasets, total = DatasetService.get_datasets(
@ -175,17 +235,17 @@ class DatasetListApi(DatasetApiResource):
for embedding_model in embedding_models:
model_names.append(f"{embedding_model.model}:{embedding_model.provider.provider}")
data = marshal(datasets, dataset_detail_fields)
data = [dump_response(DatasetDetailResponse, dataset) for dataset in datasets]
for item in data:
if item["indexing_technique"] == IndexTechniqueType.HIGH_QUALITY and item["embedding_model_provider"]:
item["embedding_model_provider"] = str(ModelProviderID(item["embedding_model_provider"]))
item_model = f"{item['embedding_model']}:{item['embedding_model_provider']}"
if item_model in model_names:
item["embedding_available"] = True # type: ignore
item["embedding_available"] = True
else:
item["embedding_available"] = False # type: ignore
item["embedding_available"] = False
else:
item["embedding_available"] = True # type: ignore
item["embedding_available"] = True
response = {
"data": data,
"has_more": len(datasets) == query.limit,
@ -193,7 +253,7 @@ class DatasetListApi(DatasetApiResource):
"total": total,
"page": query.page,
}
return response, 200
return dump_response(DatasetListResponse, response), 200
@service_api_ns.expect(service_api_ns.models[DatasetCreatePayload.__name__])
@service_api_ns.doc("create_dataset")
@ -205,6 +265,11 @@ class DatasetListApi(DatasetApiResource):
400: "Bad request - invalid parameters",
}
)
@service_api_ns.response(
200,
"Dataset created successfully",
service_api_ns.models[DatasetDetailResponse.__name__],
)
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def post(self, tenant_id):
"""Resource for creating datasets."""
@ -248,7 +313,7 @@ class DatasetListApi(DatasetApiResource):
except services.errors.dataset.DatasetNameDuplicateError:
raise DatasetNameDuplicateError()
return marshal(dataset, dataset_detail_fields), 200
return dump_response(DatasetDetailResponse, dataset), 200
@service_api_ns.route("/datasets/<uuid:dataset_id>")
@ -266,6 +331,11 @@ class DatasetApi(DatasetApiResource):
404: "Dataset not found",
}
)
@service_api_ns.response(
200,
"Dataset retrieved successfully",
service_api_ns.models[DatasetDetailWithPartialMembersResponse.__name__],
)
def get(self, _, dataset_id):
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
@ -275,7 +345,7 @@ class DatasetApi(DatasetApiResource):
DatasetService.check_dataset_permission(dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
data = cast(dict[str, Any], marshal(dataset, dataset_detail_fields))
data = dump_response(DatasetDetailResponse, dataset)
# check embedding setting
assert isinstance(current_user, Account)
cid = current_user.current_tenant_id
@ -307,7 +377,13 @@ class DatasetApi(DatasetApiResource):
part_users_list = DatasetPermissionService.get_dataset_partial_member_list(dataset_id_str)
data.update({"partial_member_list": part_users_list})
return data, 200
return (
DatasetDetailWithPartialMembersResponse.model_validate(data).model_dump(
mode="json",
exclude={"partial_member_list"} if "partial_member_list" not in data else set(),
),
200,
)
@service_api_ns.expect(service_api_ns.models[DatasetUpdatePayload.__name__])
@service_api_ns.doc("update_dataset")
@ -321,6 +397,11 @@ class DatasetApi(DatasetApiResource):
404: "Dataset not found",
}
)
@service_api_ns.response(
200,
"Dataset updated successfully",
service_api_ns.models[DatasetDetailWithPartialMembersResponse.__name__],
)
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def patch(self, _, dataset_id):
dataset_id_str = str(dataset_id)
@ -371,7 +452,7 @@ class DatasetApi(DatasetApiResource):
if dataset is None:
raise NotFound("Dataset not found.")
result_data = cast(dict[str, Any], marshal(dataset, dataset_detail_fields))
result_data = dump_response(DatasetDetailResponse, dataset)
assert isinstance(current_user, Account)
tenant_id = current_user.current_tenant_id
@ -384,7 +465,7 @@ class DatasetApi(DatasetApiResource):
partial_member_list = DatasetPermissionService.get_dataset_partial_member_list(dataset_id_str)
result_data.update({"partial_member_list": partial_member_list})
return result_data, 200
return DatasetDetailWithPartialMembersResponse.model_validate(result_data).model_dump(mode="json"), 200
@service_api_ns.doc("delete_dataset")
@service_api_ns.doc(description="Delete a dataset")
@ -497,7 +578,7 @@ class DocumentStatusApi(DatasetApiResource):
except ValueError as e:
raise InvalidActionError(str(e))
return {"result": "success"}, 200
return dump_response(SimpleResultResponse, {"result": "success"}), 200
@service_api_ns.route("/datasets/tags")
@ -510,14 +591,18 @@ class DatasetTagsApi(DatasetApiResource):
401: "Unauthorized - invalid API token",
}
)
@service_api_ns.response(
200,
"Tags retrieved successfully",
service_api_ns.models[KnowledgeTagListResponse.__name__],
)
def get(self, _):
"""Get all knowledge type tags."""
assert isinstance(current_user, Account)
cid = current_user.current_tenant_id
assert cid is not None
tags = TagService.get_tags("knowledge", cid)
tag_models = TypeAdapter(list[DataSetTag]).validate_python(tags, from_attributes=True)
return [tag.model_dump(mode="json") for tag in tag_models], 200
return dump_response(KnowledgeTagListResponse, tags), 200
@service_api_ns.expect(service_api_ns.models[TagCreatePayload.__name__])
@service_api_ns.doc("create_dataset_tag")
@ -529,6 +614,11 @@ class DatasetTagsApi(DatasetApiResource):
403: "Forbidden - insufficient permissions",
}
)
@service_api_ns.response(
200,
"Tag created successfully",
service_api_ns.models[KnowledgeTagResponse.__name__],
)
def post(self, _):
"""Add a knowledge type tag."""
assert isinstance(current_user, Account)
@ -538,9 +628,10 @@ class DatasetTagsApi(DatasetApiResource):
payload = TagCreatePayload.model_validate(service_api_ns.payload or {})
tag = TagService.save_tags(SaveTagPayload(name=payload.name, type=TagType.KNOWLEDGE))
response = DataSetTag.model_validate(
{"id": tag.id, "name": tag.name, "type": tag.type, "binding_count": 0}
).model_dump(mode="json")
response = dump_response(
KnowledgeTagResponse,
{"id": tag.id, "name": tag.name, "type": tag.type, "binding_count": 0},
)
return response, 200
@service_api_ns.expect(service_api_ns.models[TagUpdatePayload.__name__])
@ -553,6 +644,11 @@ class DatasetTagsApi(DatasetApiResource):
403: "Forbidden - insufficient permissions",
}
)
@service_api_ns.response(
200,
"Tag updated successfully",
service_api_ns.models[KnowledgeTagResponse.__name__],
)
def patch(self, _):
assert isinstance(current_user, Account)
if not (current_user.has_edit_permission or current_user.is_dataset_editor):
@ -564,9 +660,10 @@ class DatasetTagsApi(DatasetApiResource):
binding_count = TagService.get_tag_binding_count(tag_id)
response = DataSetTag.model_validate(
{"id": tag.id, "name": tag.name, "type": tag.type, "binding_count": binding_count}
).model_dump(mode="json")
response = dump_response(
KnowledgeTagResponse,
{"id": tag.id, "name": tag.name, "type": tag.type, "binding_count": binding_count},
)
return response, 200
@service_api_ns.expect(service_api_ns.models[TagDeletePayload.__name__])
@ -651,6 +748,11 @@ class DatasetTagsBindingStatusApi(DatasetApiResource):
401: "Unauthorized - invalid API token",
}
)
@service_api_ns.response(
200,
"Tags retrieved successfully",
service_api_ns.models[DatasetBoundTagListResponse.__name__],
)
def get(self, _, *args, **kwargs):
"""Get all knowledge type tags."""
dataset_id = kwargs.get("dataset_id")
@ -658,5 +760,4 @@ class DatasetTagsBindingStatusApi(DatasetApiResource):
assert current_user.current_tenant_id is not None
tags = TagService.get_tags_by_target_id("knowledge", current_user.current_tenant_id, str(dataset_id))
tags_list = [{"id": tag.id, "name": tag.name} for tag in tags]
response = {"data": tags_list, "total": len(tags)}
return response, 200
return dump_response(DatasetBoundTagListResponse, {"data": tags_list, "total": len(tags)}), 200

View File

@ -1,7 +1,10 @@
from datetime import datetime
from flask_restx import fields
from pydantic import field_validator
from fields.base import ResponseModel
from libs.helper import TimestampField
from libs.helper import TimestampField, to_timestamp
dataset_fields = {
"id": fields.String,
@ -142,27 +145,116 @@ dataset_detail_fields = {
"is_multimodal": fields.Boolean,
}
file_info_fields = {
"id": fields.String,
"name": fields.String,
"size": fields.Integer,
"extension": fields.String,
"mime_type": fields.String,
"source_url": fields.String,
}
content_fields = {
"content_type": fields.String,
"content": fields.String,
"file_info": fields.Nested(file_info_fields, allow_null=True),
}
# Response schema naming the reranking provider/model attached to a dataset's
# retrieval settings. Both fields default to None, so either may be omitted.
class DatasetRerankingModelResponse(ResponseModel):
reranking_provider_name: str | None = None
reranking_model_name: str | None = None
dataset_query_detail_fields = {
"id": fields.String,
"queries": fields.Nested(content_fields),
"source": fields.String,
"source_app_id": fields.String,
"created_by_role": fields.String,
"created_by": fields.String,
"created_at": TimestampField,
}
# Response schema for the keyword half of a weighted-score configuration.
# The single field has no default, so it must always be supplied.
class DatasetKeywordSettingResponse(ResponseModel):
keyword_weight: float
# Response schema for the vector half of a weighted-score configuration:
# the weight plus the embedding model/provider names. All fields are required.
class DatasetVectorSettingResponse(ResponseModel):
vector_weight: float
embedding_model_name: str
embedding_provider_name: str
# Response schema grouping the keyword and vector weight settings.
# None of the fields declares a default: each key must be present in the
# input, but its value may be None.
class DatasetWeightedScoreResponse(ResponseModel):
weight_type: str | None
keyword_setting: DatasetKeywordSettingResponse | None
vector_setting: DatasetVectorSettingResponse | None
# Response schema for a dataset's retrieval configuration.
# Fields with "= None" are optional; the rest must be supplied.
class DatasetRetrievalModelResponse(ResponseModel):
search_method: str
reranking_enable: bool
reranking_mode: str | None = None
# No default here: the key must be present, though its value may be None.
reranking_model: DatasetRerankingModelResponse | None
weights: DatasetWeightedScoreResponse | None = None
top_k: int
score_threshold_enabled: bool
score_threshold: float | None = None
# Response schema for a dataset's summary-index settings.
# Every field defaults to None, so an empty input validates.
class DatasetSummaryIndexSettingResponse(ResponseModel):
enable: bool | None = None
model_name: str | None = None
model_provider_name: str | None = None
summary_prompt: str | None = None
# Response schema for one tag listed on a dataset (see DatasetDetailResponse.tags).
# All three fields are required strings.
class DatasetTagResponse(ResponseModel):
id: str
name: str
type: str
# Response schema identifying the external knowledge source behind a dataset:
# the knowledge id plus the id, name and endpoint of the external API.
# All fields are required strings.
class DatasetExternalKnowledgeInfoResponse(ResponseModel):
external_knowledge_id: str
external_knowledge_api_id: str
external_knowledge_api_name: str
external_knowledge_api_endpoint: str
# Response schema for retrieval settings of an externally backed dataset.
# top_k and score_threshold are required; score_threshold_enabled is optional.
class DatasetExternalRetrievalModelResponse(ResponseModel):
top_k: int
score_threshold: float
score_threshold_enabled: bool | None = None
# Response schema for one document-metadata field definition on a dataset
# (see DatasetDetailResponse.doc_metadata). All fields are required strings.
class DatasetDocMetadataResponse(ResponseModel):
id: str
name: str
type: str
# Response schema describing a dataset's icon.
# icon_type and icon have no default (key required, value may be None);
# icon_background and icon_url may be omitted entirely.
class DatasetIconInfoResponse(ResponseModel):
icon_type: str | None
icon: str | None
icon_background: str | None = None
icon_url: str | None = None
# Pydantic response schema for the full dataset detail payload; it takes the
# place of the flask-restx `dataset_detail_fields` mapping as the documented
# response model.
# Fields annotated "X | None" without a default must still be present in the
# input (value may be None); only embedding_available may be omitted.
class DatasetDetailResponse(ResponseModel):
id: str
name: str
description: str | None
provider: str
permission: str
data_source_type: str | None
indexing_technique: str | None
app_count: int
document_count: int
word_count: int
created_by: str
author_name: str | None
# Declared as int; see _normalize_timestamp below for the pre-validation step.
created_at: int
updated_by: str | None
updated_at: int
embedding_model: str | None
embedding_model_provider: str | None
embedding_available: bool | None = None
retrieval_model_dict: DatasetRetrievalModelResponse
summary_index_setting: DatasetSummaryIndexSettingResponse | None
tags: list[DatasetTagResponse]
doc_form: str | None
external_knowledge_info: DatasetExternalKnowledgeInfoResponse | None
external_retrieval_model: DatasetExternalRetrievalModelResponse | None
doc_metadata: list[DatasetDocMetadataResponse]
built_in_field_enabled: bool
pipeline_id: str | None
runtime_mode: str | None
chunk_structure: str | None
icon_info: DatasetIconInfoResponse | None
is_published: bool
total_documents: int
total_available_documents: int
enable_api: bool
is_multimodal: bool
# Runs before pydantic's own int validation of created_at / updated_at and
# hands the raw value to libs.helper.to_timestamp.
# NOTE(review): presumably to_timestamp turns a datetime into an integer
# epoch value and passes None through -- confirm; a None result would then be
# rejected by the non-optional `int` annotations on both fields.
@field_validator("created_at", "updated_at", mode="before")
@classmethod
def _normalize_timestamp(cls, value: datetime | int | None) -> int | None:
return to_timestamp(value)

View File

@ -1,10 +0,0 @@
from __future__ import annotations
from fields.base import ResponseModel
class DataSetTag(ResponseModel):
id: str
name: str
type: str
binding_count: str | None = None

View File

@ -4135,18 +4135,18 @@ Get list of datasets
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| ids | query | Filter by dataset IDs (list) | No | string |
| include_all | query | Include all datasets (default: false) | No | string |
| ids | query | Filter by dataset IDs | No | [ string ] |
| include_all | query | Include all datasets | No | boolean |
| keyword | query | Search keyword | No | string |
| limit | query | Number of items per page (default: 20) | No | string |
| page | query | Page number (default: 1) | No | string |
| tag_ids | query | Filter by tag IDs (list) | No | string |
| limit | query | Number of items per page | No | integer |
| page | query | Page number | No | integer |
| tag_ids | query | Filter by tag IDs | No | [ string ] |
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Datasets retrieved successfully |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Datasets retrieved successfully | [DatasetListResponse](#datasetlistresponse) |
#### POST
##### Description
@ -4161,10 +4161,10 @@ Create a new dataset
##### Responses
| Code | Description |
| ---- | ----------- |
| 201 | Dataset created successfully |
| 400 | Invalid request parameters |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 201 | Dataset created successfully | [DatasetDetailResponse](#datasetdetailresponse) |
| 400 | Invalid request parameters | |
### /datasets/api-base-info
@ -4384,9 +4384,9 @@ Estimate dataset indexing cost
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Indexing estimate calculated successfully |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Indexing estimate calculated successfully | [IndexingEstimateResponse](#indexingestimateresponse) |
### /datasets/init
@ -4467,9 +4467,9 @@ Get dataset retrieval settings
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Retrieval settings retrieved successfully |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Retrieval settings retrieved successfully | [RetrievalSettingResponse](#retrievalsettingresponse) |
### /datasets/retrieval-setting/{vector_type}
@ -4486,9 +4486,9 @@ Get mock dataset retrieval settings by vector type
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Mock retrieval settings retrieved successfully |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Mock retrieval settings retrieved successfully | [RetrievalSettingResponse](#retrievalsettingresponse) |
### /datasets/{dataset_id}
@ -4520,7 +4520,7 @@ Get dataset details
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Dataset retrieved successfully | [DatasetDetail](#datasetdetail) |
| 200 | Dataset retrieved successfully | [DatasetDetailWithPartialMembersResponse](#datasetdetailwithpartialmembersresponse) |
| 403 | Permission denied | |
| 404 | Dataset not found | |
@ -4540,7 +4540,7 @@ Update dataset details
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Dataset updated successfully | [DatasetDetail](#datasetdetail) |
| 200 | Dataset updated successfully | [DatasetDetailWithPartialMembersResponse](#datasetdetailwithpartialmembersresponse) |
| 403 | Permission denied | |
| 404 | Dataset not found | |
@ -4575,10 +4575,10 @@ Get dataset auto disable logs
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Auto disable logs retrieved successfully |
| 404 | Dataset not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Auto disable logs retrieved successfully | [AutoDisableLogsResponse](#autodisablelogsresponse) |
| 404 | Dataset not found | |
### /datasets/{dataset_id}/batch/{batch}/indexing-estimate
@ -5263,10 +5263,10 @@ Get dataset error documents
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Error documents retrieved successfully |
| 404 | Dataset not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Error documents retrieved successfully | [ErrorDocsResponse](#errordocsresponse) |
| 404 | Dataset not found | |
### /datasets/{dataset_id}/external-hit-testing
@ -5327,9 +5327,9 @@ Get dataset indexing status
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Indexing status retrieved successfully |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Indexing status retrieved successfully | [DocumentStatusListResponse](#documentstatuslistresponse) |
### /datasets/{dataset_id}/metadata
@ -5437,11 +5437,11 @@ Get dataset permission user list
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Permission users retrieved successfully |
| 403 | Permission denied |
| 404 | Dataset not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Permission users retrieved successfully | [PartialMemberListResponse](#partialmemberlistresponse) |
| 403 | Permission denied | |
| 404 | Dataset not found | |
### /datasets/{dataset_id}/queries
@ -5460,7 +5460,7 @@ Get dataset query history
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Query history retrieved successfully | [DatasetQueryDetail](#datasetquerydetail) |
| 200 | Query history retrieved successfully | [DatasetQueryListResponse](#datasetquerylistresponse) |
### /datasets/{dataset_id}/related-apps
@ -5479,7 +5479,7 @@ Get applications related to dataset
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Related apps retrieved successfully | [RelatedAppList](#relatedapplist) |
| 200 | Related apps retrieved successfully | [RelatedAppListResponse](#relatedapplistresponse) |
### /datasets/{dataset_id}/retry
@ -10863,19 +10863,6 @@ Enum class for api provider schema type.
| use_icon_as_answer_icon | boolean | | No |
| workflow | [WorkflowPartial](#workflowpartial) | | No |
#### AppDetailKernel
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| description | string | | No |
| icon | string | | No |
| icon_background | string | | No |
| icon_type | string | | No |
| icon_url | object | | No |
| id | string | | No |
| mode | string | | No |
| name | string | | No |
#### AppDetailWithSite
| Name | Type | Description | Required |
@ -11081,6 +11068,13 @@ AppMCPServer Status Enum
| ---- | ---- | ----------- | -------- |
| text | string | Transcribed text from audio | Yes |
#### AutoDisableLogsResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| count | integer | | Yes |
| document_ids | [ string ] | | Yes |
#### AvatarUrlResponse
| Name | Type | Description | Required |
@ -11643,27 +11637,6 @@ Condition detail
| dataset | [DatasetResponse](#datasetresponse) | | Yes |
| documents | [ [DocumentResponse](#documentresponse) ] | | Yes |
#### DatasetBase
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| created_at | object | | No |
| created_by | string | | No |
| data_source_type | string | | No |
| description | string | | No |
| id | string | | No |
| indexing_technique | string | | No |
| name | string | | No |
| permission | string | | No |
#### DatasetContent
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| content | string | | No |
| content_type | string | | No |
| file_info | [DatasetFileInfo](#datasetfileinfo) | | No |
#### DatasetCreatePayload
| Name | Type | Description | Required |
@ -11716,6 +11689,87 @@ Condition detail
| updated_by | string | | No |
| word_count | integer | | No |
#### DatasetDetailResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| app_count | integer | | Yes |
| author_name | string | | Yes |
| built_in_field_enabled | boolean | | Yes |
| chunk_structure | string | | Yes |
| created_at | integer | | Yes |
| created_by | string | | Yes |
| data_source_type | string | | Yes |
| description | string | | Yes |
| doc_form | string | | Yes |
| doc_metadata | [ [DatasetDocMetadataResponse](#datasetdocmetadataresponse) ] | | Yes |
| document_count | integer | | Yes |
| embedding_available | boolean | | No |
| embedding_model | string | | Yes |
| embedding_model_provider | string | | Yes |
| enable_api | boolean | | Yes |
| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | Yes |
| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes |
| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | Yes |
| id | string | | Yes |
| indexing_technique | string | | Yes |
| is_multimodal | boolean | | Yes |
| is_published | boolean | | Yes |
| name | string | | Yes |
| permission | string | | Yes |
| pipeline_id | string | | Yes |
| provider | string | | Yes |
| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes |
| runtime_mode | string | | Yes |
| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | Yes |
| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes |
| total_available_documents | integer | | Yes |
| total_documents | integer | | Yes |
| updated_at | integer | | Yes |
| updated_by | string | | Yes |
| word_count | integer | | Yes |
#### DatasetDetailWithPartialMembersResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| app_count | integer | | Yes |
| author_name | string | | Yes |
| built_in_field_enabled | boolean | | Yes |
| chunk_structure | string | | Yes |
| created_at | integer | | Yes |
| created_by | string | | Yes |
| data_source_type | string | | Yes |
| description | string | | Yes |
| doc_form | string | | Yes |
| doc_metadata | [ [DatasetDocMetadataResponse](#datasetdocmetadataresponse) ] | | Yes |
| document_count | integer | | Yes |
| embedding_available | boolean | | No |
| embedding_model | string | | Yes |
| embedding_model_provider | string | | Yes |
| enable_api | boolean | | Yes |
| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | Yes |
| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes |
| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | Yes |
| id | string | | Yes |
| indexing_technique | string | | Yes |
| is_multimodal | boolean | | Yes |
| is_published | boolean | | Yes |
| name | string | | Yes |
| partial_member_list | [ string ] | | No |
| permission | string | | Yes |
| pipeline_id | string | | Yes |
| provider | string | | Yes |
| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes |
| runtime_mode | string | | Yes |
| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | Yes |
| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes |
| total_available_documents | integer | | Yes |
| total_documents | integer | | Yes |
| updated_at | integer | | Yes |
| updated_by | string | | Yes |
| word_count | integer | | Yes |
#### DatasetDocMetadata
| Name | Type | Description | Required |
@ -11724,16 +11778,30 @@ Condition detail
| name | string | | No |
| type | string | | No |
#### DatasetFileInfo
#### DatasetDocMetadataResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| extension | string | | No |
| id | string | | No |
| mime_type | string | | No |
| name | string | | No |
| size | integer | | No |
| source_url | string | | No |
| id | string | | Yes |
| name | string | | Yes |
| type | string | | Yes |
#### DatasetExternalKnowledgeInfoResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| external_knowledge_api_endpoint | string | | Yes |
| external_knowledge_api_id | string | | Yes |
| external_knowledge_api_name | string | | Yes |
| external_knowledge_id | string | | Yes |
#### DatasetExternalRetrievalModelResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| score_threshold | number | | Yes |
| score_threshold_enabled | boolean | | No |
| top_k | integer | | Yes |
#### DatasetIconInfo
@ -11744,12 +11812,78 @@ Condition detail
| icon_type | string | | No |
| icon_url | string | | No |
#### DatasetIconInfoResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| icon | string | | Yes |
| icon_background | string | | No |
| icon_type | string | | Yes |
| icon_url | string | | No |
#### DatasetKeywordSetting
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| keyword_weight | number | | No |
#### DatasetKeywordSettingResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| keyword_weight | number | | Yes |
#### DatasetListItemResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| app_count | integer | | Yes |
| author_name | string | | Yes |
| built_in_field_enabled | boolean | | Yes |
| chunk_structure | string | | Yes |
| created_at | integer | | Yes |
| created_by | string | | Yes |
| data_source_type | string | | Yes |
| description | string | | Yes |
| doc_form | string | | Yes |
| doc_metadata | [ [DatasetDocMetadataResponse](#datasetdocmetadataresponse) ] | | Yes |
| document_count | integer | | Yes |
| embedding_available | boolean | | No |
| embedding_model | string | | Yes |
| embedding_model_provider | string | | Yes |
| enable_api | boolean | | Yes |
| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | Yes |
| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes |
| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | Yes |
| id | string | | Yes |
| indexing_technique | string | | Yes |
| is_multimodal | boolean | | Yes |
| is_published | boolean | | Yes |
| name | string | | Yes |
| partial_member_list | [ string ] | | Yes |
| permission | string | | Yes |
| pipeline_id | string | | Yes |
| provider | string | | Yes |
| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes |
| runtime_mode | string | | Yes |
| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | Yes |
| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes |
| total_available_documents | integer | | Yes |
| total_documents | integer | | Yes |
| updated_at | integer | | Yes |
| updated_by | string | | Yes |
| word_count | integer | | Yes |
#### DatasetListResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| data | [ [DatasetListItemResponse](#datasetlistitemresponse) ] | | Yes |
| has_more | boolean | | Yes |
| limit | integer | | Yes |
| page | integer | | Yes |
| total | integer | | Yes |
#### DatasetMetadataBuiltInFieldResponse
| Name | Type | Description | Required |
@ -11793,17 +11927,46 @@ Condition detail
| ---- | ---- | ----------- | -------- |
| DatasetPermissionEnum | string | | |
#### DatasetQueryDetail
#### DatasetQueryContentResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| created_at | object | | No |
| created_by | string | | No |
| created_by_role | string | | No |
| id | string | | No |
| queries | [DatasetContent](#datasetcontent) | | No |
| source | string | | No |
| source_app_id | string | | No |
| content | string | | Yes |
| content_type | string | | Yes |
| file_info | [DatasetQueryFileInfoResponse](#datasetqueryfileinforesponse) | | No |
#### DatasetQueryDetailResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| created_at | integer | | Yes |
| created_by | string | | Yes |
| created_by_role | string | | Yes |
| id | string | | Yes |
| queries | [ [DatasetQueryContentResponse](#datasetquerycontentresponse) ] | | Yes |
| source | string | | Yes |
| source_app_id | string | | Yes |
#### DatasetQueryFileInfoResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| extension | string | | Yes |
| id | string | | Yes |
| mime_type | string | | Yes |
| name | string | | Yes |
| size | integer | | Yes |
| source_url | string | | Yes |
#### DatasetQueryListResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| data | [ [DatasetQueryDetailResponse](#datasetquerydetailresponse) ] | | Yes |
| has_more | boolean | | Yes |
| limit | integer | | Yes |
| page | integer | | Yes |
| total | integer | | Yes |
#### DatasetRerankingModel
@ -11812,6 +11975,13 @@ Condition detail
| reranking_model_name | string | | No |
| reranking_provider_name | string | | No |
#### DatasetRerankingModelResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| reranking_model_name | string | | No |
| reranking_provider_name | string | | No |
#### DatasetResponse
| Name | Type | Description | Required |
@ -11838,6 +12008,36 @@ Condition detail
| top_k | integer | | No |
| weights | [DatasetWeightedScore](#datasetweightedscore) | | No |
#### DatasetRetrievalModelResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| reranking_enable | boolean | | Yes |
| reranking_mode | string | | No |
| reranking_model | [DatasetRerankingModelResponse](#datasetrerankingmodelresponse) | | Yes |
| score_threshold | number | | No |
| score_threshold_enabled | boolean | | Yes |
| search_method | string | | Yes |
| top_k | integer | | Yes |
| weights | [DatasetWeightedScoreResponse](#datasetweightedscoreresponse) | | No |
#### DatasetSummaryIndexSettingResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| enable | boolean | | No |
| model_name | string | | No |
| model_provider_name | string | | No |
| summary_prompt | string | | No |
#### DatasetTagResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| id | string | | Yes |
| name | string | | Yes |
| type | string | | Yes |
#### DatasetUpdatePayload
| Name | Type | Description | Required |
@ -11865,6 +12065,14 @@ Condition detail
| embedding_provider_name | string | | No |
| vector_weight | number | | No |
#### DatasetVectorSettingResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| embedding_model_name | string | | Yes |
| embedding_provider_name | string | | Yes |
| vector_weight | number | | Yes |
#### DatasetWeightedScore
| Name | Type | Description | Required |
@ -11873,6 +12081,14 @@ Condition detail
| vector_setting | [DatasetVectorSetting](#datasetvectorsetting) | | No |
| weight_type | string | | No |
#### DatasetWeightedScoreResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| keyword_setting | [DatasetKeywordSettingResponse](#datasetkeywordsettingresponse) | | Yes |
| vector_setting | [DatasetVectorSettingResponse](#datasetvectorsettingresponse) | | Yes |
| weight_type | string | | Yes |
#### DatasourceCredentialDeletePayload
| Name | Type | Description | Required |
@ -12066,6 +12282,29 @@ Request payload for bulk downloading documents as a zip archive.
| ---- | ---- | ----------- | -------- |
| document_ids | [ string ] | | Yes |
#### DocumentStatusListResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| data | [ [DocumentStatusResponse](#documentstatusresponse) ] | | Yes |
#### DocumentStatusResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| cleaning_completed_at | integer | | Yes |
| completed_at | integer | | Yes |
| completed_segments | integer | | No |
| error | string | | Yes |
| id | string | | Yes |
| indexing_status | string | | Yes |
| parsing_completed_at | integer | | Yes |
| paused_at | integer | | Yes |
| processing_started_at | integer | | Yes |
| splitting_completed_at | integer | | Yes |
| stopped_at | integer | | Yes |
| total_segments | integer | | No |
#### DocumentWithSegmentsResponse
| Name | Type | Description | Required |
@ -12310,6 +12549,13 @@ Request payload for bulk downloading documents as a zip archive.
| ---- | ---- | ----------- | -------- |
| environment_variables | [ object ] | Environment variables for the draft workflow | Yes |
#### ErrorDocsResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| data | [ [DocumentStatusResponse](#documentstatusresponse) ] | | Yes |
| total | integer | | Yes |
#### ExecutionContentType
| Name | Type | Description | Required |
@ -12746,6 +12992,29 @@ Request payload for bulk downloading documents as a zip archive.
| info_list | object | | Yes |
| process_rule | object | | Yes |
#### IndexingEstimatePreviewItemResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| child_chunks | [ string ] | | No |
| content | string | | Yes |
| summary | string | | No |
#### IndexingEstimateQaPreviewItemResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| answer | string | | Yes |
| question | string | | Yes |
#### IndexingEstimateResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| preview | [ [IndexingEstimatePreviewItemResponse](#indexingestimatepreviewitemresponse) ] | | Yes |
| qa_preview | [ [IndexingEstimateQaPreviewItemResponse](#indexingestimateqapreviewitemresponse) ] | | No |
| total_segments | integer | | Yes |
#### InfoList
| Name | Type | Description | Required |
@ -13649,6 +13918,12 @@ Form input definition.
| model | string | | Yes |
| model_type | [ModelType](#modeltype) | | Yes |
#### PartialMemberListResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| data | [ string ] | | Yes |
#### PartnerTenantsPayload
| Name | Type | Description | Required |
@ -13901,12 +14176,25 @@ Form input definition.
| ---- | ---- | ----------- | -------- |
| redirect_url | string | | Yes |
#### RelatedAppList
#### RelatedAppListResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| data | [ [AppDetailKernel](#appdetailkernel) ] | | No |
| total | integer | | No |
| data | [ [RelatedAppResponse](#relatedappresponse) ] | | Yes |
| total | integer | | Yes |
#### RelatedAppResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| description | string | | Yes |
| icon | string | | Yes |
| icon_background | string | | Yes |
| icon_type | string | | Yes |
| icon_url | string | | No |
| id | string | | Yes |
| mode | string | | Yes |
| name | string | | Yes |
#### RemoteFileInfo
@ -13954,6 +14242,12 @@ Form input definition.
| top_k | integer | | Yes |
| weights | [WeightModel](#weightmodel) | | No |
#### RetrievalSettingResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| retrieval_method | [ string ] | | Yes |
#### RosterAgentCreatePayload
| Name | Type | Description | Required |

View File

@ -469,12 +469,22 @@ Resource for getting datasets
List all datasets
##### Parameters
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| include_all | query | Include all datasets | No | boolean |
| keyword | query | Search keyword | No | string |
| limit | query | Number of items per page | No | integer |
| page | query | Page number | No | integer |
| tag_ids | query | Filter by tag IDs | No | [ string ] |
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Datasets retrieved successfully |
| 401 | Unauthorized - invalid API token |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Datasets retrieved successfully | [DatasetListResponse](#datasetlistresponse) |
| 401 | Unauthorized - invalid API token | |
#### POST
##### Summary
@ -493,11 +503,11 @@ Create a new dataset
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Dataset created successfully |
| 400 | Bad request - invalid parameters |
| 401 | Unauthorized - invalid API token |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Dataset created successfully | [DatasetDetailResponse](#datasetdetailresponse) |
| 400 | Bad request - invalid parameters | |
| 401 | Unauthorized - invalid API token | |
### /datasets/pipeline/file-upload
@ -557,10 +567,10 @@ Get all knowledge type tags
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Tags retrieved successfully |
| 401 | Unauthorized - invalid API token |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Tags retrieved successfully | [KnowledgeTagListResponse](#knowledgetaglistresponse) |
| 401 | Unauthorized - invalid API token | |
#### PATCH
##### Description
@ -575,11 +585,11 @@ Update a knowledge type tag
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Tag updated successfully |
| 401 | Unauthorized - invalid API token |
| 403 | Forbidden - insufficient permissions |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Tag updated successfully | [KnowledgeTagResponse](#knowledgetagresponse) |
| 401 | Unauthorized - invalid API token | |
| 403 | Forbidden - insufficient permissions | |
#### POST
##### Summary
@ -598,11 +608,11 @@ Add a knowledge type tag
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Tag created successfully |
| 401 | Unauthorized - invalid API token |
| 403 | Forbidden - insufficient permissions |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Tag created successfully | [KnowledgeTagResponse](#knowledgetagresponse) |
| 401 | Unauthorized - invalid API token | |
| 403 | Forbidden - insufficient permissions | |
### /datasets/tags/binding
@ -696,12 +706,12 @@ Get a specific dataset by ID
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Dataset retrieved successfully |
| 401 | Unauthorized - invalid API token |
| 403 | Forbidden - insufficient permissions |
| 404 | Dataset not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Dataset retrieved successfully | [DatasetDetailWithPartialMembersResponse](#datasetdetailwithpartialmembersresponse) |
| 401 | Unauthorized - invalid API token | |
| 403 | Forbidden - insufficient permissions | |
| 404 | Dataset not found | |
#### PATCH
##### Description
@ -717,12 +727,12 @@ Update an existing dataset
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Dataset updated successfully |
| 401 | Unauthorized - invalid API token |
| 403 | Forbidden - insufficient permissions |
| 404 | Dataset not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Dataset updated successfully | [DatasetDetailWithPartialMembersResponse](#datasetdetailwithpartialmembersresponse) |
| 401 | Unauthorized - invalid API token | |
| 403 | Forbidden - insufficient permissions | |
| 404 | Dataset not found | |
### /datasets/{dataset_id}/document/create-by-file
@ -1629,10 +1639,10 @@ Get tags bound to a specific dataset
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Tags retrieved successfully |
| 401 | Unauthorized - invalid API token |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Tags retrieved successfully | [DatasetBoundTagListResponse](#datasetboundtaglistresponse) |
| 401 | Unauthorized - invalid API token | |
### /end-users/{end_user_id}
@ -2279,14 +2289,19 @@ Condition detail
| limit | integer | Number of variables to return | No |
| variable_name | string | Filter variables by name | No |
#### DataSetTag
#### DatasetBoundTagListResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| data | [ [DatasetBoundTagResponse](#datasetboundtagresponse) ] | | Yes |
| total | integer | | Yes |
#### DatasetBoundTagResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| binding_count | string | | No |
| id | string | | Yes |
| name | string | | Yes |
| type | string | | Yes |
#### DatasetCreatePayload
@ -2304,6 +2319,127 @@ Condition detail
| retrieval_model | [RetrievalModel](#retrievalmodel) | | No |
| summary_index_setting | object | | No |
#### DatasetDetailResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| app_count | integer | | Yes |
| author_name | string | | Yes |
| built_in_field_enabled | boolean | | Yes |
| chunk_structure | string | | Yes |
| created_at | integer | | Yes |
| created_by | string | | Yes |
| data_source_type | string | | Yes |
| description | string | | Yes |
| doc_form | string | | Yes |
| doc_metadata | [ [DatasetDocMetadataResponse](#datasetdocmetadataresponse) ] | | Yes |
| document_count | integer | | Yes |
| embedding_available | boolean | | No |
| embedding_model | string | | Yes |
| embedding_model_provider | string | | Yes |
| enable_api | boolean | | Yes |
| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | Yes |
| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes |
| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | Yes |
| id | string | | Yes |
| indexing_technique | string | | Yes |
| is_multimodal | boolean | | Yes |
| is_published | boolean | | Yes |
| name | string | | Yes |
| permission | string | | Yes |
| pipeline_id | string | | Yes |
| provider | string | | Yes |
| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes |
| runtime_mode | string | | Yes |
| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | Yes |
| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes |
| total_available_documents | integer | | Yes |
| total_documents | integer | | Yes |
| updated_at | integer | | Yes |
| updated_by | string | | Yes |
| word_count | integer | | Yes |
#### DatasetDetailWithPartialMembersResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| app_count | integer | | Yes |
| author_name | string | | Yes |
| built_in_field_enabled | boolean | | Yes |
| chunk_structure | string | | Yes |
| created_at | integer | | Yes |
| created_by | string | | Yes |
| data_source_type | string | | Yes |
| description | string | | Yes |
| doc_form | string | | Yes |
| doc_metadata | [ [DatasetDocMetadataResponse](#datasetdocmetadataresponse) ] | | Yes |
| document_count | integer | | Yes |
| embedding_available | boolean | | No |
| embedding_model | string | | Yes |
| embedding_model_provider | string | | Yes |
| enable_api | boolean | | Yes |
| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | Yes |
| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes |
| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | Yes |
| id | string | | Yes |
| indexing_technique | string | | Yes |
| is_multimodal | boolean | | Yes |
| is_published | boolean | | Yes |
| name | string | | Yes |
| partial_member_list | [ string ] | | No |
| permission | string | | Yes |
| pipeline_id | string | | Yes |
| provider | string | | Yes |
| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes |
| runtime_mode | string | | Yes |
| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | Yes |
| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes |
| total_available_documents | integer | | Yes |
| total_documents | integer | | Yes |
| updated_at | integer | | Yes |
| updated_by | string | | Yes |
| word_count | integer | | Yes |
#### DatasetDocMetadataResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| id | string | | Yes |
| name | string | | Yes |
| type | string | | Yes |
#### DatasetExternalKnowledgeInfoResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| external_knowledge_api_endpoint | string | | Yes |
| external_knowledge_api_id | string | | Yes |
| external_knowledge_api_name | string | | Yes |
| external_knowledge_id | string | | Yes |
#### DatasetExternalRetrievalModelResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| score_threshold | number | | Yes |
| score_threshold_enabled | boolean | | No |
| top_k | integer | | Yes |
#### DatasetIconInfoResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| icon | string | | Yes |
| icon_background | string | | No |
| icon_type | string | | Yes |
| icon_url | string | | No |
#### DatasetKeywordSettingResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| keyword_weight | number | | Yes |
#### DatasetListQuery
| Name | Type | Description | Required |
@ -2314,6 +2450,16 @@ Condition detail
| page | integer | Page number | No |
| tag_ids | [ string ] | Filter by tag IDs | No |
#### DatasetListResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| data | [ [DatasetDetailResponse](#datasetdetailresponse) ] | | Yes |
| has_more | boolean | | Yes |
| limit | integer | | Yes |
| page | integer | | Yes |
| total | integer | | Yes |
#### DatasetMetadataActionResponse
| Name | Type | Description | Required |
@ -2363,6 +2509,43 @@ Condition detail
| ---- | ---- | ----------- | -------- |
| DatasetPermissionEnum | string | | |
#### DatasetRerankingModelResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| reranking_model_name | string | | No |
| reranking_provider_name | string | | No |
#### DatasetRetrievalModelResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| reranking_enable | boolean | | Yes |
| reranking_mode | string | | No |
| reranking_model | [DatasetRerankingModelResponse](#datasetrerankingmodelresponse) | | Yes |
| score_threshold | number | | No |
| score_threshold_enabled | boolean | | Yes |
| search_method | string | | Yes |
| top_k | integer | | Yes |
| weights | [DatasetWeightedScoreResponse](#datasetweightedscoreresponse) | | No |
#### DatasetSummaryIndexSettingResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| enable | boolean | | No |
| model_name | string | | No |
| model_provider_name | string | | No |
| summary_prompt | string | | No |
#### DatasetTagResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| id | string | | Yes |
| name | string | | Yes |
| type | string | | Yes |
#### DatasetUpdatePayload
| Name | Type | Description | Required |
@ -2379,6 +2562,22 @@ Condition detail
| permission | [DatasetPermissionEnum](#datasetpermissionenum) | | No |
| retrieval_model | [RetrievalModel](#retrievalmodel) | | No |
#### DatasetVectorSettingResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| embedding_model_name | string | | Yes |
| embedding_provider_name | string | | Yes |
| vector_weight | number | | Yes |
#### DatasetWeightedScoreResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| keyword_setting | [DatasetKeywordSettingResponse](#datasetkeywordsettingresponse) | | Yes |
| vector_setting | [DatasetVectorSettingResponse](#datasetvectorsettingresponse) | | Yes |
| weight_type | string | | Yes |
#### DatasourceNodeRunPayload
| Name | Type | Description | Required |
@ -2522,6 +2721,21 @@ Note: The SQLAlchemy model defines an `is_anonymous` property for Flask-Login se
| ---- | ---- | ----------- | -------- |
| JsonValue | | | |
#### KnowledgeTagListResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| KnowledgeTagListResponse | array | | |
#### KnowledgeTagResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| binding_count | string | | No |
| id | string | | Yes |
| name | string | | Yes |
| type | string | | Yes |
#### MessageFeedbackPayload
| Name | Type | Description | Required |

View File

@ -14,8 +14,9 @@ since these test controller-level behavior.
"""
import uuid
from types import SimpleNamespace
from unittest.mock import Mock, patch
from contextlib import ExitStack
from datetime import UTC, datetime
from unittest.mock import Mock, PropertyMock, patch
import pytest
from flask import Flask
@ -35,7 +36,7 @@ from controllers.service_api.dataset.dataset import (
)
from controllers.service_api.dataset.error import DatasetInUseError, DatasetNameDuplicateError, InvalidActionError
from models.account import Account
from models.dataset import DatasetPermissionEnum
from models.dataset import Dataset, DatasetPermissionEnum
from models.enums import TagType
from models.model import Tag
@ -116,6 +117,7 @@ class TestDatasetUpdatePayload:
partial_member_list=[{"user_id": "user_123", "role": "editor"}],
)
assert payload.permission == DatasetPermissionEnum.PARTIAL_TEAM
assert payload.partial_member_list is not None
assert len(payload.partial_member_list) == 1
def test_payload_name_length_validation(self):
@ -181,7 +183,7 @@ class TestTagUpdatePayload:
def test_payload_requires_tag_id(self):
with pytest.raises(ValueError):
TagUpdatePayload(name="Updated Tag")
TagUpdatePayload.model_validate({"name": "Updated Tag"})
class TestTagDeletePayload:
@ -193,7 +195,7 @@ class TestTagDeletePayload:
def test_payload_requires_tag_id(self):
with pytest.raises(ValueError):
TagDeletePayload()
TagDeletePayload.model_validate({})
class TestTagBindingPayload:
@ -264,13 +266,134 @@ def mock_tenant():
@pytest.fixture
def mock_dataset():
dataset = Mock()
dataset.id = str(uuid.uuid4())
dataset.tenant_id = str(uuid.uuid4())
dataset.indexing_technique = "economy"
dataset.embedding_model_provider = None
dataset.embedding_model = None
return dataset
return make_dataset(id=str(uuid.uuid4()), tenant_id=str(uuid.uuid4()))
@pytest.fixture(autouse=True)
def dataset_model_property_defaults():
    """Replace selected ``Dataset`` class attributes with fixed-value property mocks.

    Runs automatically around every test in this module. Each listed attribute
    is patched with a ``PropertyMock`` returning the paired value, and all
    patches are undone when the test finishes.
    """
    # NOTE(review): presumably these are computed properties on the model that
    # would otherwise need a database session - confirm against models.dataset.
    stubbed_values: dict[str, object] = {
        "app_count": 0,
        "document_count": 0,
        "word_count": 0,
        "author_name": None,
        "tags": [],
        "doc_form": None,
        "external_knowledge_info": None,
        "doc_metadata": [],
        "is_published": False,
        "total_documents": 0,
        "total_available_documents": 0,
    }
    with ExitStack() as active_patches:
        for attr_name, stub_value in stubbed_values.items():
            # Extra keyword arguments are forwarded to the PropertyMock constructor.
            active_patches.enter_context(
                patch.object(Dataset, attr_name, new_callable=PropertyMock, return_value=stub_value)
            )
        yield
def make_dataset(**overrides) -> Dataset:
    """Construct a ``Dataset`` from baseline field values.

    Args:
        **overrides: Field values that replace (or extend) the baseline ones.

    Returns:
        The ``Dataset`` built from the merged keyword arguments.
    """
    fixed_timestamp = datetime(2024, 1, 1, 12, 0, 0, tzinfo=UTC)
    defaults = {
        "id": "ds-1",
        "tenant_id": "tenant-1",
        "name": "Dataset",
        "description": "desc",
        "provider": "vendor",
        "permission": "only_me",
        "data_source_type": None,
        "indexing_technique": "economy",
        "created_by": "account-1",
        "created_at": fixed_timestamp,
        "updated_by": None,
        "updated_at": fixed_timestamp,
        "embedding_model": None,
        "embedding_model_provider": None,
        "retrieval_model": None,
        "summary_index_setting": None,
        "built_in_field_enabled": False,
        "pipeline_id": None,
        "runtime_mode": "general",
        "chunk_structure": None,
        "icon_info": None,
        "enable_api": False,
        "is_multimodal": False,
    }
    # Right-hand operand wins, so caller-supplied values take precedence.
    return Dataset(**(defaults | overrides))
def make_tag(*, id: str, name: str, binding_count: int | None = None) -> Tag:
    """Construct a knowledge ``Tag`` for tenant ``tenant-1`` with the given id and name.

    Args:
        id: Value assigned to ``tag.id`` after construction.
        name: Tag name passed to the ``Tag`` constructor.
        binding_count: When not ``None``, stored on the instance under
            ``binding_count``; otherwise the attribute is left untouched.

    Returns:
        The constructed ``Tag``.
    """
    built_tag = Tag(tenant_id="tenant-1", type=TagType.KNOWLEDGE, name=name, created_by="account-1")
    built_tag.id = id
    if binding_count is None:
        return built_tag
    # Written straight into the instance dict rather than via setattr.
    # NOTE(review): presumably binding_count is not a regular settable attribute
    # on the model - confirm against models.model.Tag.
    vars(built_tag)["binding_count"] = binding_count
    return built_tag
# Top-level keys that every serialized dataset detail payload must contain.
# The optional "partial_member_list" key is added by the helper below on demand.
DATASET_DETAIL_KEYS = {
    "id",
    "name",
    "description",
    "provider",
    "permission",
    "data_source_type",
    "indexing_technique",
    "app_count",
    "document_count",
    "word_count",
    "created_by",
    "author_name",
    "created_at",
    "updated_by",
    "updated_at",
    "embedding_model",
    "embedding_model_provider",
    "embedding_available",
    "retrieval_model_dict",
    "summary_index_setting",
    "tags",
    "doc_form",
    "external_knowledge_info",
    "external_retrieval_model",
    "doc_metadata",
    "built_in_field_enabled",
    "pipeline_id",
    "runtime_mode",
    "chunk_structure",
    "icon_info",
    "is_published",
    "total_documents",
    "total_available_documents",
    "enable_api",
    "is_multimodal",
}

# Exact key set expected inside ``retrieval_model_dict``.
_RETRIEVAL_MODEL_KEYS = frozenset(
    {
        "search_method",
        "reranking_enable",
        "reranking_mode",
        "reranking_model",
        "weights",
        "top_k",
        "score_threshold_enabled",
        "score_threshold",
    }
)

# Exact key set expected inside a non-null ``external_retrieval_model``.
_EXTERNAL_RETRIEVAL_MODEL_KEYS = frozenset(
    {
        "top_k",
        "score_threshold",
        "score_threshold_enabled",
    }
)


def _assert_exact_keys(actual, expected, label: str) -> None:
    """Assert that ``actual`` has exactly the ``expected`` keys, naming any difference."""
    actual_keys = set(actual)
    expected_keys = set(expected)
    missing = sorted(expected_keys - actual_keys, key=repr)
    unexpected = sorted(actual_keys - expected_keys, key=repr)
    assert not missing and not unexpected, f"{label}: missing keys {missing}, unexpected keys {unexpected}"


def assert_dataset_detail_shape(response: dict, *, with_partial_members: bool = False) -> None:
    """Assert that ``response`` has the exact shape of a dataset detail payload.

    Args:
        response: The serialized dataset detail mapping to check.
        with_partial_members: When true, ``partial_member_list`` must be
            present; when false it must be absent. Both cases are enforced by
            the exact top-level key comparison.

    Raises:
        AssertionError: If the top-level or nested key sets differ from the
            expected ones, if ``created_at``/``updated_at`` are not plain
            ints, or if ``retrieval_model_dict`` is ``None``.
    """
    expected_keys = set(DATASET_DETAIL_KEYS)
    if with_partial_members:
        expected_keys.add("partial_member_list")
    _assert_exact_keys(response, expected_keys, "dataset detail")

    for field_name in ("created_at", "updated_at"):
        value = response[field_name]
        # bool is a subclass of int, so it has to be excluded explicitly.
        assert isinstance(value, int) and not isinstance(value, bool), (
            f"{field_name} must be an int, got {value!r}"
        )

    retrieval_model = response["retrieval_model_dict"]
    # Fail as an assertion instead of a TypeError from iterating None.
    assert retrieval_model is not None, "retrieval_model_dict must not be None"
    _assert_exact_keys(retrieval_model, _RETRIEVAL_MODEL_KEYS, "retrieval_model_dict")

    external_retrieval_model = response["external_retrieval_model"]
    if external_retrieval_model is not None:
        _assert_exact_keys(
            external_retrieval_model,
            _EXTERNAL_RETRIEVAL_MODEL_KEYS,
            "external_retrieval_model",
        )
# ---------------------------------------------------------------------------
@ -281,7 +404,6 @@ def mock_dataset():
class TestDatasetListApiGet:
"""Test suite for DatasetListApi.get() endpoint."""
@patch("controllers.service_api.dataset.dataset.marshal")
@patch("controllers.service_api.dataset.dataset.create_plugin_provider_manager")
@patch("controllers.service_api.dataset.dataset.current_user")
@patch("controllers.service_api.dataset.dataset.DatasetService")
@ -290,7 +412,6 @@ class TestDatasetListApiGet:
mock_dataset_svc,
mock_current_user,
mock_provider_mgr,
mock_marshal,
app: Flask,
mock_tenant,
):
@ -298,42 +419,79 @@ class TestDatasetListApiGet:
mock_current_user.__class__ = Account
mock_current_user.current_tenant_id = mock_tenant.id
mock_dataset_svc.get_datasets.return_value = ([Mock()], 1)
mock_dataset_svc.get_datasets.return_value = ([make_dataset()], 1)
mock_configs = Mock()
mock_configs.get_models.return_value = []
mock_provider_mgr.return_value.get_configurations.return_value = mock_configs
mock_marshal.return_value = [{"indexing_technique": "economy", "embedding_model_provider": None}]
with app.test_request_context("/datasets?page=1&limit=20", method="GET"):
api = DatasetListApi()
response, status = api.get(tenant_id=mock_tenant.id)
assert status == 200
assert "data" in response
assert "total" in response
assert set(response) == {"data", "has_more", "limit", "total", "page"}
assert response["has_more"] is False
assert response["limit"] == 20
assert response["total"] == 1
assert response["page"] == 1
assert len(response["data"]) == 1
assert_dataset_detail_shape(response["data"][0])
class TestDatasetListApiPost:
"""Test suite for DatasetListApi.post() endpoint."""
@patch("controllers.service_api.dataset.dataset.marshal")
@patch("controllers.service_api.dataset.dataset.create_plugin_provider_manager")
@patch("controllers.service_api.dataset.dataset.current_user")
@patch("controllers.service_api.dataset.dataset.DatasetService")
def test_create_dataset_success(
def test_list_datasets_preserves_repeated_tag_ids(
self,
mock_dataset_svc,
mock_current_user,
mock_marshal,
mock_provider_mgr,
app: Flask,
mock_tenant,
):
from controllers.service_api.dataset.dataset import DatasetListApi
mock_current_user.__class__ = Account
mock_dataset_svc.create_empty_dataset.return_value = Mock()
mock_marshal.return_value = {"id": "ds-1", "name": "New Dataset"}
mock_current_user.current_tenant_id = mock_tenant.id
mock_dataset_svc.get_datasets.return_value = ([make_dataset()], 1)
mock_configs = Mock()
mock_configs.get_models.return_value = []
mock_provider_mgr.return_value.get_configurations.return_value = mock_configs
with app.test_request_context("/datasets?tag_ids=tag-a&tag_ids=tag-b", method="GET"):
api = DatasetListApi()
response, status = api.get(tenant_id=mock_tenant.id)
assert status == 200
assert response["total"] == 1
mock_dataset_svc.get_datasets.assert_called_once_with(
1,
20,
mock_tenant.id,
mock_current_user,
None,
["tag-a", "tag-b"],
False,
)
class TestDatasetListApiPost:
"""Test suite for DatasetListApi.post() endpoint."""
@patch("controllers.service_api.dataset.dataset.current_user")
@patch("controllers.service_api.dataset.dataset.DatasetService")
def test_create_dataset_success(
self,
mock_dataset_svc,
mock_current_user,
app: Flask,
mock_tenant,
):
from controllers.service_api.dataset.dataset import DatasetListApi
mock_current_user.__class__ = Account
mock_dataset_svc.create_empty_dataset.return_value = make_dataset(name="New Dataset")
with app.test_request_context(
"/datasets",
@ -344,6 +502,8 @@ class TestDatasetListApiPost:
response, status = _unwrap(api.post)(api, tenant_id=mock_tenant.id)
assert status == 200
assert_dataset_detail_shape(response)
assert response["name"] == "New Dataset"
mock_dataset_svc.create_empty_dataset.assert_called_once()
@patch("controllers.service_api.dataset.dataset.current_user")
@ -379,7 +539,6 @@ class TestDatasetApiGet:
"""Test suite for DatasetApi.get() endpoint."""
@patch("controllers.service_api.dataset.dataset.DatasetPermissionService")
@patch("controllers.service_api.dataset.dataset.marshal")
@patch("controllers.service_api.dataset.dataset.create_plugin_provider_manager")
@patch("controllers.service_api.dataset.dataset.current_user")
@patch("controllers.service_api.dataset.dataset.DatasetService")
@ -388,7 +547,6 @@ class TestDatasetApiGet:
mock_dataset_svc,
mock_current_user,
mock_provider_mgr,
mock_marshal,
mock_perm_svc,
app: Flask,
mock_dataset,
@ -404,11 +562,43 @@ class TestDatasetApiGet:
mock_configs.get_models.return_value = []
mock_provider_mgr.return_value.get_configurations.return_value = mock_configs
mock_marshal.return_value = {
"indexing_technique": "economy",
"embedding_model_provider": None,
"permission": "only_me",
}
with app.test_request_context(
f"/datasets/{mock_dataset.id}",
method="GET",
):
api = DatasetApi()
response, status = api.get(_=mock_dataset.tenant_id, dataset_id=mock_dataset.id)
assert status == 200
assert_dataset_detail_shape(response)
assert response["embedding_available"] is True
assert response["retrieval_model_dict"]["search_method"] == "keyword_search"
@patch("controllers.service_api.dataset.dataset.DatasetPermissionService")
@patch("controllers.service_api.dataset.dataset.create_plugin_provider_manager")
@patch("controllers.service_api.dataset.dataset.current_user")
@patch("controllers.service_api.dataset.dataset.DatasetService")
def test_get_dataset_partial_members_shape(
self,
mock_dataset_svc,
mock_current_user,
mock_provider_mgr,
mock_perm_svc,
app: Flask,
mock_dataset,
):
from controllers.service_api.dataset.dataset import DatasetApi
mock_dataset.permission = "partial_members"
mock_dataset_svc.get_dataset.return_value = mock_dataset
mock_dataset_svc.check_dataset_permission.return_value = None
mock_current_user.__class__ = Account
mock_current_user.current_tenant_id = mock_dataset.tenant_id
mock_perm_svc.get_dataset_partial_member_list.return_value = ["user-1", "user-2"]
mock_configs = Mock()
mock_configs.get_models.return_value = []
mock_provider_mgr.return_value.get_configurations.return_value = mock_configs
with app.test_request_context(
f"/datasets/{mock_dataset.id}",
@ -418,7 +608,45 @@ class TestDatasetApiGet:
response, status = api.get(_=mock_dataset.tenant_id, dataset_id=mock_dataset.id)
assert status == 200
assert response["embedding_available"] is True
assert_dataset_detail_shape(response, with_partial_members=True)
assert response["partial_member_list"] == ["user-1", "user-2"]
@patch("controllers.service_api.dataset.dataset.DatasetPermissionService")
@patch("controllers.service_api.dataset.dataset.create_plugin_provider_manager")
@patch("controllers.service_api.dataset.dataset.current_user")
@patch("controllers.service_api.dataset.dataset.DatasetService")
def test_get_dataset_uses_default_external_retrieval_model(
    self,
    mock_dataset_svc,
    mock_current_user,
    mock_provider_mgr,
    mock_perm_svc,
    app: Flask,
    mock_dataset,
):
    """GET on a dataset with ``retrieval_model=None`` returns the default external retrieval model."""
    from controllers.service_api.dataset.dataset import DatasetApi

    expected_external_retrieval_model = {
        "top_k": 2,
        "score_threshold": 0.0,
        "score_threshold_enabled": None,
    }

    # Dataset under test carries no stored retrieval settings.
    mock_dataset.retrieval_model = None

    # Authenticated account in the dataset's own tenant.
    mock_current_user.__class__ = Account
    mock_current_user.current_tenant_id = mock_dataset.tenant_id

    # Service layer hands back the dataset and lets the permission check pass.
    mock_dataset_svc.get_dataset.return_value = mock_dataset
    mock_dataset_svc.check_dataset_permission.return_value = None

    # Provider manager reports no configured models.
    provider_configurations = Mock(**{"get_models.return_value": []})
    mock_provider_mgr.return_value.get_configurations.return_value = provider_configurations

    with app.test_request_context(f"/datasets/{mock_dataset.id}", method="GET"):
        response, status = DatasetApi().get(_=mock_dataset.tenant_id, dataset_id=mock_dataset.id)

    assert status == 200
    assert_dataset_detail_shape(response)
    assert response["external_retrieval_model"] == expected_external_retrieval_model
@patch("controllers.service_api.dataset.dataset.DatasetService")
def test_get_dataset_not_found(self, mock_dataset_svc, app, mock_dataset):
@ -457,6 +685,58 @@ class TestDatasetApiGet:
api.get(_=mock_dataset.tenant_id, dataset_id=mock_dataset.id)
class TestDatasetApiPatch:
    """Tests covering the DatasetApi.patch() endpoint."""

    @patch("controllers.service_api.dataset.dataset.DatasetPermissionService")
    @patch("controllers.service_api.dataset.dataset.current_user")
    @patch("controllers.service_api.dataset.dataset.DatasetService")
    def test_patch_dataset_success_shape(
        self,
        mock_dataset_svc,
        mock_current_user,
        mock_perm_svc,
        app: Flask,
        mock_dataset,
    ):
        """A successful PATCH returns the full detail shape including ``partial_member_list``.

        Also checks that the update payload reaches ``DatasetService.update_dataset``
        and that the partial member list is forwarded to the permission service.
        """
        from controllers.service_api.dataset.dataset import DatasetApi

        requested_members = [{"user_id": "user-1", "role": "editor"}]
        request_body = {
            "name": "Updated Dataset",
            "permission": "partial_members",
            "partial_member_list": requested_members,
        }

        # Authenticated account in the dataset's own tenant.
        mock_current_user.__class__ = Account
        mock_current_user.current_tenant_id = mock_dataset.tenant_id

        # Permission service accepts the change and reports one partial member.
        mock_perm_svc.check_permission.return_value = None
        mock_perm_svc.get_dataset_partial_member_list.return_value = ["user-1"]

        # Dataset service finds the dataset and returns the renamed copy on update.
        mock_dataset_svc.get_dataset.return_value = mock_dataset
        mock_dataset_svc.update_dataset.return_value = make_dataset(
            id=mock_dataset.id,
            tenant_id=mock_dataset.tenant_id,
            name="Updated Dataset",
        )

        with app.test_request_context(
            f"/datasets/{mock_dataset.id}",
            method="PATCH",
            json=request_body,
        ):
            api = DatasetApi()
            response, status = _unwrap(api.patch)(api, _=mock_dataset.tenant_id, dataset_id=mock_dataset.id)

        assert status == 200
        assert_dataset_detail_shape(response, with_partial_members=True)
        assert response["name"] == "Updated Dataset"
        assert response["partial_member_list"] == ["user-1"]

        mock_dataset_svc.update_dataset.assert_called_once()
        # update_dataset is expected to receive exactly three positional arguments.
        _first_arg, update_data, _third_arg = mock_dataset_svc.update_dataset.call_args.args
        assert update_data["name"] == "Updated Dataset"
        assert update_data["permission"] == "partial_members"

        mock_perm_svc.update_partial_member_list.assert_called_once_with(
            mock_dataset.tenant_id,
            mock_dataset.id,
            [{"user_id": "user-1", "role": "editor"}],
        )
class TestDatasetApiDelete:
"""Test suite for DatasetApi.delete() endpoint."""
@ -715,7 +995,7 @@ class TestDatasetTagsApiGet:
mock_current_user.__class__ = Account
mock_current_user.current_tenant_id = "tenant-1"
mock_tag = SimpleNamespace(id="tag-1", name="Test Tag", type="knowledge", binding_count="0")
mock_tag = make_tag(id="tag-1", name="Test Tag", binding_count=0)
mock_tag_svc.get_tags.return_value = [mock_tag]
with app.test_request_context("/datasets/tags", method="GET"):
@ -723,10 +1003,9 @@ class TestDatasetTagsApiGet:
response, status = api.get(_=None)
assert status == 200
assert len(response) == 1
assert response == [{"id": "tag-1", "name": "Test Tag", "type": "knowledge", "binding_count": "0"}]
mock_tag_svc.get_tags.assert_called_once_with("knowledge", "tenant-1")
@pytest.mark.skip(reason="Production bug: DataSetTag.binding_count is str|None but DB COUNT() returns int")
@patch("controllers.service_api.dataset.dataset.current_user")
def test_list_tags_from_db(
self,
@ -762,12 +1041,13 @@ class TestDatasetTagsApiGet:
assert status == 200
assert any(t["name"] == "Integration Tag" for t in response)
assert all(set(t) == {"id", "name", "type", "binding_count"} for t in response)
assert all(isinstance(t["binding_count"], str) for t in response)
class TestDatasetTagsApiPost:
"""Test suite for DatasetTagsApi.post() endpoint."""
@pytest.mark.skip(reason="Production bug: DataSetTag.binding_count is str|None but dataset.py passes int 0")
@patch("controllers.service_api.dataset.dataset.TagService")
@patch("controllers.service_api.dataset.dataset.current_user")
def test_create_tag_success(
@ -781,7 +1061,7 @@ class TestDatasetTagsApiPost:
mock_current_user.__class__ = Account
mock_current_user.has_edit_permission = True
mock_current_user.is_dataset_editor = True
mock_tag = SimpleNamespace(id="tag-new", name="New Tag", type="knowledge")
mock_tag = make_tag(id="tag-new", name="New Tag")
mock_tag_svc.save_tags.return_value = mock_tag
with app.test_request_context(
@ -793,7 +1073,7 @@ class TestDatasetTagsApiPost:
response, status = api.post(_=None)
assert status == 200
assert response["name"] == "New Tag"
assert response == {"id": "tag-new", "name": "New Tag", "type": "knowledge", "binding_count": "0"}
mock_tag_svc.save_tags.assert_called_once()
@patch("controllers.service_api.dataset.dataset.current_user")
@ -817,7 +1097,6 @@ class TestDatasetTagsApiPost:
class TestDatasetTagsApiPatch:
"""Test suite for DatasetTagsApi.patch() endpoint."""
@pytest.mark.skip(reason="Production bug: DataSetTag.binding_count is str|None but dataset.py passes int 0")
@patch("controllers.service_api.dataset.dataset.TagService")
@patch("controllers.service_api.dataset.dataset.service_api_ns")
@patch("controllers.service_api.dataset.dataset.current_user")
@ -834,7 +1113,7 @@ class TestDatasetTagsApiPatch:
mock_current_user.has_edit_permission = True
mock_current_user.is_dataset_editor = True
mock_tag = SimpleNamespace(id="tag-1", name="Updated Tag", type="knowledge")
mock_tag = make_tag(id="tag-1", name="Updated Tag")
mock_tag_svc.update_tags.return_value = mock_tag
mock_tag_svc.get_tag_binding_count.return_value = 5
mock_service_api_ns.payload = {"name": "Updated Tag", "tag_id": "tag-1"}
@ -848,8 +1127,11 @@ class TestDatasetTagsApiPatch:
response, status = api.patch(_=None)
assert status == 200
assert response["name"] == "Updated Tag"
mock_tag_svc.update_tags.assert_called_once_with({"name": "Updated Tag", "type": "knowledge"}, "tag-1")
assert response == {"id": "tag-1", "name": "Updated Tag", "type": "knowledge", "binding_count": "5"}
mock_tag_svc.update_tags.assert_called_once()
update_payload, tag_id = mock_tag_svc.update_tags.call_args.args
assert update_payload.name == "Updated Tag"
assert tag_id == "tag-1"
@patch("controllers.service_api.dataset.dataset.current_user")
def test_update_tag_forbidden(self, mock_current_user, app: Flask):
@ -984,7 +1266,7 @@ class TestDatasetTagBindingApiPost:
from services.tag_service import TagBindingCreatePayload
mock_tag_svc.save_tag_binding.assert_called_once_with(
TagBindingCreatePayload(tag_ids=["tag-1"], target_id="ds-1", type="knowledge")
TagBindingCreatePayload(tag_ids=["tag-1"], target_id="ds-1", type=TagType.KNOWLEDGE)
)
@patch("controllers.service_api.dataset.dataset.current_user")
@ -1035,7 +1317,7 @@ class TestDatasetTagUnbindingApiPost:
from services.tag_service import TagBindingDeletePayload
mock_tag_svc.delete_tag_binding.assert_called_once_with(
TagBindingDeletePayload(tag_ids=["tag-1"], target_id="ds-1", type="knowledge")
TagBindingDeletePayload(tag_ids=["tag-1"], target_id="ds-1", type=TagType.KNOWLEDGE)
)
@patch("controllers.service_api.dataset.dataset.TagService")
@ -1065,7 +1347,7 @@ class TestDatasetTagUnbindingApiPost:
from services.tag_service import TagBindingDeletePayload
mock_tag_svc.delete_tag_binding.assert_called_once_with(
TagBindingDeletePayload(tag_ids=["tag-1"], target_id="ds-1", type="knowledge")
TagBindingDeletePayload(tag_ids=["tag-1"], target_id="ds-1", type=TagType.KNOWLEDGE)
)
@patch("controllers.service_api.dataset.dataset.current_user")

View File

@ -1,4 +1,6 @@
import datetime
import json
from contextlib import ExitStack
from unittest.mock import MagicMock, PropertyMock, patch
import pytest
@ -31,8 +33,9 @@ from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
from core.provider_manager import ProviderManager
from core.rag.index_processor.constant.index_type import IndexStructureType
from extensions.storage.storage_type import StorageType
from models.enums import CreatorUserRole
from models.model import ApiToken, UploadFile
from models.dataset import Dataset, DatasetQuery, Document
from models.enums import CreatorUserRole, DataSourceType, DocumentCreatedFrom, IndexingStatus
from models.model import ApiToken, App, AppMode, IconType, UploadFile
from services.dataset_service import DatasetPermissionService, DatasetService
@ -42,18 +45,107 @@ def unwrap(func):
return func
class TestDatasetList:
def _mock_dataset_dict(self, **overrides):
base = {
"id": "ds-1",
"indexing_technique": "economy",
"embedding_model": None,
"embedding_model_provider": None,
"permission": "only_me",
}
base.update(overrides)
return base
@pytest.fixture(autouse=True)
def dataset_model_property_defaults():
    """Patch selected ``Dataset`` class attributes with fixed-value property mocks.

    Applied automatically to every test in this module. Each attribute below is
    swapped for a ``PropertyMock`` that returns the paired value; the patches
    are reverted once the test has run.
    """
    # NOTE(review): presumably these are computed model properties that would
    # otherwise need a database session - confirm against models.dataset.
    property_stubs: dict[str, object] = {
        "app_count": 0,
        "document_count": 0,
        "word_count": 0,
        "author_name": None,
        "tags": [],
        "doc_form": None,
        "external_knowledge_info": None,
        "doc_metadata": [],
        "is_published": False,
        "total_documents": 0,
        "total_available_documents": 0,
    }
    with ExitStack() as patch_stack:
        for prop_name, prop_value in property_stubs.items():
            # Extra keyword arguments are forwarded to the PropertyMock constructor.
            patch_stack.enter_context(
                patch.object(Dataset, prop_name, new_callable=PropertyMock, return_value=prop_value)
            )
        yield
def make_dataset(**overrides) -> Dataset:
    """Construct a ``Dataset`` from baseline field values.

    Args:
        **overrides: Field values that replace (or extend) the baseline ones.

    Returns:
        The ``Dataset`` built from the merged keyword arguments.
    """
    fixed_timestamp = datetime.datetime(2024, 1, 1, 12, 0, 0, tzinfo=datetime.UTC)
    defaults = {
        "id": "ds-1",
        "tenant_id": "tenant-1",
        "name": "Dataset",
        "description": "desc",
        "provider": "vendor",
        "permission": "only_me",
        "data_source_type": None,
        "indexing_technique": "economy",
        "created_by": "account-1",
        "created_at": fixed_timestamp,
        "updated_by": None,
        "updated_at": fixed_timestamp,
        "embedding_model": None,
        "embedding_model_provider": None,
        "retrieval_model": None,
        "summary_index_setting": None,
        "built_in_field_enabled": False,
        "pipeline_id": None,
        "runtime_mode": "general",
        "chunk_structure": None,
        "icon_info": None,
        "enable_api": False,
        "is_multimodal": False,
    }
    # Right-hand operand wins, so caller-supplied values take precedence.
    return Dataset(**(defaults | overrides))
def make_related_app(**overrides) -> App:
    """Build an ``App`` instance from baseline values plus keyword overrides.

    The baseline is a chat-mode app with an emoji icon; any keyword argument
    replaces the matching baseline value before ``App`` is instantiated.
    """
    defaults = {
        "id": "app-1",
        "tenant_id": "tenant-1",
        "name": "App",
        "description": "desc",
        "mode": AppMode.CHAT,
        "icon_type": IconType.EMOJI,
        "icon": "🤖",
        "icon_background": "#fff",
        "app_model_config_id": None,
        "workflow_id": None,
        "enable_site": False,
        "enable_api": False,
        "created_by": "account-1",
    }
    return App(**{**defaults, **overrides})
def make_document_status(**overrides) -> Document:
    """Build a ``Document`` instance from baseline values plus keyword overrides.

    The baseline is an enabled, non-archived uploaded file whose indexing
    status is ``COMPLETED`` and whose stage timestamps and error are ``None``.
    """
    populated = {
        "id": "doc-1",
        "tenant_id": "tenant-1",
        "dataset_id": "dataset-1",
        "position": 1,
        "data_source_type": DataSourceType.UPLOAD_FILE,
        "batch": "batch-1",
        "name": "doc.txt",
        "created_from": DocumentCreatedFrom.WEB,
        "created_by": "account-1",
        "indexing_status": IndexingStatus.COMPLETED,
        "enabled": True,
        "archived": False,
    }
    # Fields that start out unset; kept in the same order as the original literal.
    unset = dict.fromkeys(
        (
            "processing_started_at",
            "parsing_completed_at",
            "cleaning_completed_at",
            "splitting_completed_at",
            "completed_at",
            "paused_at",
            "error",
            "stopped_at",
        )
    )
    return Document(**{**populated, **unset, **overrides})
class TestDatasetList:
def _mock_user(self):
user = MagicMock()
user.is_dataset_editor = True
@ -64,8 +156,7 @@ class TestDatasetList:
method = unwrap(api.get)
current_user = self._mock_user()
datasets = [MagicMock()]
marshaled = [self._mock_dataset_dict()]
datasets = [make_dataset(icon_info={"icon": "📙", "icon_type": "emoji"})]
with app.test_request_context("/datasets"):
with (
@ -78,10 +169,6 @@ class TestDatasetList:
"get_datasets",
return_value=(datasets, 1),
),
patch(
"controllers.console.datasets.datasets.marshal",
return_value=marshaled,
),
patch.object(
ProviderManager,
"get_configurations",
@ -93,14 +180,19 @@ class TestDatasetList:
assert status == 200
assert resp["total"] == 1
assert resp["data"][0]["embedding_available"] is True
assert resp["data"][0]["icon_info"] == {
"icon": "📙",
"icon_background": None,
"icon_type": "emoji",
"icon_url": None,
}
def test_get_with_ids_filter(self, app: Flask):
api = DatasetListApi()
method = unwrap(api.get)
current_user = self._mock_user()
datasets = [MagicMock()]
marshaled = [self._mock_dataset_dict()]
datasets = [make_dataset()]
with app.test_request_context("/datasets?ids=1&ids=2"):
with (
@ -113,10 +205,6 @@ class TestDatasetList:
"get_datasets_by_ids",
return_value=(datasets, 2),
) as by_ids_mock,
patch(
"controllers.console.datasets.datasets.marshal",
return_value=marshaled,
),
patch.object(
ProviderManager,
"get_configurations",
@ -134,8 +222,7 @@ class TestDatasetList:
method = unwrap(api.get)
current_user = self._mock_user()
datasets = [MagicMock()]
marshaled = [self._mock_dataset_dict()]
datasets = [make_dataset()]
with app.test_request_context("/datasets?tag_ids=tag1"):
with (
@ -148,10 +235,6 @@ class TestDatasetList:
"get_datasets",
return_value=(datasets, 1),
),
patch(
"controllers.console.datasets.datasets.marshal",
return_value=marshaled,
),
patch.object(
ProviderManager,
"get_configurations",
@ -167,9 +250,8 @@ class TestDatasetList:
method = unwrap(api.get)
current_user = self._mock_user()
datasets = [MagicMock()]
marshaled = [
self._mock_dataset_dict(
datasets = [
make_dataset(
indexing_technique="high_quality",
embedding_model="text-embed",
embedding_model_provider="openai",
@ -190,10 +272,6 @@ class TestDatasetList:
"get_datasets",
return_value=(datasets, 1),
),
patch(
"controllers.console.datasets.datasets.marshal",
return_value=marshaled,
),
patch.object(
ProviderManager,
"get_configurations",
@ -209,8 +287,7 @@ class TestDatasetList:
method = unwrap(api.get)
current_user = self._mock_user()
datasets = [MagicMock()]
marshaled = [self._mock_dataset_dict(permission="partial_members")]
datasets = [make_dataset(permission="partial_members")]
with app.test_request_context("/datasets"):
with (
@ -227,10 +304,6 @@ class TestDatasetList:
"controllers.console.datasets.datasets.db.session.execute",
return_value=MagicMock(all=lambda: [("ds-1", "u1")]),
),
patch(
"controllers.console.datasets.datasets.marshal",
return_value=marshaled,
),
patch.object(
ProviderManager,
"get_configurations",
@ -257,22 +330,7 @@ class TestDatasetListApiPost:
user = MagicMock()
user.is_dataset_editor = True
dataset = MagicMock()
# ---- minimal required fields for marshal ----
dataset.embedding_available = True
dataset.built_in_field_enabled = False
dataset.is_published = False
dataset.enable_api = False
dataset.is_multimodal = False
dataset.documents = []
dataset.retrieval_model_dict = {}
dataset.tags = []
dataset.external_knowledge_info = None
dataset.external_retrieval_model = None
dataset.doc_metadata = []
dataset.icon_info = None
dataset.summary_index_setting = MagicMock()
dataset.summary_index_setting.enable = False
dataset = make_dataset(name=payload["name"], description=payload["description"])
with (
app.test_request_context("/datasets", json=payload),
@ -381,26 +439,7 @@ class TestDatasetApiGet:
user = MagicMock()
tenant_id = "tenant-1"
dataset = MagicMock()
dataset.id = dataset_id
dataset.indexing_technique = "economy"
dataset.embedding_model_provider = None
dataset.embedding_available = True
dataset.built_in_field_enabled = False
dataset.is_published = False
dataset.enable_api = False
dataset.is_multimodal = False
dataset.documents = []
dataset.retrieval_model_dict = {}
dataset.tags = []
dataset.external_knowledge_info = None
dataset.external_retrieval_model = None
dataset.doc_metadata = []
dataset.icon_info = None
dataset.summary_index_setting = MagicMock()
dataset.summary_index_setting.enable = False
dataset.permission = "only_me"
dataset = make_dataset(id=dataset_id)
with (
app.test_request_context(f"/datasets/{dataset_id}"),
@ -428,6 +467,42 @@ class TestDatasetApiGet:
assert status == 200
assert data["embedding_available"] is True
def test_get_uses_default_external_retrieval_model(self, app: Flask):
    """GET on a dataset without a stored retrieval model returns the expected external retrieval values."""
    dataset_id = "dataset-id"
    resource = DatasetApi()
    handler = unwrap(resource.get)
    record = make_dataset(id=dataset_id, retrieval_model=None)
    expected = {
        "top_k": 2,
        "score_threshold": 0.0,
        "score_threshold_enabled": None,
    }

    with (
        app.test_request_context(f"/datasets/{dataset_id}"),
        patch(
            "controllers.console.datasets.datasets.current_account_with_tenant",
            return_value=(MagicMock(), "tenant"),
        ),
        patch.object(DatasetService, "get_dataset", return_value=record),
        patch.object(DatasetService, "check_dataset_permission", return_value=None),
        patch("controllers.console.datasets.datasets.create_plugin_provider_manager") as manager_factory,
    ):
        # The provider manager reports no models for this request.
        configurations = manager_factory.return_value.get_configurations.return_value
        configurations.get_models.return_value = []
        body, status_code = handler(resource, dataset_id)

    assert status_code == 200
    assert body["external_retrieval_model"] == expected
def test_get_dataset_not_found(self, app: Flask):
api = DatasetApi()
method = unwrap(api.get)
@ -484,27 +559,12 @@ class TestDatasetApiGet:
user = MagicMock()
tenant_id = "tenant-1"
dataset = MagicMock()
dataset.id = dataset_id
dataset.indexing_technique = "high_quality"
dataset.embedding_model = "text-embedding"
dataset.embedding_model_provider = "openai"
dataset.embedding_available = True
dataset.built_in_field_enabled = False
dataset.is_published = False
dataset.enable_api = False
dataset.is_multimodal = False
dataset.documents = []
dataset.retrieval_model_dict = {}
dataset.tags = []
dataset.external_knowledge_info = None
dataset.external_retrieval_model = None
dataset.doc_metadata = []
dataset.icon_info = None
dataset.summary_index_setting = MagicMock()
dataset.summary_index_setting.enable = False
dataset.permission = "only_me"
dataset = make_dataset(
id=dataset_id,
indexing_technique="high_quality",
embedding_model="text-embedding",
embedding_model_provider="openai",
)
with (
app.test_request_context(f"/datasets/{dataset_id}"),
@ -537,28 +597,9 @@ class TestDatasetApiGet:
dataset_id = "dataset-id"
dataset = MagicMock()
dataset.id = dataset_id
dataset.indexing_technique = "economy"
dataset.embedding_model_provider = None
dataset.permission = "partial_members"
dataset = make_dataset(id=dataset_id, permission="partial_members")
dataset.embedding_available = True
dataset.built_in_field_enabled = False
dataset.is_published = False
dataset.enable_api = False
dataset.is_multimodal = False
dataset.documents = []
dataset.retrieval_model_dict = {}
dataset.tags = []
dataset.external_knowledge_info = None
dataset.external_retrieval_model = None
dataset.doc_metadata = []
dataset.icon_info = None
dataset.summary_index_setting = MagicMock()
dataset.summary_index_setting.enable = False
partial_members = [{"id": "u1"}, {"id": "u2"}]
partial_members = ["u1", "u2"]
with (
app.test_request_context(f"/datasets/{dataset_id}"),
@ -605,27 +646,7 @@ class TestDatasetApiPatch:
user = MagicMock()
tenant_id = "tenant-1"
dataset = MagicMock()
dataset.id = dataset_id
dataset.tenant_id = tenant_id
dataset.permission = "only_me"
dataset.indexing_technique = "economy"
dataset.embedding_model_provider = None
dataset.embedding_available = True
dataset.built_in_field_enabled = False
dataset.is_published = False
dataset.enable_api = False
dataset.is_multimodal = False
dataset.documents = []
dataset.retrieval_model_dict = {}
dataset.tags = []
dataset.external_knowledge_info = None
dataset.external_retrieval_model = None
dataset.doc_metadata = []
dataset.icon_info = None
dataset.summary_index_setting = MagicMock()
dataset.summary_index_setting.enable = False
dataset = make_dataset(id=dataset_id, tenant_id=tenant_id)
with (
app.test_request_context(f"/datasets/{dataset_id}"),
@ -713,29 +734,10 @@ class TestDatasetApiPatch:
payload = {
"permission": "partial_members",
"partial_member_list": [{"id": "u1"}, {"id": "u2"}],
"partial_member_list": [{"user_id": "u1"}, {"user_id": "u2"}],
}
dataset = MagicMock()
dataset.id = dataset_id
dataset.permission = "partial_members"
dataset.indexing_technique = "economy"
dataset.embedding_model_provider = None
dataset.embedding_available = True
dataset.built_in_field_enabled = False
dataset.is_published = False
dataset.enable_api = False
dataset.is_multimodal = False
dataset.documents = []
dataset.retrieval_model_dict = {}
dataset.tags = []
dataset.external_knowledge_info = None
dataset.external_retrieval_model = None
dataset.doc_metadata = []
dataset.icon_info = None
dataset.summary_index_setting = MagicMock()
dataset.summary_index_setting.enable = False
dataset = make_dataset(id=dataset_id, permission="partial_members")
with (
app.test_request_context(f"/datasets/{dataset_id}"),
@ -767,12 +769,12 @@ class TestDatasetApiPatch:
patch.object(
DatasetPermissionService,
"get_dataset_partial_member_list",
return_value=payload["partial_member_list"],
return_value=["u1", "u2"],
),
):
result, _ = method(api, dataset_id)
assert result["partial_member_list"] == payload["partial_member_list"]
assert result["partial_member_list"] == ["u1", "u2"]
def test_patch_clear_partial_members(self, app: Flask):
api = DatasetApi()
@ -784,26 +786,7 @@ class TestDatasetApiPatch:
"permission": "only_me",
}
dataset = MagicMock()
dataset.id = dataset_id
dataset.permission = "only_me"
dataset.indexing_technique = "economy"
dataset.embedding_model_provider = None
dataset.embedding_available = True
dataset.built_in_field_enabled = False
dataset.is_published = False
dataset.enable_api = False
dataset.is_multimodal = False
dataset.documents = []
dataset.retrieval_model_dict = {}
dataset.tags = []
dataset.external_knowledge_info = None
dataset.external_retrieval_model = None
dataset.doc_metadata = []
dataset.icon_info = None
dataset.summary_index_setting = MagicMock()
dataset.summary_index_setting.enable = False
dataset = make_dataset(id=dataset_id)
with (
app.test_request_context(f"/datasets/{dataset_id}"),
@ -984,6 +967,27 @@ class TestDatasetUseCheckApi:
class TestDatasetQueryApi:
def _query_record(self, index: int = 1) -> DatasetQuery:
    """Build a ``DatasetQuery`` holding a single text query.

    ``index`` is woven into the question text, the creator id, the record id
    and the day of the January 2024 ``created_at`` timestamp.
    """
    # ``content`` is stored as a JSON-encoded list of query parts.
    parts = [
        {
            "content_type": "text_query",
            "content": f"question {index}",
            "file_info": None,
        }
    ]
    record = DatasetQuery(
        dataset_id="dataset-id",
        content=json.dumps(parts),
        source="hit_testing",
        source_app_id=None,
        created_by_role=CreatorUserRole.ACCOUNT,
        created_by=f"account-{index}",
    )
    # id and created_at are assigned after construction, as in the original.
    record.id = f"query-{index}"
    record.created_at = datetime.datetime(2024, 1, index, 12, 0, 0, tzinfo=datetime.UTC)
    return record
def test_get_queries_success(self, app: Flask):
api = DatasetQueryApi()
method = unwrap(api.get)
@ -995,7 +999,7 @@ class TestDatasetQueryApi:
dataset = MagicMock()
dataset.id = dataset_id
queries = [MagicMock(), MagicMock()]
queries = [self._query_record(1), self._query_record(2)]
with (
app.test_request_context("/datasets/queries?page=1&limit=20"),
@ -1027,6 +1031,21 @@ class TestDatasetQueryApi:
assert response["limit"] == 20
assert response["has_more"] is False
assert len(response["data"]) == 2
assert response["data"][0] == {
"id": "query-1",
"queries": [
{
"content_type": "text_query",
"content": "question 1",
"file_info": None,
}
],
"source": "hit_testing",
"source_app_id": None,
"created_by_role": "account",
"created_by": "account-1",
"created_at": 1704110400,
}
def test_get_queries_dataset_not_found(self, app: Flask):
api = DatasetQueryApi()
@ -1089,7 +1108,7 @@ class TestDatasetQueryApi:
dataset = MagicMock()
dataset.id = dataset_id
queries = [MagicMock() for _ in range(20)]
queries = [self._query_record(index) for index in range(1, 21)]
with (
app.test_request_context("/datasets/queries?page=1&limit=20"),
@ -1338,8 +1357,8 @@ class TestDatasetRelatedAppListApi:
dataset = MagicMock()
dataset.id = "dataset-1"
app1 = MagicMock()
app2 = MagicMock()
app1 = make_related_app(id="app-1", name="App 1")
app2 = make_related_app(id="app-2", name="App 2")
join1 = MagicMock(app=app1)
join2 = MagicMock(app=app2)
@ -1367,7 +1386,28 @@ class TestDatasetRelatedAppListApi:
assert status == 200
assert response["total"] == 2
assert response["data"] == [app1, app2]
assert response["data"] == [
{
"id": "app-1",
"name": "App 1",
"description": "desc",
"mode": "chat",
"icon_type": "emoji",
"icon": "🤖",
"icon_background": "#fff",
"icon_url": None,
},
{
"id": "app-2",
"name": "App 2",
"description": "desc",
"mode": "chat",
"icon_type": "emoji",
"icon": "🤖",
"icon_background": "#fff",
"icon_url": None,
},
]
def test_get_dataset_not_found(self, app: Flask):
api = DatasetRelatedAppListApi()
@ -1418,7 +1458,7 @@ class TestDatasetRelatedAppListApi:
dataset = MagicMock()
dataset.id = "dataset-1"
app1 = MagicMock()
app1 = make_related_app()
join1 = MagicMock(app=app1)
join2 = MagicMock(app=None)
@ -1446,7 +1486,18 @@ class TestDatasetRelatedAppListApi:
assert status == 200
assert response["total"] == 1
assert response["data"] == [app1]
assert response["data"] == [
{
"id": "app-1",
"name": "App",
"description": "desc",
"mode": "chat",
"icon_type": "emoji",
"icon": "🤖",
"icon_background": "#fff",
"icon_url": None,
}
]
class TestDatasetIndexingStatusApi:
@ -1652,7 +1703,7 @@ class TestDatasetApiKeyApi:
method(api)
assert exc_info.value.code == 400
assert exc_info.value.data == {
assert vars(exc_info.value)["data"] == {
"message": "Cannot create more than 10 API keys for this resource type.",
"custom": "max_keys_exceeded",
}
@ -1833,7 +1884,7 @@ class TestDatasetErrorDocs:
method = unwrap(api.get)
dataset = MagicMock()
error_doc = MagicMock()
error_doc = make_document_status(id="error-doc", indexing_status=IndexingStatus.ERROR, error="failed")
with (
app.test_request_context("/"),
@ -1872,7 +1923,7 @@ class TestDatasetPermissionUserListApi:
method = unwrap(api.get)
dataset = MagicMock()
users = [{"id": "u1"}, {"id": "u2"}]
users = ["u1", "u2"]
with (
app.test_request_context("/"),
@ -1929,7 +1980,7 @@ class TestDatasetAutoDisableLogApi:
method = unwrap(api.get)
dataset = MagicMock()
logs = [{"reason": "quota"}]
logs = {"document_ids": ["doc-1"], "count": 1}
with (
app.test_request_context("/"),

View File

@ -574,16 +574,10 @@ export const processRule = {
/**
* Get mock dataset retrieval settings by vector type
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get10 = oc
.route({
deprecated: true,
description:
'Get mock dataset retrieval settings by vector type\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get mock dataset retrieval settings by vector type',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsRetrievalSettingByVectorType',
@ -599,16 +593,10 @@ export const byVectorType = {
/**
* Get dataset retrieval settings
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get11 = oc
.route({
deprecated: true,
description:
'Get dataset retrieval settings\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get dataset retrieval settings',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsRetrievalSetting',
@ -643,16 +631,10 @@ export const apiKeys2 = {
/**
* Get dataset auto disable logs
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get12 = oc
.route({
deprecated: true,
description:
'Get dataset auto disable logs\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get dataset auto disable logs',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsByDatasetIdAutoDisableLogs',
@ -1522,16 +1504,10 @@ export const documents = {
/**
* Get dataset error documents
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get27 = oc
.route({
deprecated: true,
description:
'Get dataset error documents\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get dataset error documents',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsByDatasetIdErrorDocs',
@ -1607,16 +1583,10 @@ export const hitTesting = {
/**
* Get dataset indexing status
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get28 = oc
.route({
deprecated: true,
description:
'Get dataset indexing status\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get dataset indexing status',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsByDatasetIdIndexingStatus',
@ -1739,16 +1709,10 @@ export const notion2 = {
/**
* Get dataset permission user list
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get31 = oc
.route({
deprecated: true,
description:
'Get dataset permission user list\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get dataset permission user list',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsByDatasetIdPermissionPartUsers',
@ -1764,16 +1728,10 @@ export const permissionPartUsers = {
/**
* Get dataset query history
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get32 = oc
.route({
deprecated: true,
description:
'Get dataset query history\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get dataset query history',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsByDatasetIdQueries',
@ -1789,16 +1747,10 @@ export const queries = {
/**
* Get applications related to dataset
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get33 = oc
.route({
deprecated: true,
description:
'Get applications related to dataset\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get applications related to dataset',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsByDatasetIdRelatedApps',
@ -1870,16 +1822,10 @@ export const delete9 = oc
/**
* Get dataset details
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get35 = oc
.route({
deprecated: true,
description:
'Get dataset details\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get dataset details',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsByDatasetId',
@ -2003,16 +1949,10 @@ export const byResourceId = {
/**
* Get list of datasets
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get37 = oc
.route({
deprecated: true,
description:
'Get list of datasets\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get list of datasets',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasets',
@ -2024,16 +1964,10 @@ export const get37 = oc
/**
* Create a new dataset
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const post23 = oc
.route({
deprecated: true,
description:
'Create a new dataset\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Create a new dataset',
inputStructure: 'detailed',
method: 'POST',
operationId: 'postDatasets',

View File

@ -4,6 +4,14 @@ export type ClientOptions = {
baseUrl: `${string}://${string}/console/api` | (string & {})
}
/**
 * Envelope for a list of datasets: `data` holds the items, the remaining
 * fields (`has_more`, `limit`, `page`, `total`) accompany the list.
 * Declared as the 200 body in `GetDatasetsResponses`.
 */
export type DatasetListResponse = {
data: Array<DatasetListItemResponse>
has_more: boolean
limit: number
page: number
total: number
}
export type DatasetCreatePayload = {
description?: string
external_knowledge_api_id?: string | null
@ -14,6 +22,44 @@ export type DatasetCreatePayload = {
provider?: string
}
/**
 * Single dataset record. Declared as the 201 body in `PostDatasetsResponses`.
 * All fields are required except `embedding_available`; many are nullable.
 */
export type DatasetDetailResponse = {
app_count: number
author_name: string | null
built_in_field_enabled: boolean
chunk_structure: string | null
// NOTE(review): numeric timestamp; unit is not visible here (presumably Unix seconds — confirm).
created_at: number
created_by: string
data_source_type: string | null
description: string | null
doc_form: string | null
doc_metadata: Array<DatasetDocMetadataResponse>
document_count: number
// The only optional key on this type; may also be null.
embedding_available?: boolean | null
embedding_model: string | null
embedding_model_provider: string | null
enable_api: boolean
external_knowledge_info: DatasetExternalKnowledgeInfoResponse
external_retrieval_model: DatasetExternalRetrievalModelResponse
icon_info: DatasetIconInfoResponse
id: string
indexing_technique: string | null
is_multimodal: boolean
is_published: boolean
name: string
// Typed as a plain string here; `DatasetPermissionEnum` lists three literal values.
permission: string
pipeline_id: string | null
provider: string
retrieval_model_dict: DatasetRetrievalModelResponse
runtime_mode: string | null
summary_index_setting: DatasetSummaryIndexSettingResponse
tags: Array<DatasetTagResponse>
total_available_documents: number
total_documents: number
// NOTE(review): same numeric timestamp representation as `created_at` — confirm unit.
updated_at: number
updated_by: string | null
word_count: number
}
/** Response object carrying a single `api_base_url` string. */
export type ApiBaseUrlResponse = {
api_base_url: string
}
@ -116,6 +162,12 @@ export type IndexingEstimatePayload = {
}
}
/**
 * Indexing estimate result: a segment count plus preview items.
 * `qa_preview` is optional and may be null; `preview` is always present.
 */
export type IndexingEstimateResponse = {
preview: Array<IndexingEstimatePreviewItemResponse>
qa_preview?: Array<IndexingEstimateQaPreviewItemResponse> | null
total_segments: number
}
export type KnowledgeConfig = {
data_source?: DataSource
doc_form?: string
@ -159,6 +211,49 @@ export type NotionEstimatePayload = {
}
}
/** Response object listing retrieval method names as strings. */
export type RetrievalSettingResponse = {
retrieval_method: Array<string>
}
/**
 * Dataset record with the same fields as `DatasetDetailResponse`, plus an
 * optional, nullable `partial_member_list` of strings.
 */
export type DatasetDetailWithPartialMembersResponse = {
app_count: number
author_name: string | null
built_in_field_enabled: boolean
chunk_structure: string | null
// NOTE(review): numeric timestamp; unit is not visible here (presumably Unix seconds — confirm).
created_at: number
created_by: string
data_source_type: string | null
description: string | null
doc_form: string | null
doc_metadata: Array<DatasetDocMetadataResponse>
document_count: number
embedding_available?: boolean | null
embedding_model: string | null
embedding_model_provider: string | null
enable_api: boolean
external_knowledge_info: DatasetExternalKnowledgeInfoResponse
external_retrieval_model: DatasetExternalRetrievalModelResponse
icon_info: DatasetIconInfoResponse
id: string
indexing_technique: string | null
is_multimodal: boolean
is_published: boolean
name: string
// Optional here, whereas `DatasetListItemResponse` declares it as required.
partial_member_list?: Array<string> | null
permission: string
pipeline_id: string | null
provider: string
retrieval_model_dict: DatasetRetrievalModelResponse
runtime_mode: string | null
summary_index_setting: DatasetSummaryIndexSettingResponse
tags: Array<DatasetTagResponse>
total_available_documents: number
total_documents: number
updated_at: number
updated_by: string | null
word_count: number
}
export type DatasetUpdatePayload = {
description?: string | null
embedding_model?: string | null
@ -190,6 +285,11 @@ export type SimpleResultResponse = {
result: string
}
/** Auto-disable log summary: a list of document ids together with a numeric `count`. */
export type AutoDisableLogsResponse = {
count: number
document_ids: Array<string>
}
/** Request payload carrying the list of document ids (required). */
export type DocumentBatchDownloadZipPayload = {
document_ids: Array<string>
}
@ -271,6 +371,11 @@ export type ChildChunkUpdatePayload = {
content: string
}
/** List of document status entries in `data`, accompanied by a numeric `total`. */
export type ErrorDocsResponse = {
data: Array<DocumentStatusResponse>
total: number
}
export type ExternalHitTestingPayload = {
external_retrieval_model?: {
[key: string]: unknown
@ -295,6 +400,10 @@ export type HitTestingResponse = {
records?: Array<HitTestingRecord>
}
/** Wrapper around a list of `DocumentStatusResponse` entries (no total field). */
export type DocumentStatusListResponse = {
data: Array<DocumentStatusResponse>
}
export type DatasetMetadataListResponse = {
built_in_field_enabled: boolean
doc_metadata: Array<DatasetMetadataListItemResponse>
@ -315,21 +424,21 @@ export type MetadataUpdatePayload = {
name: string
}
export type DatasetQueryDetail = {
created_at?: {
[key: string]: unknown
}
created_by?: string
created_by_role?: string
id?: string
queries?: DatasetContent
source?: string
source_app_id?: string
/** Wrapper around a list of strings in `data` (plain strings, not objects). */
export type PartialMemberListResponse = {
data: Array<string>
}
export type RelatedAppList = {
data?: Array<AppDetailKernel>
total?: number
/**
 * Envelope for a list of dataset query records: `data` holds the items, the
 * remaining fields (`has_more`, `limit`, `page`, `total`) accompany the list.
 */
export type DatasetQueryListResponse = {
data: Array<DatasetQueryDetailResponse>
has_more: boolean
limit: number
page: number
total: number
}
/** List of related apps in `data`, accompanied by a numeric `total`; both required. */
export type RelatedAppListResponse = {
data: Array<RelatedAppResponse>
total: number
}
export type DocumentRetryPayload = {
@ -340,8 +449,97 @@ export type UsageCheckResponse = {
is_using: boolean
}
/**
 * One entry of `DatasetListResponse.data`. Carries the same fields as
 * `DatasetDetailResponse` plus a required `partial_member_list`.
 */
export type DatasetListItemResponse = {
app_count: number
author_name: string | null
built_in_field_enabled: boolean
chunk_structure: string | null
// NOTE(review): numeric timestamp; unit is not visible here (presumably Unix seconds — confirm).
created_at: number
created_by: string
data_source_type: string | null
description: string | null
doc_form: string | null
doc_metadata: Array<DatasetDocMetadataResponse>
document_count: number
embedding_available?: boolean | null
embedding_model: string | null
embedding_model_provider: string | null
enable_api: boolean
external_knowledge_info: DatasetExternalKnowledgeInfoResponse
external_retrieval_model: DatasetExternalRetrievalModelResponse
icon_info: DatasetIconInfoResponse
id: string
indexing_technique: string | null
is_multimodal: boolean
is_published: boolean
name: string
// Required and non-nullable on list items.
partial_member_list: Array<string>
permission: string
pipeline_id: string | null
provider: string
retrieval_model_dict: DatasetRetrievalModelResponse
runtime_mode: string | null
summary_index_setting: DatasetSummaryIndexSettingResponse
tags: Array<DatasetTagResponse>
total_available_documents: number
total_documents: number
updated_at: number
updated_by: string | null
word_count: number
}
/** String-literal union of the three dataset permission values. */
export type DatasetPermissionEnum = 'all_team_members' | 'only_me' | 'partial_members'
/** Element type of the `doc_metadata` arrays on the dataset response types; all fields required. */
export type DatasetDocMetadataResponse = {
id: string
name: string
type: string
}
/** Type of the `external_knowledge_info` field on the dataset response types; four required strings. */
export type DatasetExternalKnowledgeInfoResponse = {
external_knowledge_api_endpoint: string
external_knowledge_api_id: string
external_knowledge_api_name: string
external_knowledge_id: string
}
/**
 * Type of the `external_retrieval_model` field on the dataset response types.
 * `score_threshold_enabled` is optional and may be null; the two numeric fields are required.
 */
export type DatasetExternalRetrievalModelResponse = {
score_threshold: number
score_threshold_enabled?: boolean | null
top_k: number
}
/**
 * Type of the `icon_info` field on the dataset response types.
 * `icon` and `icon_type` are required but nullable; `icon_background` and
 * `icon_url` are optional and nullable.
 */
export type DatasetIconInfoResponse = {
icon: string | null
icon_background?: string | null
icon_type: string | null
icon_url?: string | null
}
/**
 * Type of the `retrieval_model_dict` field on the dataset response types.
 * `reranking_mode`, `score_threshold` and `weights` are optional; the rest are required.
 */
export type DatasetRetrievalModelResponse = {
reranking_enable: boolean
reranking_mode?: string | null
reranking_model: DatasetRerankingModelResponse
score_threshold?: number | null
score_threshold_enabled: boolean
search_method: string
top_k: number
weights?: DatasetWeightedScoreResponse
}
/** Type of the `summary_index_setting` field on the dataset response types; every key is optional and nullable. */
export type DatasetSummaryIndexSettingResponse = {
enable?: boolean | null
model_name?: string | null
model_provider_name?: string | null
summary_prompt?: string | null
}
/** Element type of the `tags` arrays on the dataset response types; all fields required. */
export type DatasetTagResponse = {
id: string
name: string
type: string
}
export type DatasetDocMetadata = {
id?: string
name?: string
@ -392,6 +590,17 @@ export type Tag = {
type: string
}
/**
 * Element type of `IndexingEstimateResponse.preview`.
 * Only `content` is required; `child_chunks` and `summary` are optional and nullable.
 */
export type IndexingEstimatePreviewItemResponse = {
child_chunks?: Array<string> | null
content: string
summary?: string | null
}
/** Element type of `IndexingEstimateResponse.qa_preview`: a required question/answer string pair. */
export type IndexingEstimateQaPreviewItemResponse = {
answer: string
question: string
}
/** Wrapper holding a required `info_list`; referenced by `KnowledgeConfig.data_source`. */
export type DataSource = {
info_list: InfoList
}
@ -442,6 +651,21 @@ export type DocumentMetadataResponse = {
value?: string | null
}
/**
 * Indexing status of one document; element type of `ErrorDocsResponse.data`
 * and `DocumentStatusListResponse.data`.
 * The `*_at` fields are `number | null`.
 * NOTE(review): timestamp unit is not visible here (presumably Unix seconds — confirm).
 */
export type DocumentStatusResponse = {
cleaning_completed_at: number | null
completed_at: number | null
// Segment counters are the only optional keys on this type.
completed_segments?: number | null
error: string | null
id: string
indexing_status: string
parsing_completed_at: number | null
paused_at: number | null
processing_started_at: number | null
splitting_completed_at: number | null
stopped_at: number | null
total_segments?: number | null
}
export type HitTestingRecord = {
child_chunks?: Array<HitTestingChildChunk>
files?: Array<HitTestingFile>
@ -458,23 +682,36 @@ export type DatasetMetadataListItemResponse = {
type: string
}
export type DatasetContent = {
content?: string
content_type?: string
file_info?: DatasetFileInfo
/**
 * One dataset query record; element type of `DatasetQueryListResponse.data`.
 * All keys are required; only `source_app_id` is nullable.
 */
export type DatasetQueryDetailResponse = {
// NOTE(review): numeric timestamp; unit is not visible here (presumably Unix seconds — confirm).
created_at: number
created_by: string
created_by_role: string
id: string
queries: Array<DatasetQueryContentResponse>
source: string
source_app_id: string | null
}
export type AppDetailKernel = {
description?: string
icon?: string
icon_background?: string
icon_type?: string
icon_url?: {
[key: string]: unknown
}
id?: string
mode?: string
name?: string
/**
 * One related app; element type of `RelatedAppListResponse.data`.
 * The icon fields are nullable, and `icon_url` is additionally optional.
 */
export type RelatedAppResponse = {
description: string
icon: string | null
icon_background: string | null
icon_type: string | null
icon_url?: string | null
id: string
mode: string
name: string
}
/** Type of `DatasetRetrievalModelResponse.reranking_model`; both keys are optional and nullable. */
export type DatasetRerankingModelResponse = {
reranking_model_name?: string | null
reranking_provider_name?: string | null
}
/**
 * Weighted-score configuration combining a keyword setting and a vector
 * setting.
 *
 * All three keys are required; only `weight_type` may be `null`.
 */
export type DatasetWeightedScoreResponse = {
keyword_setting: DatasetKeywordSettingResponse
vector_setting: DatasetVectorSettingResponse
weight_type: string | null
}
export type DatasetRerankingModel = {
@ -572,13 +809,20 @@ export type HitTestingSegment = {
word_count?: number | null
}
export type DatasetFileInfo = {
extension?: string
id?: string
mime_type?: string
name?: string
size?: number
source_url?: string
/**
 * One content item of a dataset query.
 *
 * `content` and `content_type` are required strings. `file_info` may be
 * omitted, but is not typed as nullable.
 */
export type DatasetQueryContentResponse = {
content: string
content_type: string
file_info?: DatasetQueryFileInfoResponse
}
/** Keyword half of a weighted-score configuration: a single required numeric `keyword_weight`. */
export type DatasetKeywordSettingResponse = {
keyword_weight: number
}
/**
 * Vector half of a weighted-score configuration.
 *
 * Names the embedding model and its provider and carries a numeric
 * `vector_weight`. All keys are required.
 */
export type DatasetVectorSettingResponse = {
embedding_model_name: string
embedding_provider_name: string
vector_weight: number
}
export type DatasetKeywordSetting = {
@ -661,6 +905,15 @@ export type HitTestingDocument = {
name?: string | null
}
/**
 * File metadata attached to a dataset query content item.
 *
 * All keys are required. `size` is a number (unit not stated here); the
 * rest are strings.
 */
export type DatasetQueryFileInfoResponse = {
extension: string
id: string
mime_type: string
name: string
size: number
source_url: string
}
export type NotionPage = {
page_icon?: NotionIcon
page_id: string
@ -678,20 +931,18 @@ export type GetDatasetsData = {
body?: never
path?: never
query?: {
ids?: string
include_all?: string
ids?: Array<string>
include_all?: boolean
keyword?: string
limit?: string
page?: string
tag_ids?: string
limit?: number
page?: number
tag_ids?: Array<string>
}
url: '/datasets'
}
export type GetDatasetsResponses = {
200: {
[key: string]: unknown
}
200: DatasetListResponse
}
/** Union of every response body declared in `GetDatasetsResponses`, across all of its status-code keys. */
export type GetDatasetsResponse = GetDatasetsResponses[keyof GetDatasetsResponses]
@ -712,9 +963,7 @@ export type PostDatasetsErrors = {
/** Union of every error body declared in `PostDatasetsErrors`, across all of its status-code keys. */
export type PostDatasetsError = PostDatasetsErrors[keyof PostDatasetsErrors]
export type PostDatasetsResponses = {
201: {
[key: string]: unknown
}
201: DatasetDetailResponse
}
/** Union of every response body declared in `PostDatasetsResponses`, across all of its status-code keys. */
export type PostDatasetsResponse = PostDatasetsResponses[keyof PostDatasetsResponses]
@ -967,9 +1216,7 @@ export type PostDatasetsIndexingEstimateData = {
}
export type PostDatasetsIndexingEstimateResponses = {
200: {
[key: string]: unknown
}
200: IndexingEstimateResponse
}
export type PostDatasetsIndexingEstimateResponse
@ -1066,9 +1313,7 @@ export type GetDatasetsRetrievalSettingData = {
}
export type GetDatasetsRetrievalSettingResponses = {
200: {
[key: string]: unknown
}
200: RetrievalSettingResponse
}
export type GetDatasetsRetrievalSettingResponse
@ -1084,9 +1329,7 @@ export type GetDatasetsRetrievalSettingByVectorTypeData = {
}
export type GetDatasetsRetrievalSettingByVectorTypeResponses = {
200: {
[key: string]: unknown
}
200: RetrievalSettingResponse
}
export type GetDatasetsRetrievalSettingByVectorTypeResponse
@ -1132,7 +1375,7 @@ export type GetDatasetsByDatasetIdError
= GetDatasetsByDatasetIdErrors[keyof GetDatasetsByDatasetIdErrors]
export type GetDatasetsByDatasetIdResponses = {
200: DatasetDetail
200: DatasetDetailWithPartialMembersResponse
}
export type GetDatasetsByDatasetIdResponse
@ -1160,7 +1403,7 @@ export type PatchDatasetsByDatasetIdError
= PatchDatasetsByDatasetIdErrors[keyof PatchDatasetsByDatasetIdErrors]
export type PatchDatasetsByDatasetIdResponses = {
200: DatasetDetail
200: DatasetDetailWithPartialMembersResponse
}
export type PatchDatasetsByDatasetIdResponse
@ -1202,9 +1445,7 @@ export type GetDatasetsByDatasetIdAutoDisableLogsError
= GetDatasetsByDatasetIdAutoDisableLogsErrors[keyof GetDatasetsByDatasetIdAutoDisableLogsErrors]
export type GetDatasetsByDatasetIdAutoDisableLogsResponses = {
200: {
[key: string]: unknown
}
200: AutoDisableLogsResponse
}
export type GetDatasetsByDatasetIdAutoDisableLogsResponse
@ -1985,9 +2226,7 @@ export type GetDatasetsByDatasetIdErrorDocsError
= GetDatasetsByDatasetIdErrorDocsErrors[keyof GetDatasetsByDatasetIdErrorDocsErrors]
export type GetDatasetsByDatasetIdErrorDocsResponses = {
200: {
[key: string]: unknown
}
200: ErrorDocsResponse
}
export type GetDatasetsByDatasetIdErrorDocsResponse
@ -2061,9 +2300,7 @@ export type GetDatasetsByDatasetIdIndexingStatusData = {
}
export type GetDatasetsByDatasetIdIndexingStatusResponses = {
200: {
[key: string]: unknown
}
200: DocumentStatusListResponse
}
export type GetDatasetsByDatasetIdIndexingStatusResponse
@ -2194,9 +2431,7 @@ export type GetDatasetsByDatasetIdPermissionPartUsersError
= GetDatasetsByDatasetIdPermissionPartUsersErrors[keyof GetDatasetsByDatasetIdPermissionPartUsersErrors]
export type GetDatasetsByDatasetIdPermissionPartUsersResponses = {
200: {
[key: string]: unknown
}
200: PartialMemberListResponse
}
export type GetDatasetsByDatasetIdPermissionPartUsersResponse
@ -2212,7 +2447,7 @@ export type GetDatasetsByDatasetIdQueriesData = {
}
export type GetDatasetsByDatasetIdQueriesResponses = {
200: DatasetQueryDetail
200: DatasetQueryListResponse
}
export type GetDatasetsByDatasetIdQueriesResponse
@ -2228,7 +2463,7 @@ export type GetDatasetsByDatasetIdRelatedAppsData = {
}
export type GetDatasetsByDatasetIdRelatedAppsResponses = {
200: RelatedAppList
200: RelatedAppListResponse
}
export type GetDatasetsByDatasetIdRelatedAppsResponse

View File

@ -98,6 +98,13 @@ export const zNotionEstimatePayload = z.object({
process_rule: z.record(z.string(), z.unknown()),
})
/**
* RetrievalSettingResponse
*
* Object schema with one required key: `retrieval_method`, an array of strings.
*/
export const zRetrievalSettingResponse = z.object({
retrieval_method: z.array(z.string()),
})
/**
* SimpleResultResponse
*/
@ -105,6 +112,14 @@ export const zSimpleResultResponse = z.object({
result: z.string(),
})
/**
* AutoDisableLogsResponse
*
* Requires an integer `count` and a `document_ids` array of strings.
*/
export const zAutoDisableLogsResponse = z.object({
count: z.int(),
document_ids: z.array(z.string()),
})
/**
* DocumentBatchDownloadZipPayload
*
@ -220,6 +235,13 @@ export const zMetadataUpdatePayload = z.object({
name: z.string(),
})
/**
* PartialMemberListResponse
*
* Object schema with one required key: `data`, an array of strings.
* NOTE(review): the strings are presumably member/account ids — confirm against the backend.
*/
export const zPartialMemberListResponse = z.object({
data: z.array(z.string()),
})
/**
* DocumentRetryPayload
*/
@ -272,6 +294,63 @@ export const zDatasetUpdatePayload = z.object({
summary_index_setting: z.record(z.string(), z.unknown()).nullish(),
})
/**
* DatasetDocMetadataResponse
*
* Metadata field descriptor: `id`, `name` and `type` are all required strings.
*/
export const zDatasetDocMetadataResponse = z.object({
id: z.string(),
name: z.string(),
type: z.string(),
})
/**
* DatasetExternalKnowledgeInfoResponse
*
* External knowledge reference. All four keys are required strings;
* none is optional or nullable.
*/
export const zDatasetExternalKnowledgeInfoResponse = z.object({
external_knowledge_api_endpoint: z.string(),
external_knowledge_api_id: z.string(),
external_knowledge_api_name: z.string(),
external_knowledge_id: z.string(),
})
/**
* DatasetExternalRetrievalModelResponse
*
* `score_threshold` (number) and `top_k` (integer) are required;
* `score_threshold_enabled` may be omitted or `null`.
*/
export const zDatasetExternalRetrievalModelResponse = z.object({
score_threshold: z.number(),
score_threshold_enabled: z.boolean().nullish(),
top_k: z.int(),
})
/**
* DatasetIconInfoResponse
*
* `icon` and `icon_type` are required keys whose value may be `null`;
* `icon_background` and `icon_url` may be omitted or `null`.
*/
export const zDatasetIconInfoResponse = z.object({
icon: z.string().nullable(),
icon_background: z.string().nullish(),
icon_type: z.string().nullable(),
icon_url: z.string().nullish(),
})
/**
* DatasetSummaryIndexSettingResponse
*
* Every key is nullish (may be omitted or `null`), so an empty object
* passes validation.
*/
export const zDatasetSummaryIndexSettingResponse = z.object({
enable: z.boolean().nullish(),
model_name: z.string().nullish(),
model_provider_name: z.string().nullish(),
summary_prompt: z.string().nullish(),
})
/**
* DatasetTagResponse
*
* Tag entry: `id`, `name` and `type` are all required strings.
*/
export const zDatasetTagResponse = z.object({
id: z.string(),
name: z.string(),
type: z.string(),
})
export const zDatasetDocMetadata = z.object({
id: z.string().optional(),
name: z.string().optional(),
@ -314,6 +393,32 @@ export const zTag = z.object({
type: z.string(),
})
/**
* IndexingEstimatePreviewItemResponse
*
* Only `content` is required; `child_chunks` (array of strings) and
* `summary` may be omitted or `null`.
*/
export const zIndexingEstimatePreviewItemResponse = z.object({
child_chunks: z.array(z.string()).nullish(),
content: z.string(),
summary: z.string().nullish(),
})
/**
* IndexingEstimateQaPreviewItemResponse
*
* Question/answer pair; both keys are required strings.
*/
export const zIndexingEstimateQaPreviewItemResponse = z.object({
answer: z.string(),
question: z.string(),
})
/**
* IndexingEstimateResponse
*
* `preview` (array of preview items) and the integer `total_segments` are
* required; `qa_preview` may be omitted or `null`.
*/
export const zIndexingEstimateResponse = z.object({
preview: z.array(zIndexingEstimatePreviewItemResponse),
qa_preview: z.array(zIndexingEstimateQaPreviewItemResponse).nullish(),
total_segments: z.int(),
})
/**
* DatasetResponse
*/
@ -392,6 +497,39 @@ export const zDatasetAndDocumentResponse = z.object({
documents: z.array(zDocumentResponse),
})
/**
* DocumentStatusResponse
*
* Indexing status snapshot for a single document. Every `*_at` key and
* `error` must be present but may be `null`; `completed_segments` and
* `total_segments` are nullish (may be omitted or `null`).
*
* NOTE(review): the `*_at` fields validate as integers — presumably Unix
* timestamps; confirm the unit against the backend.
*/
export const zDocumentStatusResponse = z.object({
cleaning_completed_at: z.int().nullable(),
completed_at: z.int().nullable(),
completed_segments: z.int().nullish(),
error: z.string().nullable(),
id: z.string(),
indexing_status: z.string(),
parsing_completed_at: z.int().nullable(),
paused_at: z.int().nullable(),
processing_started_at: z.int().nullable(),
splitting_completed_at: z.int().nullable(),
stopped_at: z.int().nullable(),
total_segments: z.int().nullish(),
})
/**
* ErrorDocsResponse
*
* `data` is an array of document status objects and `total` is a required integer.
*/
export const zErrorDocsResponse = z.object({
data: z.array(zDocumentStatusResponse),
total: z.int(),
})
/**
* DocumentStatusListResponse
*
* Envelope with a single required key: `data`, an array of document status
* objects. Unlike `zErrorDocsResponse`, it carries no `total`.
*/
export const zDocumentStatusListResponse = z.object({
data: z.array(zDocumentStatusResponse),
})
/**
* DatasetMetadataListItemResponse
*/
@ -410,20 +548,34 @@ export const zDatasetMetadataListResponse = z.object({
doc_metadata: z.array(zDatasetMetadataListItemResponse),
})
export const zAppDetailKernel = z.object({
description: z.string().optional(),
icon: z.string().optional(),
icon_background: z.string().optional(),
icon_type: z.string().optional(),
icon_url: z.record(z.string(), z.unknown()).optional(),
id: z.string().optional(),
mode: z.string().optional(),
name: z.string().optional(),
/**
* RelatedAppResponse
*
* App entry in a related-apps listing. `description`, `id`, `mode` and
* `name` are required strings; `icon`, `icon_background` and `icon_type`
* must be present but may be `null`; `icon_url` may be omitted or `null`.
*/
export const zRelatedAppResponse = z.object({
description: z.string(),
icon: z.string().nullable(),
icon_background: z.string().nullable(),
icon_type: z.string().nullable(),
icon_url: z.string().nullish(),
id: z.string(),
mode: z.string(),
name: z.string(),
})
export const zRelatedAppList = z.object({
data: z.array(zAppDetailKernel).optional(),
total: z.int().optional(),
/**
* RelatedAppListResponse
*
* Both keys are required: `data` (array of related-app entries) and the
* integer `total`.
*/
export const zRelatedAppListResponse = z.object({
data: z.array(zRelatedAppResponse),
total: z.int(),
})
/**
* DatasetRerankingModelResponse
*
* Both keys are nullish (may be omitted or `null`), so an empty object
* passes validation.
*/
export const zDatasetRerankingModelResponse = z.object({
reranking_model_name: z.string().nullish(),
reranking_provider_name: z.string().nullish(),
})
export const zDatasetRerankingModel = z.object({
@ -498,29 +650,179 @@ export const zHitTestingFile = z.object({
source_url: z.string().nullish(),
})
export const zDatasetFileInfo = z.object({
extension: z.string().optional(),
id: z.string().optional(),
mime_type: z.string().optional(),
name: z.string().optional(),
size: z.int().optional(),
source_url: z.string().optional(),
/**
* DatasetKeywordSettingResponse
*
* Single required numeric key: `keyword_weight`.
*/
export const zDatasetKeywordSettingResponse = z.object({
keyword_weight: z.number(),
})
export const zDatasetContent = z.object({
content: z.string().optional(),
content_type: z.string().optional(),
file_info: zDatasetFileInfo.optional(),
/**
* DatasetVectorSettingResponse
*
* Names the embedding model and its provider (required strings) and
* carries a required numeric `vector_weight`.
*/
export const zDatasetVectorSettingResponse = z.object({
embedding_model_name: z.string(),
embedding_provider_name: z.string(),
vector_weight: z.number(),
})
export const zDatasetQueryDetail = z.object({
created_at: z.record(z.string(), z.unknown()).optional(),
created_by: z.string().optional(),
created_by_role: z.string().optional(),
id: z.string().optional(),
queries: zDatasetContent.optional(),
source: z.string().optional(),
source_app_id: z.string().optional(),
/**
* DatasetWeightedScoreResponse
*
* Combines a keyword setting and a vector setting. All three keys are
* required; only `weight_type` may be `null`.
*/
export const zDatasetWeightedScoreResponse = z.object({
keyword_setting: zDatasetKeywordSettingResponse,
vector_setting: zDatasetVectorSettingResponse,
weight_type: z.string().nullable(),
})
/**
* DatasetRetrievalModelResponse
*
* Retrieval configuration of a dataset. `reranking_enable`,
* `reranking_model`, `score_threshold_enabled`, `search_method` and
* `top_k` are required. `reranking_mode` and `score_threshold` may be
* omitted or `null`. `weights` is optional but not nullable: an explicit
* `null` is rejected.
*/
export const zDatasetRetrievalModelResponse = z.object({
reranking_enable: z.boolean(),
reranking_mode: z.string().nullish(),
reranking_model: zDatasetRerankingModelResponse,
score_threshold: z.number().nullish(),
score_threshold_enabled: z.boolean(),
search_method: z.string(),
top_k: z.int(),
weights: zDatasetWeightedScoreResponse.optional(),
})
/**
* DatasetDetailResponse
*
* Full dataset detail payload. Every key is required except
* `embedding_available`, which is nullish. Keys declared with
* `.nullable()` must be present but may hold `null`. The nested objects
* (`external_knowledge_info`, `external_retrieval_model`, `icon_info`,
* `retrieval_model_dict`, `summary_index_setting`) are required and not
* nullable.
*
* This schema has no `partial_member_list` key.
*/
export const zDatasetDetailResponse = z.object({
app_count: z.int(),
author_name: z.string().nullable(),
built_in_field_enabled: z.boolean(),
chunk_structure: z.string().nullable(),
created_at: z.int(),
created_by: z.string(),
data_source_type: z.string().nullable(),
description: z.string().nullable(),
doc_form: z.string().nullable(),
doc_metadata: z.array(zDatasetDocMetadataResponse),
document_count: z.int(),
embedding_available: z.boolean().nullish(),
embedding_model: z.string().nullable(),
embedding_model_provider: z.string().nullable(),
enable_api: z.boolean(),
external_knowledge_info: zDatasetExternalKnowledgeInfoResponse,
external_retrieval_model: zDatasetExternalRetrievalModelResponse,
icon_info: zDatasetIconInfoResponse,
id: z.string(),
indexing_technique: z.string().nullable(),
is_multimodal: z.boolean(),
is_published: z.boolean(),
name: z.string(),
permission: z.string(),
pipeline_id: z.string().nullable(),
provider: z.string(),
retrieval_model_dict: zDatasetRetrievalModelResponse,
runtime_mode: z.string().nullable(),
summary_index_setting: zDatasetSummaryIndexSettingResponse,
tags: z.array(zDatasetTagResponse),
total_available_documents: z.int(),
total_documents: z.int(),
updated_at: z.int(),
updated_by: z.string().nullable(),
word_count: z.int(),
})
/**
* DatasetDetailWithPartialMembersResponse
*
* Dataset detail payload that additionally carries `partial_member_list`.
* Every key is required except `embedding_available` and
* `partial_member_list`, which are nullish (may be omitted or `null`).
* Keys declared with `.nullable()` must be present but may hold `null`.
*/
export const zDatasetDetailWithPartialMembersResponse = z.object({
app_count: z.int(),
author_name: z.string().nullable(),
built_in_field_enabled: z.boolean(),
chunk_structure: z.string().nullable(),
created_at: z.int(),
created_by: z.string(),
data_source_type: z.string().nullable(),
description: z.string().nullable(),
doc_form: z.string().nullable(),
doc_metadata: z.array(zDatasetDocMetadataResponse),
document_count: z.int(),
embedding_available: z.boolean().nullish(),
embedding_model: z.string().nullable(),
embedding_model_provider: z.string().nullable(),
enable_api: z.boolean(),
external_knowledge_info: zDatasetExternalKnowledgeInfoResponse,
external_retrieval_model: zDatasetExternalRetrievalModelResponse,
icon_info: zDatasetIconInfoResponse,
id: z.string(),
indexing_technique: z.string().nullable(),
is_multimodal: z.boolean(),
is_published: z.boolean(),
name: z.string(),
// Nullish here: the key may be absent or null.
partial_member_list: z.array(z.string()).nullish(),
permission: z.string(),
pipeline_id: z.string().nullable(),
provider: z.string(),
retrieval_model_dict: zDatasetRetrievalModelResponse,
runtime_mode: z.string().nullable(),
summary_index_setting: zDatasetSummaryIndexSettingResponse,
tags: z.array(zDatasetTagResponse),
total_available_documents: z.int(),
total_documents: z.int(),
updated_at: z.int(),
updated_by: z.string().nullable(),
word_count: z.int(),
})
/**
* DatasetListItemResponse
*
* One element of a dataset listing. Every key is required except
* `embedding_available`, which is nullish. Keys declared with
* `.nullable()` must be present but may hold `null`.
*/
export const zDatasetListItemResponse = z.object({
app_count: z.int(),
author_name: z.string().nullable(),
built_in_field_enabled: z.boolean(),
chunk_structure: z.string().nullable(),
created_at: z.int(),
created_by: z.string(),
data_source_type: z.string().nullable(),
description: z.string().nullable(),
doc_form: z.string().nullable(),
doc_metadata: z.array(zDatasetDocMetadataResponse),
document_count: z.int(),
embedding_available: z.boolean().nullish(),
embedding_model: z.string().nullable(),
embedding_model_provider: z.string().nullable(),
enable_api: z.boolean(),
external_knowledge_info: zDatasetExternalKnowledgeInfoResponse,
external_retrieval_model: zDatasetExternalRetrievalModelResponse,
icon_info: zDatasetIconInfoResponse,
id: z.string(),
indexing_technique: z.string().nullable(),
is_multimodal: z.boolean(),
is_published: z.boolean(),
name: z.string(),
// Required and non-nullable in list items: must always be an array.
partial_member_list: z.array(z.string()),
permission: z.string(),
pipeline_id: z.string().nullable(),
provider: z.string(),
retrieval_model_dict: zDatasetRetrievalModelResponse,
runtime_mode: z.string().nullable(),
summary_index_setting: zDatasetSummaryIndexSettingResponse,
tags: z.array(zDatasetTagResponse),
total_available_documents: z.int(),
total_documents: z.int(),
updated_at: z.int(),
updated_by: z.string().nullable(),
word_count: z.int(),
})
/**
* DatasetListResponse
*
* Paginated envelope: `data` holds the dataset list items; `has_more`,
* `limit`, `page` and `total` describe the page. All keys are required.
*/
export const zDatasetListResponse = z.object({
data: z.array(zDatasetListItemResponse),
has_more: z.boolean(),
limit: z.int(),
page: z.int(),
total: z.int(),
})
export const zDatasetKeywordSetting = z.object({
@ -790,6 +1092,51 @@ export const zHitTestingResponse = z.object({
records: z.array(zHitTestingRecord).optional(),
})
/**
* DatasetQueryFileInfoResponse
*
* File metadata attached to a dataset query content item. All keys are
* required; `size` is an integer (unit not stated here) and the rest are
* strings.
*/
export const zDatasetQueryFileInfoResponse = z.object({
extension: z.string(),
id: z.string(),
mime_type: z.string(),
name: z.string(),
size: z.int(),
source_url: z.string(),
})
/**
* DatasetQueryContentResponse
*
* `content` and `content_type` are required strings. `file_info` is
* optional but not nullable: an explicit `null` is rejected.
*/
export const zDatasetQueryContentResponse = z.object({
content: z.string(),
content_type: z.string(),
file_info: zDatasetQueryFileInfoResponse.optional(),
})
/**
* DatasetQueryDetailResponse
*
* One dataset query record. All keys are required; `queries` is an array
* of query content items, `created_at` is an integer, and
* `source_app_id` is the only value that may be `null`.
*/
export const zDatasetQueryDetailResponse = z.object({
created_at: z.int(),
created_by: z.string(),
created_by_role: z.string(),
id: z.string(),
queries: z.array(zDatasetQueryContentResponse),
source: z.string(),
source_app_id: z.string().nullable(),
})
/**
* DatasetQueryListResponse
*
* Paginated envelope: `data` holds the query records; `has_more`,
* `limit`, `page` and `total` describe the page. All keys are required.
*/
export const zDatasetQueryListResponse = z.object({
data: z.array(zDatasetQueryDetailResponse),
has_more: z.boolean(),
limit: z.int(),
page: z.int(),
total: z.int(),
})
/**
* NotionIcon
*/
@ -855,25 +1202,25 @@ export const zKnowledgeConfig = z.object({
})
export const zGetDatasetsQuery = z.object({
ids: z.string().optional(),
include_all: z.string().optional(),
ids: z.array(z.string()).optional(),
include_all: z.boolean().optional().default(false),
keyword: z.string().optional(),
limit: z.string().optional(),
page: z.string().optional(),
tag_ids: z.string().optional(),
limit: z.int().optional().default(20),
page: z.int().optional().default(1),
tag_ids: z.array(z.string()).optional(),
})
/**
* Datasets retrieved successfully
*/
export const zGetDatasetsResponse = z.record(z.string(), z.unknown())
export const zGetDatasetsResponse = zDatasetListResponse
// Alias: exposes `zDatasetCreatePayload` under the operation-specific body-schema name.
export const zPostDatasetsBody = zDatasetCreatePayload
/**
* Dataset created successfully
*/
export const zPostDatasetsResponse = z.record(z.string(), z.unknown())
export const zPostDatasetsResponse = zDatasetDetailResponse
/**
* API base info retrieved successfully
@ -998,7 +1345,7 @@ export const zPostDatasetsIndexingEstimateBody = zIndexingEstimatePayload
/**
* Indexing estimate calculated successfully
*/
export const zPostDatasetsIndexingEstimateResponse = z.record(z.string(), z.unknown())
export const zPostDatasetsIndexingEstimateResponse = zIndexingEstimateResponse
// Alias: exposes `zKnowledgeConfig` under the operation-specific body-schema name.
export const zPostDatasetsInitBody = zKnowledgeConfig
@ -1036,7 +1383,7 @@ export const zGetDatasetsProcessRuleResponse = z.record(z.string(), z.unknown())
/**
* Retrieval settings retrieved successfully
*/
export const zGetDatasetsRetrievalSettingResponse = z.record(z.string(), z.unknown())
export const zGetDatasetsRetrievalSettingResponse = zRetrievalSettingResponse
export const zGetDatasetsRetrievalSettingByVectorTypePath = z.object({
vector_type: z.string(),
@ -1045,7 +1392,7 @@ export const zGetDatasetsRetrievalSettingByVectorTypePath = z.object({
/**
* Mock retrieval settings retrieved successfully
*/
export const zGetDatasetsRetrievalSettingByVectorTypeResponse = z.record(z.string(), z.unknown())
export const zGetDatasetsRetrievalSettingByVectorTypeResponse = zRetrievalSettingResponse
export const zDeleteDatasetsByDatasetIdPath = z.object({
dataset_id: z.string(),
@ -1063,7 +1410,7 @@ export const zGetDatasetsByDatasetIdPath = z.object({
/**
* Dataset retrieved successfully
*/
export const zGetDatasetsByDatasetIdResponse = zDatasetDetail
export const zGetDatasetsByDatasetIdResponse = zDatasetDetailWithPartialMembersResponse
// Alias: exposes `zDatasetUpdatePayload` under the operation-specific body-schema name.
export const zPatchDatasetsByDatasetIdBody = zDatasetUpdatePayload
@ -1074,7 +1421,7 @@ export const zPatchDatasetsByDatasetIdPath = z.object({
/**
* Dataset updated successfully
*/
export const zPatchDatasetsByDatasetIdResponse = zDatasetDetail
export const zPatchDatasetsByDatasetIdResponse = zDatasetDetailWithPartialMembersResponse
export const zPostDatasetsByDatasetIdApiKeysByStatusPath = z.object({
dataset_id: z.string(),
@ -1093,7 +1440,7 @@ export const zGetDatasetsByDatasetIdAutoDisableLogsPath = z.object({
/**
* Auto disable logs retrieved successfully
*/
export const zGetDatasetsByDatasetIdAutoDisableLogsResponse = z.record(z.string(), z.unknown())
export const zGetDatasetsByDatasetIdAutoDisableLogsResponse = zAutoDisableLogsResponse
export const zGetDatasetsByDatasetIdBatchByBatchIndexingEstimatePath = z.object({
batch: z.string(),
@ -1570,7 +1917,7 @@ export const zGetDatasetsByDatasetIdErrorDocsPath = z.object({
/**
* Error documents retrieved successfully
*/
export const zGetDatasetsByDatasetIdErrorDocsResponse = z.record(z.string(), z.unknown())
export const zGetDatasetsByDatasetIdErrorDocsResponse = zErrorDocsResponse
// Alias: exposes `zExternalHitTestingPayload` under the operation-specific body-schema name.
export const zPostDatasetsByDatasetIdExternalHitTestingBody = zExternalHitTestingPayload
@ -1601,7 +1948,7 @@ export const zGetDatasetsByDatasetIdIndexingStatusPath = z.object({
/**
* Indexing status retrieved successfully
*/
export const zGetDatasetsByDatasetIdIndexingStatusResponse = z.record(z.string(), z.unknown())
export const zGetDatasetsByDatasetIdIndexingStatusResponse = zDocumentStatusListResponse
export const zGetDatasetsByDatasetIdMetadataPath = z.object({
dataset_id: z.string(),
@ -1677,7 +2024,7 @@ export const zGetDatasetsByDatasetIdPermissionPartUsersPath = z.object({
/**
* Permission users retrieved successfully
*/
export const zGetDatasetsByDatasetIdPermissionPartUsersResponse = z.record(z.string(), z.unknown())
export const zGetDatasetsByDatasetIdPermissionPartUsersResponse = zPartialMemberListResponse
export const zGetDatasetsByDatasetIdQueriesPath = z.object({
dataset_id: z.string(),
@ -1686,7 +2033,7 @@ export const zGetDatasetsByDatasetIdQueriesPath = z.object({
/**
* Query history retrieved successfully
*/
export const zGetDatasetsByDatasetIdQueriesResponse = zDatasetQueryDetail
export const zGetDatasetsByDatasetIdQueriesResponse = zDatasetQueryListResponse
export const zGetDatasetsByDatasetIdRelatedAppsPath = z.object({
dataset_id: z.string(),
@ -1695,7 +2042,7 @@ export const zGetDatasetsByDatasetIdRelatedAppsPath = z.object({
/**
* Related apps retrieved successfully
*/
export const zGetDatasetsByDatasetIdRelatedAppsResponse = zRelatedAppList
export const zGetDatasetsByDatasetIdRelatedAppsResponse = zRelatedAppListResponse
// Alias: exposes `zDocumentRetryPayload` under the operation-specific body-schema name.
export const zPostDatasetsByDatasetIdRetryBody = zDocumentRetryPayload

View File

@ -57,6 +57,7 @@ import {
zGetDatasetsByDatasetIdResponse,
zGetDatasetsByDatasetIdTagsPath,
zGetDatasetsByDatasetIdTagsResponse,
zGetDatasetsQuery,
zGetDatasetsResponse,
zGetDatasetsTagsResponse,
zGetEndUsersByEndUserIdPath,
@ -793,16 +794,10 @@ export const delete3 = oc
* Get all knowledge type tags
*
* Get all knowledge type tags
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get7 = oc
.route({
deprecated: true,
description:
'Get all knowledge type tags\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get all knowledge type tags',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsTags',
@ -814,16 +809,10 @@ export const get7 = oc
/**
* Update a knowledge type tag
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const patch = oc
.route({
deprecated: true,
description:
'Update a knowledge type tag\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Update a knowledge type tag',
inputStructure: 'detailed',
method: 'PATCH',
operationId: 'patchDatasetsTags',
@ -837,16 +826,10 @@ export const patch = oc
* Add a knowledge type tag
*
* Add a knowledge type tag
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const post12 = oc
.route({
deprecated: true,
description:
'Add a knowledge type tag\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Add a knowledge type tag',
inputStructure: 'detailed',
method: 'POST',
operationId: 'postDatasetsTags',
@ -1894,16 +1877,10 @@ export const retrieve = {
* Get all knowledge type tags
*
* Get tags bound to a specific dataset
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get18 = oc
.route({
deprecated: true,
description:
'Get tags bound to a specific dataset\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get tags bound to a specific dataset',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsByDatasetIdTags',
@ -1951,16 +1928,10 @@ export const delete8 = oc
/**
* Get a specific dataset by ID
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get19 = oc
.route({
deprecated: true,
description:
'Get a specific dataset by ID\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get a specific dataset by ID',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsByDatasetId',
@ -2008,16 +1979,10 @@ export const byDatasetId = {
* Resource for getting datasets
*
* List all datasets
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get20 = oc
.route({
deprecated: true,
description:
'List all datasets\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'List all datasets',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasets',
@ -2025,6 +1990,7 @@ export const get20 = oc
summary: 'Resource for getting datasets',
tags: ['service_api'],
})
.input(z.object({ query: zGetDatasetsQuery.optional() }))
.output(zGetDatasetsResponse)
/**

View File

@ -141,11 +141,14 @@ export type ConversationVariablesQuery = {
variable_name?: string | null
}
export type DataSetTag = {
binding_count?: string | null
/**
 * List envelope for tags bound to a dataset: `data` holds the tag entries
 * and `total` is a required number.
 */
export type DatasetBoundTagListResponse = {
data: Array<DatasetBoundTagResponse>
total: number
}
/**
 * One tag bound to a dataset: `id`, `name` and `type` are all required
 * strings.
 */
export type DatasetBoundTagResponse = {
id: string
name: string
type: string
}
export type DatasetCreatePayload = {
@ -164,6 +167,113 @@ export type DatasetCreatePayload = {
} | null
}
/**
 * Full dataset detail payload.
 *
 * Every key is required except `embedding_available`, which may be
 * omitted or `null`. Keys typed `T | null` are always present but may
 * hold `null`. The nested objects (`external_knowledge_info`,
 * `external_retrieval_model`, `icon_info`, `retrieval_model_dict`,
 * `summary_index_setting`) are required and non-nullable.
 *
 * This type has no `partial_member_list` key.
 */
export type DatasetDetailResponse = {
app_count: number
author_name: string | null
built_in_field_enabled: boolean
chunk_structure: string | null
created_at: number
created_by: string
data_source_type: string | null
description: string | null
doc_form: string | null
doc_metadata: Array<DatasetDocMetadataResponse>
document_count: number
embedding_available?: boolean | null
embedding_model: string | null
embedding_model_provider: string | null
enable_api: boolean
external_knowledge_info: DatasetExternalKnowledgeInfoResponse
external_retrieval_model: DatasetExternalRetrievalModelResponse
icon_info: DatasetIconInfoResponse
id: string
indexing_technique: string | null
is_multimodal: boolean
is_published: boolean
name: string
permission: string
pipeline_id: string | null
provider: string
retrieval_model_dict: DatasetRetrievalModelResponse
runtime_mode: string | null
summary_index_setting: DatasetSummaryIndexSettingResponse
tags: Array<DatasetTagResponse>
total_available_documents: number
total_documents: number
updated_at: number
updated_by: string | null
word_count: number
}
/**
 * Dataset detail payload that additionally carries `partial_member_list`.
 *
 * Every key is required except `embedding_available` and
 * `partial_member_list`, which may be omitted or `null`. Keys typed
 * `T | null` are always present but may hold `null`.
 */
export type DatasetDetailWithPartialMembersResponse = {
app_count: number
author_name: string | null
built_in_field_enabled: boolean
chunk_structure: string | null
created_at: number
created_by: string
data_source_type: string | null
description: string | null
doc_form: string | null
doc_metadata: Array<DatasetDocMetadataResponse>
document_count: number
embedding_available?: boolean | null
embedding_model: string | null
embedding_model_provider: string | null
enable_api: boolean
external_knowledge_info: DatasetExternalKnowledgeInfoResponse
external_retrieval_model: DatasetExternalRetrievalModelResponse
icon_info: DatasetIconInfoResponse
id: string
indexing_technique: string | null
is_multimodal: boolean
is_published: boolean
name: string
partial_member_list?: Array<string> | null
permission: string
pipeline_id: string | null
provider: string
retrieval_model_dict: DatasetRetrievalModelResponse
runtime_mode: string | null
summary_index_setting: DatasetSummaryIndexSettingResponse
tags: Array<DatasetTagResponse>
total_available_documents: number
total_documents: number
updated_at: number
updated_by: string | null
word_count: number
}
/** Metadata field descriptor: `id`, `name` and `type` are all required strings. */
export type DatasetDocMetadataResponse = {
id: string
name: string
type: string
}
/**
 * External knowledge reference. All four keys are required strings; none
 * is optional or nullable.
 */
export type DatasetExternalKnowledgeInfoResponse = {
external_knowledge_api_endpoint: string
external_knowledge_api_id: string
external_knowledge_api_name: string
external_knowledge_id: string
}
/**
 * External retrieval settings. `score_threshold` and `top_k` are required
 * numbers; `score_threshold_enabled` may be omitted or `null`.
 */
export type DatasetExternalRetrievalModelResponse = {
score_threshold: number
score_threshold_enabled?: boolean | null
top_k: number
}
/**
 * Icon information. `icon` and `icon_type` are required keys whose value
 * may be `null`; `icon_background` and `icon_url` may be omitted or `null`.
 */
export type DatasetIconInfoResponse = {
icon: string | null
icon_background?: string | null
icon_type: string | null
icon_url?: string | null
}
/** Keyword half of a weighted-score configuration: a single required numeric `keyword_weight`. */
export type DatasetKeywordSettingResponse = {
keyword_weight: number
}
export type DatasetListQuery = {
include_all?: boolean
keyword?: string | null
@ -172,6 +282,14 @@ export type DatasetListQuery = {
tag_ids?: Array<string>
}
/**
 * Paginated dataset list envelope.
 *
 * `data` holds `DatasetDetailResponse` items; `has_more`, `limit`, `page`
 * and `total` describe the page. All keys are required.
 */
export type DatasetListResponse = {
data: Array<DatasetDetailResponse>
has_more: boolean
limit: number
page: number
total: number
}
/** Result envelope with a single required string key, `result`. */
export type DatasetMetadataActionResponse = {
result: string
}
@ -205,6 +323,35 @@ export type DatasetMetadataResponse = {
/** String-literal union of the three accepted dataset permission values. */
export type DatasetPermissionEnum = 'all_team_members' | 'only_me' | 'partial_members'
/**
 * Reranking model reference. Both keys are optional and nullable, so an
 * empty object is a valid value.
 */
export type DatasetRerankingModelResponse = {
reranking_model_name?: string | null
reranking_provider_name?: string | null
}
/**
 * Retrieval configuration of a dataset.
 *
 * `reranking_enable`, `reranking_model`, `score_threshold_enabled`,
 * `search_method` and `top_k` are required. `reranking_mode` and
 * `score_threshold` may be omitted or `null`. `weights` may be omitted
 * but is not typed as nullable.
 */
export type DatasetRetrievalModelResponse = {
reranking_enable: boolean
reranking_mode?: string | null
reranking_model: DatasetRerankingModelResponse
score_threshold?: number | null
score_threshold_enabled: boolean
search_method: string
top_k: number
weights?: DatasetWeightedScoreResponse
}
/**
 * Summary-index settings. Every key is optional and nullable, so an empty
 * object is a valid value.
 */
export type DatasetSummaryIndexSettingResponse = {
enable?: boolean | null
model_name?: string | null
model_provider_name?: string | null
summary_prompt?: string | null
}
/**
 * Element type of the `tags` array on the dataset detail types: `id`,
 * `name` and `type` are all required strings.
 */
export type DatasetTagResponse = {
id: string
name: string
type: string
}
export type DatasetUpdatePayload = {
description?: string | null
embedding_model?: string | null
@ -223,6 +370,18 @@ export type DatasetUpdatePayload = {
retrieval_model?: RetrievalModel
}
/**
 * Vector half of a weighted-score configuration: names the embedding
 * model and its provider and carries a numeric `vector_weight`. All keys
 * are required.
 */
export type DatasetVectorSettingResponse = {
embedding_model_name: string
embedding_provider_name: string
vector_weight: number
}
/**
 * Weighted-score configuration combining a keyword setting and a vector
 * setting. All three keys are required; only `weight_type` may be `null`.
 */
export type DatasetWeightedScoreResponse = {
keyword_setting: DatasetKeywordSettingResponse
vector_setting: DatasetVectorSettingResponse
weight_type: string | null
}
export type DatasourceNodeRunPayload = {
credential_id?: string | null
datasource_type: string
@ -334,6 +493,15 @@ export type IndexInfoResponse = {
/** Alias for `unknown`: values of this type must be narrowed before use. */
export type JsonValue = unknown
/** Bare array of `KnowledgeTagResponse` items, with no wrapping envelope object. */
export type KnowledgeTagListResponse = Array<KnowledgeTagResponse>
/**
 * Tag as returned by the dataset tag operations: the element type of
 * `KnowledgeTagListResponse` and the 200 body in `PatchDatasetsTagsResponses`
 * and `PostDatasetsTagsResponses`.
 * `id`, `name` and `type` are required; `binding_count` may be omitted or `null`.
 * NOTE(review): `binding_count` is typed as a string although the name
 * suggests a number — confirm against the backend model.
 */
export type KnowledgeTagResponse = {
binding_count?: string | null
id: string
name: string
type: string
}
export type MessageFeedbackPayload = {
content?: string | null
rating?: 'dislike' | 'like' | null
@ -1155,7 +1323,13 @@ export type PutConversationsByCIdVariablesByVariableIdResponse
export type GetDatasetsData = {
body?: never
path?: never
query?: never
query?: {
include_all?: boolean
keyword?: string
limit?: number
page?: number
tag_ids?: Array<string>
}
url: '/datasets'
}
@ -1168,9 +1342,7 @@ export type GetDatasetsErrors = {
/** Union of every member type of `GetDatasetsErrors` (indexed access over all of its keys). */
export type GetDatasetsError = GetDatasetsErrors[keyof GetDatasetsErrors]
export type GetDatasetsResponses = {
200: {
[key: string]: unknown
}
200: DatasetListResponse
}
/** Union of every member type of `GetDatasetsResponses` (indexed access over all of its keys). */
export type GetDatasetsResponse = GetDatasetsResponses[keyof GetDatasetsResponses]
@ -1194,9 +1366,7 @@ export type PostDatasetsErrors = {
/** Union of every member type of `PostDatasetsErrors` (indexed access over all of its keys). */
export type PostDatasetsError = PostDatasetsErrors[keyof PostDatasetsErrors]
export type PostDatasetsResponses = {
200: {
[key: string]: unknown
}
200: DatasetDetailResponse
}
/** Union of every member type of `PostDatasetsResponses` (indexed access over all of its keys). */
export type PostDatasetsResponse = PostDatasetsResponses[keyof PostDatasetsResponses]
@ -1278,9 +1448,7 @@ export type GetDatasetsTagsErrors = {
/** Union of every member type of `GetDatasetsTagsErrors` (indexed access over all of its keys). */
export type GetDatasetsTagsError = GetDatasetsTagsErrors[keyof GetDatasetsTagsErrors]
export type GetDatasetsTagsResponses = {
200: {
[key: string]: unknown
}
200: KnowledgeTagListResponse
}
/** Union of every member type of `GetDatasetsTagsResponses` (indexed access over all of its keys). */
export type GetDatasetsTagsResponse = GetDatasetsTagsResponses[keyof GetDatasetsTagsResponses]
@ -1304,9 +1472,7 @@ export type PatchDatasetsTagsErrors = {
/** Union of every member type of `PatchDatasetsTagsErrors` (indexed access over all of its keys). */
export type PatchDatasetsTagsError = PatchDatasetsTagsErrors[keyof PatchDatasetsTagsErrors]
export type PatchDatasetsTagsResponses = {
200: {
[key: string]: unknown
}
200: KnowledgeTagResponse
}
/** Union of every member type of `PatchDatasetsTagsResponses` (indexed access over all of its keys). */
export type PatchDatasetsTagsResponse = PatchDatasetsTagsResponses[keyof PatchDatasetsTagsResponses]
@ -1330,9 +1496,7 @@ export type PostDatasetsTagsErrors = {
/** Union of every member type of `PostDatasetsTagsErrors` (indexed access over all of its keys). */
export type PostDatasetsTagsError = PostDatasetsTagsErrors[keyof PostDatasetsTagsErrors]
export type PostDatasetsTagsResponses = {
200: {
[key: string]: unknown
}
200: KnowledgeTagResponse
}
/** Union of every member type of `PostDatasetsTagsResponses` (indexed access over all of its keys). */
export type PostDatasetsTagsResponse = PostDatasetsTagsResponses[keyof PostDatasetsTagsResponses]
@ -1451,9 +1615,7 @@ export type GetDatasetsByDatasetIdError
= GetDatasetsByDatasetIdErrors[keyof GetDatasetsByDatasetIdErrors]
export type GetDatasetsByDatasetIdResponses = {
200: {
[key: string]: unknown
}
200: DatasetDetailWithPartialMembersResponse
}
export type GetDatasetsByDatasetIdResponse
@ -1484,9 +1646,7 @@ export type PatchDatasetsByDatasetIdError
= PatchDatasetsByDatasetIdErrors[keyof PatchDatasetsByDatasetIdErrors]
export type PatchDatasetsByDatasetIdResponses = {
200: {
[key: string]: unknown
}
200: DatasetDetailWithPartialMembersResponse
}
export type PatchDatasetsByDatasetIdResponse
@ -2661,9 +2821,7 @@ export type GetDatasetsByDatasetIdTagsError
= GetDatasetsByDatasetIdTagsErrors[keyof GetDatasetsByDatasetIdTagsErrors]
export type GetDatasetsByDatasetIdTagsResponses = {
200: {
[key: string]: unknown
}
200: DatasetBoundTagListResponse
}
export type GetDatasetsByDatasetIdTagsResponse

View File

@ -189,15 +189,66 @@ export const zConversationVariablesQuery = z.object({
})
/**
* DataSetTag
* DatasetBoundTagResponse
*/
export const zDataSetTag = z.object({
binding_count: z.string().nullish(),
export const zDatasetBoundTagResponse = z.object({
id: z.string(),
name: z.string(),
})
/**
* DatasetBoundTagListResponse
*
* Object schema with a required `data` array of `zDatasetBoundTagResponse`
* items and a required integer `total`. Reused as
* `zGetDatasetsByDatasetIdTagsResponse`.
*/
export const zDatasetBoundTagListResponse = z.object({
data: z.array(zDatasetBoundTagResponse),
total: z.int(),
})
/**
* DatasetDocMetadataResponse
*
* Three required string fields. Used as the element schema of the
* `doc_metadata` array in `zDatasetDetailResponse` and
* `zDatasetDetailWithPartialMembersResponse`.
*/
export const zDatasetDocMetadataResponse = z.object({
id: z.string(),
name: z.string(),
type: z.string(),
})
/**
* DatasetExternalKnowledgeInfoResponse
*
* Four required string fields; none accept `null` or may be omitted.
* Used as the required `external_knowledge_info` field of the dataset
* detail schemas.
*/
export const zDatasetExternalKnowledgeInfoResponse = z.object({
external_knowledge_api_endpoint: z.string(),
external_knowledge_api_id: z.string(),
external_knowledge_api_name: z.string(),
external_knowledge_id: z.string(),
})
/**
* DatasetExternalRetrievalModelResponse
*
* `score_threshold` (number) and `top_k` (integer) are required;
* `score_threshold_enabled` may be omitted or `null`.
* This is the reverse of `zDatasetRetrievalModelResponse`, where
* `score_threshold` is nullish and `score_threshold_enabled` is required.
*/
export const zDatasetExternalRetrievalModelResponse = z.object({
score_threshold: z.number(),
score_threshold_enabled: z.boolean().nullish(),
top_k: z.int(),
})
/**
* DatasetIconInfoResponse
*
* `icon` and `icon_type` must be present but may be `null` (`.nullable()`);
* `icon_background` and `icon_url` may be omitted or `null` (`.nullish()`).
*/
export const zDatasetIconInfoResponse = z.object({
icon: z.string().nullable(),
icon_background: z.string().nullish(),
icon_type: z.string().nullable(),
icon_url: z.string().nullish(),
})
/**
* DatasetKeywordSettingResponse
*
* Single required numeric `keyword_weight`. Referenced by
* `zDatasetWeightedScoreResponse.keyword_setting`.
*/
export const zDatasetKeywordSettingResponse = z.object({
keyword_weight: z.number(),
})
/**
* DatasetListQuery
*/
@ -263,6 +314,159 @@ export const zDatasetMetadataResponse = z.object({
*/
export const zDatasetPermissionEnum = z.enum(['all_team_members', 'only_me', 'partial_members'])
/**
* DatasetRerankingModelResponse
*
* Both fields may be omitted or `null`, so an empty object validates.
* Referenced by `zDatasetRetrievalModelResponse.reranking_model`.
*/
export const zDatasetRerankingModelResponse = z.object({
reranking_model_name: z.string().nullish(),
reranking_provider_name: z.string().nullish(),
})
/**
* DatasetSummaryIndexSettingResponse
*
* All four fields may be omitted or `null`. The object itself is a
* required, non-nullable field (`summary_index_setting`) of the dataset
* detail schemas.
*/
export const zDatasetSummaryIndexSettingResponse = z.object({
enable: z.boolean().nullish(),
model_name: z.string().nullish(),
model_provider_name: z.string().nullish(),
summary_prompt: z.string().nullish(),
})
/**
* DatasetTagResponse
*
* Three required string fields. Used as the element schema of the `tags`
* array in the dataset detail schemas. `zKnowledgeTagResponse` has the
* same fields plus a nullish `binding_count`.
*/
export const zDatasetTagResponse = z.object({
id: z.string(),
name: z.string(),
type: z.string(),
})
/**
* DatasetVectorSettingResponse
*
* Two required strings and a required numeric `vector_weight`.
* Referenced by `zDatasetWeightedScoreResponse.vector_setting`.
*/
export const zDatasetVectorSettingResponse = z.object({
embedding_model_name: z.string(),
embedding_provider_name: z.string(),
vector_weight: z.number(),
})
/**
* DatasetWeightedScoreResponse
*
* Combines the keyword and vector setting schemas. All keys are required;
* `weight_type` must be present but may be `null`.
* It references two `const` schemas, so it must be declared after them.
*/
export const zDatasetWeightedScoreResponse = z.object({
keyword_setting: zDatasetKeywordSettingResponse,
vector_setting: zDatasetVectorSettingResponse,
weight_type: z.string().nullable(),
})
/**
* DatasetRetrievalModelResponse
*
* Required: `reranking_enable`, `reranking_model`, `score_threshold_enabled`,
* `search_method`, `top_k` (integer).
* Omittable or `null`: `reranking_mode`, `score_threshold`.
* `weights` may be omitted but, when present, must be a full
* `zDatasetWeightedScoreResponse` object (`.optional()`, not `.nullish()`).
* It must be declared after the reranking and weighted-score schemas it
* references.
*/
export const zDatasetRetrievalModelResponse = z.object({
reranking_enable: z.boolean(),
reranking_mode: z.string().nullish(),
reranking_model: zDatasetRerankingModelResponse,
score_threshold: z.number().nullish(),
score_threshold_enabled: z.boolean(),
search_method: z.string(),
top_k: z.int(),
weights: zDatasetWeightedScoreResponse.optional(),
})
/**
* DatasetDetailResponse
*
* Full dataset record. Reused as the element schema of
* `zDatasetListResponse.data` and as `zPostDatasetsResponse`.
*
* Every key must be present except `embedding_available`, the only
* `.nullish()` field. Several scalar fields are `.nullable()`: present but
* possibly `null`. The nested objects (`external_knowledge_info`,
* `external_retrieval_model`, `icon_info`, `retrieval_model_dict`,
* `summary_index_setting`) are neither optional nor nullable, so a payload
* that sends `null` for any of them fails validation.
*
* `permission` is a plain string here, not `zDatasetPermissionEnum`.
* NOTE(review): `created_at` / `updated_at` are validated as integers,
* presumably Unix timestamps — the unit is not visible here.
*/
export const zDatasetDetailResponse = z.object({
app_count: z.int(),
author_name: z.string().nullable(),
built_in_field_enabled: z.boolean(),
chunk_structure: z.string().nullable(),
created_at: z.int(),
created_by: z.string(),
data_source_type: z.string().nullable(),
description: z.string().nullable(),
doc_form: z.string().nullable(),
doc_metadata: z.array(zDatasetDocMetadataResponse),
document_count: z.int(),
embedding_available: z.boolean().nullish(),
embedding_model: z.string().nullable(),
embedding_model_provider: z.string().nullable(),
enable_api: z.boolean(),
external_knowledge_info: zDatasetExternalKnowledgeInfoResponse,
external_retrieval_model: zDatasetExternalRetrievalModelResponse,
icon_info: zDatasetIconInfoResponse,
id: z.string(),
indexing_technique: z.string().nullable(),
is_multimodal: z.boolean(),
is_published: z.boolean(),
name: z.string(),
permission: z.string(),
pipeline_id: z.string().nullable(),
provider: z.string(),
retrieval_model_dict: zDatasetRetrievalModelResponse,
runtime_mode: z.string().nullable(),
summary_index_setting: zDatasetSummaryIndexSettingResponse,
tags: z.array(zDatasetTagResponse),
total_available_documents: z.int(),
total_documents: z.int(),
updated_at: z.int(),
updated_by: z.string().nullable(),
word_count: z.int(),
})
/**
* DatasetDetailWithPartialMembersResponse
*
* Same field set as `zDatasetDetailResponse` plus one extra key,
* `partial_member_list`: an array of strings that may be omitted or `null`.
* Reused as `zGetDatasetsByDatasetIdResponse` and
* `zPatchDatasetsByDatasetIdResponse`.
*
* The shared fields are written out again here, not derived from
* `zDatasetDetailResponse`, so the two lists must be kept in sync.
*/
export const zDatasetDetailWithPartialMembersResponse = z.object({
app_count: z.int(),
author_name: z.string().nullable(),
built_in_field_enabled: z.boolean(),
chunk_structure: z.string().nullable(),
created_at: z.int(),
created_by: z.string(),
data_source_type: z.string().nullable(),
description: z.string().nullable(),
doc_form: z.string().nullable(),
doc_metadata: z.array(zDatasetDocMetadataResponse),
document_count: z.int(),
embedding_available: z.boolean().nullish(),
embedding_model: z.string().nullable(),
embedding_model_provider: z.string().nullable(),
enable_api: z.boolean(),
external_knowledge_info: zDatasetExternalKnowledgeInfoResponse,
external_retrieval_model: zDatasetExternalRetrievalModelResponse,
icon_info: zDatasetIconInfoResponse,
id: z.string(),
indexing_technique: z.string().nullable(),
is_multimodal: z.boolean(),
is_published: z.boolean(),
name: z.string(),
// The only field that zDatasetDetailResponse does not have.
partial_member_list: z.array(z.string()).nullish(),
permission: z.string(),
pipeline_id: z.string().nullable(),
provider: z.string(),
retrieval_model_dict: zDatasetRetrievalModelResponse,
runtime_mode: z.string().nullable(),
summary_index_setting: zDatasetSummaryIndexSettingResponse,
tags: z.array(zDatasetTagResponse),
total_available_documents: z.int(),
total_documents: z.int(),
updated_at: z.int(),
updated_by: z.string().nullable(),
word_count: z.int(),
})
/**
* DatasetListResponse
*
* Required `data` array of `zDatasetDetailResponse` items plus four
* required scalars: `has_more` (boolean) and integer `limit`, `page`,
* `total`. Reused as `zGetDatasetsResponse`.
*/
export const zDatasetListResponse = z.object({
data: z.array(zDatasetDetailResponse),
has_more: z.boolean(),
limit: z.int(),
page: z.int(),
total: z.int(),
})
/**
* DatasourceNodeRunPayload
*/
@ -368,6 +572,21 @@ export const zHumanInputFormSubmitPayload = z.object({
inputs: z.record(z.string(), zJsonValue),
})
/**
* KnowledgeTagResponse
*
* `id`, `name` and `type` are required strings; `binding_count` may be
* omitted or `null`. Reused as `zPatchDatasetsTagsResponse` and
* `zPostDatasetsTagsResponse`, and as the element of
* `zKnowledgeTagListResponse`.
* NOTE(review): `binding_count` is validated as a string although the name
* suggests a number — confirm against the backend model.
*/
export const zKnowledgeTagResponse = z.object({
binding_count: z.string().nullish(),
id: z.string(),
name: z.string(),
type: z.string(),
})
/**
* KnowledgeTagListResponse
*
* Top-level array schema (no wrapper object) of `zKnowledgeTagResponse`
* items. Reused as `zGetDatasetsTagsResponse`.
*/
export const zKnowledgeTagListResponse = z.array(zKnowledgeTagResponse)
/**
* MessageFeedbackPayload
*/
@ -1039,17 +1258,25 @@ export const zPutConversationsByCIdVariablesByVariableIdPath = z.object({
*/
export const zPutConversationsByCIdVariablesByVariableIdResponse = zConversationVariableResponse
/**
* Query-string schema for the `GetDatasets` operation.
*
* Every key may be omitted. `include_all`, `limit` and `page` carry
* defaults (`false`, `20`, `1`) that are filled in when the key is absent;
* `keyword` and `tag_ids` have no default and stay undefined.
* NOTE(review): `z.boolean()` / `z.int()` do not coerce strings — confirm
* that callers pass already-typed values, not raw query-string text.
*/
export const zGetDatasetsQuery = z.object({
include_all: z.boolean().optional().default(false),
keyword: z.string().optional(),
limit: z.int().optional().default(20),
page: z.int().optional().default(1),
tag_ids: z.array(z.string()).optional(),
})
/**
* Datasets retrieved successfully
*/
export const zGetDatasetsResponse = z.record(z.string(), z.unknown())
export const zGetDatasetsResponse = zDatasetListResponse
/** Alias: the `PostDatasets` body schema is `zDatasetCreatePayload` itself (same object, not a copy). */
export const zPostDatasetsBody = zDatasetCreatePayload
/**
* Dataset created successfully
*/
export const zPostDatasetsResponse = z.record(z.string(), z.unknown())
export const zPostDatasetsResponse = zDatasetDetailResponse
/**
* File uploaded successfully
@ -1066,21 +1293,21 @@ export const zDeleteDatasetsTagsResponse = z.record(z.string(), z.never())
/**
* Tags retrieved successfully
*/
export const zGetDatasetsTagsResponse = z.record(z.string(), z.unknown())
export const zGetDatasetsTagsResponse = zKnowledgeTagListResponse
/** Alias: the `PatchDatasetsTags` body schema is `zTagUpdatePayload` itself (same object, not a copy). */
export const zPatchDatasetsTagsBody = zTagUpdatePayload
/**
* Tag updated successfully
*/
export const zPatchDatasetsTagsResponse = z.record(z.string(), z.unknown())
export const zPatchDatasetsTagsResponse = zKnowledgeTagResponse
/** Alias: the `PostDatasetsTags` body schema is `zTagCreatePayload` itself (same object, not a copy). */
export const zPostDatasetsTagsBody = zTagCreatePayload
/**
* Tag created successfully
*/
export const zPostDatasetsTagsResponse = z.record(z.string(), z.unknown())
export const zPostDatasetsTagsResponse = zKnowledgeTagResponse
/** Alias: the `PostDatasetsTagsBinding` body schema is `zTagBindingPayload` itself (same object, not a copy). */
export const zPostDatasetsTagsBindingBody = zTagBindingPayload
@ -1112,7 +1339,7 @@ export const zGetDatasetsByDatasetIdPath = z.object({
/**
* Dataset retrieved successfully
*/
export const zGetDatasetsByDatasetIdResponse = z.record(z.string(), z.unknown())
export const zGetDatasetsByDatasetIdResponse = zDatasetDetailWithPartialMembersResponse
/** Alias: the `PatchDatasetsByDatasetId` body schema is `zDatasetUpdatePayload` itself (same object, not a copy). */
export const zPatchDatasetsByDatasetIdBody = zDatasetUpdatePayload
@ -1123,7 +1350,7 @@ export const zPatchDatasetsByDatasetIdPath = z.object({
/**
* Dataset updated successfully
*/
export const zPatchDatasetsByDatasetIdResponse = z.record(z.string(), z.unknown())
export const zPatchDatasetsByDatasetIdResponse = zDatasetDetailWithPartialMembersResponse
export const zPostDatasetsByDatasetIdDocumentCreateByFilePath = z.object({
dataset_id: z.string(),
@ -1616,7 +1843,7 @@ export const zGetDatasetsByDatasetIdTagsPath = z.object({
/**
* Tags retrieved successfully
*/
export const zGetDatasetsByDatasetIdTagsResponse = z.record(z.string(), z.unknown())
export const zGetDatasetsByDatasetIdTagsResponse = zDatasetBoundTagListResponse
export const zGetEndUsersByEndUserIdPath = z.object({
end_user_id: z.string(),