diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index e1666d92d1..3cc1e6b028 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -1,15 +1,16 @@ -from typing import Any, cast +from datetime import datetime +from typing import Any from flask import request -from flask_restx import Resource, fields, marshal, marshal_with -from pydantic import BaseModel, Field, field_validator +from flask_restx import Resource +from pydantic import BaseModel, Field, field_validator, model_validator from sqlalchemy import func, select from werkzeug.exceptions import Forbidden, NotFound import services from configs import dify_config from controllers.common.fields import ApiBaseUrlResponse, SimpleResultResponse, UsageCheckResponse -from controllers.common.schema import get_or_create_model, register_response_schema_models, register_schema_models +from controllers.common.schema import query_params_from_model, register_response_schema_models, register_schema_models from controllers.console import console_ns from controllers.console.apikey import ApiKeyItem, ApiKeyList from controllers.console.app.error import ProviderNotInitializeError @@ -30,26 +31,10 @@ from core.rag.extractor.entity.extract_setting import ExtractSetting, NotionInfo from core.rag.index_processor.constant.index_type import IndexTechniqueType from core.rag.retrieval.retrieval_methods import RetrievalMethod from extensions.ext_database import db -from fields.app_fields import app_detail_kernel_fields, related_app_list -from fields.dataset_fields import ( - content_fields, - dataset_detail_fields, - dataset_fields, - dataset_query_detail_fields, - dataset_retrieval_model_fields, - doc_metadata_fields, - external_knowledge_info_fields, - external_retrieval_model_fields, - file_info_fields, - icon_info_fields, - keyword_setting_fields, - reranking_model_fields, - tag_fields, - vector_setting_fields, - weighted_score_fields, -) -from fields.document_fields import document_status_fields +from fields.base import ResponseModel +from fields.dataset_fields import DatasetDetailResponse from graphon.model_runtime.entities.model_entities import ModelType +from libs.helper import build_icon_url, dump_response, to_timestamp from libs.login import current_account_with_tenant, login_required from libs.url_utils import normalize_api_base_url from models import ApiToken, Dataset, Document, DocumentSegment, UploadFile @@ -61,58 +46,6 @@ from services.dataset_service import DatasetPermissionService, DatasetService, D register_response_schema_models(console_ns, ApiBaseUrlResponse, SimpleResultResponse, UsageCheckResponse) -# Register models for flask_restx to avoid dict type issues in Swagger -dataset_base_model = get_or_create_model("DatasetBase", dataset_fields) - -tag_model = get_or_create_model("Tag", tag_fields) - -keyword_setting_model = get_or_create_model("DatasetKeywordSetting", keyword_setting_fields) -vector_setting_model = get_or_create_model("DatasetVectorSetting", vector_setting_fields) - -weighted_score_fields_copy = weighted_score_fields.copy() -weighted_score_fields_copy["keyword_setting"] = fields.Nested(keyword_setting_model) -weighted_score_fields_copy["vector_setting"] = fields.Nested(vector_setting_model) -weighted_score_model = get_or_create_model("DatasetWeightedScore", weighted_score_fields_copy) - -reranking_model = get_or_create_model("DatasetRerankingModel", reranking_model_fields) - -dataset_retrieval_model_fields_copy = dataset_retrieval_model_fields.copy() -dataset_retrieval_model_fields_copy["reranking_model"] = fields.Nested(reranking_model) -dataset_retrieval_model_fields_copy["weights"] = fields.Nested(weighted_score_model, allow_null=True) -dataset_retrieval_model = get_or_create_model("DatasetRetrievalModel", dataset_retrieval_model_fields_copy) - -external_knowledge_info_model = get_or_create_model("ExternalKnowledgeInfo", external_knowledge_info_fields) - -external_retrieval_model = get_or_create_model("ExternalRetrievalModel", external_retrieval_model_fields) - -doc_metadata_model = get_or_create_model("DatasetDocMetadata", doc_metadata_fields) - -icon_info_model = get_or_create_model("DatasetIconInfo", icon_info_fields) - -dataset_detail_fields_copy = dataset_detail_fields.copy() -dataset_detail_fields_copy["retrieval_model_dict"] = fields.Nested(dataset_retrieval_model) -dataset_detail_fields_copy["tags"] = fields.List(fields.Nested(tag_model)) -dataset_detail_fields_copy["external_knowledge_info"] = fields.Nested(external_knowledge_info_model) -dataset_detail_fields_copy["external_retrieval_model"] = fields.Nested(external_retrieval_model, allow_null=True) -dataset_detail_fields_copy["doc_metadata"] = fields.List(fields.Nested(doc_metadata_model)) -dataset_detail_fields_copy["icon_info"] = fields.Nested(icon_info_model) -dataset_detail_model = get_or_create_model("DatasetDetail", dataset_detail_fields_copy) - -file_info_model = get_or_create_model("DatasetFileInfo", file_info_fields) - -content_fields_copy = content_fields.copy() -content_fields_copy["file_info"] = fields.Nested(file_info_model, allow_null=True) -content_model = get_or_create_model("DatasetContent", content_fields_copy) - -dataset_query_detail_fields_copy = dataset_query_detail_fields.copy() -dataset_query_detail_fields_copy["queries"] = fields.Nested(content_model) -dataset_query_detail_model = get_or_create_model("DatasetQueryDetail", dataset_query_detail_fields_copy) - -app_detail_kernel_model = get_or_create_model("AppDetailKernel", app_detail_kernel_fields) -related_app_list_copy = related_app_list.copy() -related_app_list_copy["data"] = fields.List(fields.Nested(app_detail_kernel_model)) -related_app_list_model = get_or_create_model("RelatedAppList", related_app_list_copy) - def _validate_indexing_technique(value: str | None) -> str | None: if value is None: @@ -208,9 +141,165 @@ class ConsoleDatasetListQuery(BaseModel): tag_ids: list[str] = Field(default_factory=list, description="Filter by tag IDs") +class DatasetListItemResponse(DatasetDetailResponse): + partial_member_list: list[str] + + +class DatasetListResponse(ResponseModel): + data: list[DatasetListItemResponse] + has_more: bool + limit: int + total: int + page: int + + +class DatasetDetailWithPartialMembersResponse(DatasetDetailResponse): + partial_member_list: list[str] | None = None + + +class DatasetQueryFileInfoResponse(ResponseModel): + id: str + name: str + size: int + extension: str + mime_type: str + source_url: str + + +class DatasetQueryContentResponse(ResponseModel): + content_type: str + content: str + file_info: DatasetQueryFileInfoResponse | None = None + + +class DatasetQueryDetailResponse(ResponseModel): + id: str + queries: list[DatasetQueryContentResponse] + source: str + source_app_id: str | None + created_by_role: str + created_by: str + created_at: int + + @field_validator("created_at", mode="before") + @classmethod + def _normalize_created_at(cls, value: datetime | int | None) -> int | None: + return to_timestamp(value) + + +class DatasetQueryListResponse(ResponseModel): + data: list[DatasetQueryDetailResponse] + has_more: bool + limit: int + total: int + page: int + + +class RelatedAppResponse(ResponseModel): + id: str + name: str + description: str + mode: str = Field(validation_alias="mode_compatible_with_agent") + icon_type: str | None + icon: str | None + icon_background: str | None + icon_url: str | None = None + + @model_validator(mode="after") + def _set_icon_url(self) -> "RelatedAppResponse": + self.icon_url = self.icon_url or build_icon_url(self.icon_type, self.icon) + return self + + +class RelatedAppListResponse(ResponseModel): + data: list[RelatedAppResponse] + total: int + + +class DocumentStatusResponse(ResponseModel): + id: str + indexing_status: str + processing_started_at: int | None + parsing_completed_at: int | None + cleaning_completed_at: int | None + splitting_completed_at: int | None + completed_at: int | None + paused_at: int | None + error: str | None + stopped_at: int | None + completed_segments: int | None = None + total_segments: int | None = None + + @field_validator( + "processing_started_at", + "parsing_completed_at", + "cleaning_completed_at", + "splitting_completed_at", + "completed_at", + "paused_at", + "stopped_at", + mode="before", + ) + @classmethod + def _normalize_timestamp(cls, value: datetime | int | None) -> int | None: + return to_timestamp(value) + + +class DocumentStatusListResponse(ResponseModel): + data: list[DocumentStatusResponse] + + +class ErrorDocsResponse(DocumentStatusListResponse): + total: int + + +class IndexingEstimatePreviewItemResponse(ResponseModel): + content: str + child_chunks: list[str] | None = None + summary: str | None = None + + +class IndexingEstimateQaPreviewItemResponse(ResponseModel): + question: str + answer: str + + +class IndexingEstimateResponse(ResponseModel): + total_segments: int + preview: list[IndexingEstimatePreviewItemResponse] + qa_preview: list[IndexingEstimateQaPreviewItemResponse] | None = None + + +class RetrievalSettingResponse(ResponseModel): + retrieval_method: list[str] + + +class PartialMemberListResponse(ResponseModel): + data: list[str] + + +class AutoDisableLogsResponse(ResponseModel): + document_ids: list[str] + count: int + + register_schema_models( console_ns, DatasetCreatePayload, DatasetUpdatePayload, IndexingEstimatePayload, ConsoleDatasetListQuery ) +register_response_schema_models( + console_ns, + DatasetDetailResponse, + DatasetDetailWithPartialMembersResponse, + DatasetListResponse, + DatasetQueryListResponse, + IndexingEstimateResponse, + RelatedAppListResponse, + DocumentStatusListResponse, + ErrorDocsResponse, + RetrievalSettingResponse, + PartialMemberListResponse, + AutoDisableLogsResponse, +) def _get_retrieval_methods_by_vector_type(vector_type: str | None, is_mock: bool = False) -> dict[str, list[str]]: @@ -293,17 +382,8 @@ def _get_retrieval_methods_by_vector_type(vector_type: str | None, is_mock: bool class DatasetListApi(Resource): @console_ns.doc("get_datasets") @console_ns.doc(description="Get list of datasets") - @console_ns.doc( - params={ - "page": "Page number (default: 1)", - "limit": "Number of items per page (default: 20)", - "ids": "Filter by dataset IDs (list)", - "keyword": "Search keyword", - "tag_ids": "Filter by tag IDs (list)", - "include_all": "Include all datasets (default: false)", - } - ) - @console_ns.response(200, "Datasets retrieved successfully") + @console_ns.doc(params=query_params_from_model(ConsoleDatasetListQuery)) + @console_ns.response(200, "Datasets retrieved successfully", console_ns.models[DatasetListResponse.__name__]) @setup_required @login_required @account_initialization_required @@ -342,7 +422,7 @@ class DatasetListApi(Resource): for embedding_model in embedding_models: model_names.append(f"{embedding_model.model}:{embedding_model.provider.provider}") - data = cast(list[dict[str, Any]], marshal(datasets, dataset_detail_fields)) + data = [dump_response(DatasetDetailResponse, dataset) for dataset in datasets] dataset_ids = [item["id"] for item in data if item.get("permission") == "partial_members"] partial_members_map: dict[str, list[str]] = {} if dataset_ids: @@ -379,12 +459,12 @@ class DatasetListApi(Resource): "total": total, "page": query.page, } - return response, 200 + return dump_response(DatasetListResponse, response), 200 @console_ns.doc("create_dataset") @console_ns.doc(description="Create a new dataset") @console_ns.expect(console_ns.models[DatasetCreatePayload.__name__]) - @console_ns.response(201, "Dataset created successfully") + @console_ns.response(201, "Dataset created successfully", console_ns.models[DatasetDetailResponse.__name__]) @console_ns.response(400, "Invalid request parameters") @setup_required @login_required @@ -413,7 +493,7 @@ class DatasetListApi(Resource): except services.errors.dataset.DatasetNameDuplicateError: raise DatasetNameDuplicateError() - return marshal(dataset, dataset_detail_fields), 201 + return dump_response(DatasetDetailResponse, dataset), 201 @console_ns.route("/datasets/") @@ -421,7 +501,11 @@ class DatasetApi(Resource): @console_ns.doc("get_dataset") @console_ns.doc(description="Get dataset details") @console_ns.doc(params={"dataset_id": "Dataset ID"}) - @console_ns.response(200, "Dataset retrieved successfully", dataset_detail_model) + @console_ns.response( + 200, + "Dataset retrieved successfully", + console_ns.models[DatasetDetailWithPartialMembersResponse.__name__], + ) @console_ns.response(404, "Dataset not found") @console_ns.response(403, "Permission denied") @setup_required @@ -437,7 +521,7 @@ class DatasetApi(Resource): DatasetService.check_dataset_permission(dataset, current_user) except services.errors.account.NoPermissionError as e: raise Forbidden(str(e)) - data = cast(dict[str, Any], marshal(dataset, dataset_detail_fields)) + data = dump_response(DatasetDetailResponse, dataset) if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY: if dataset.embedding_model_provider: provider_id = ModelProviderID(dataset.embedding_model_provider) @@ -470,7 +554,11 @@ class DatasetApi(Resource): @console_ns.doc("update_dataset") @console_ns.doc(description="Update dataset details") @console_ns.expect(console_ns.models[DatasetUpdatePayload.__name__]) - @console_ns.response(200, "Dataset updated successfully", dataset_detail_model) + @console_ns.response( + 200, + "Dataset updated successfully", + console_ns.models[DatasetDetailWithPartialMembersResponse.__name__], + ) @console_ns.response(404, "Dataset not found") @console_ns.response(403, "Permission denied") @setup_required @@ -506,7 +594,7 @@ class DatasetApi(Resource): if dataset is None: raise NotFound("Dataset not found.") - result_data = cast(dict[str, Any], marshal(dataset, dataset_detail_fields)) + result_data = dump_response(DatasetDetailResponse, dataset) tenant_id = current_tenant_id if payload.partial_member_list is not None and payload.permission == DatasetPermissionEnum.PARTIAL_TEAM: @@ -567,7 +655,11 @@ class DatasetQueryApi(Resource): @console_ns.doc("get_dataset_queries") @console_ns.doc(description="Get dataset query history") @console_ns.doc(params={"dataset_id": "Dataset ID"}) - @console_ns.response(200, "Query history retrieved successfully", dataset_query_detail_model) + @console_ns.response( + 200, + "Query history retrieved successfully", + console_ns.models[DatasetQueryListResponse.__name__], + ) @setup_required @login_required @account_initialization_required @@ -589,20 +681,24 @@ class DatasetQueryApi(Resource): dataset_queries, total = DatasetService.get_dataset_queries(dataset_id=dataset.id, page=page, per_page=limit) response = { - "data": marshal(dataset_queries, dataset_query_detail_model), + "data": dataset_queries, "has_more": len(dataset_queries) == limit, "limit": limit, "total": total, "page": page, } - return response, 200 + return dump_response(DatasetQueryListResponse, response), 200 @console_ns.route("/datasets/indexing-estimate") class DatasetIndexingEstimateApi(Resource): @console_ns.doc("estimate_dataset_indexing") @console_ns.doc(description="Estimate dataset indexing cost") - @console_ns.response(200, "Indexing estimate calculated successfully") + @console_ns.response( + 200, + "Indexing estimate calculated successfully", + console_ns.models[IndexingEstimateResponse.__name__], + ) @setup_required @login_required @account_initialization_required @@ -699,11 +795,14 @@ class DatasetRelatedAppListApi(Resource): @console_ns.doc("get_dataset_related_apps") @console_ns.doc(description="Get applications related to dataset") @console_ns.doc(params={"dataset_id": "Dataset ID"}) - @console_ns.response(200, "Related apps retrieved successfully", related_app_list_model) + @console_ns.response( + 200, + "Related apps retrieved successfully", + console_ns.models[RelatedAppListResponse.__name__], + ) @setup_required @login_required @account_initialization_required - @marshal_with(related_app_list_model) def get(self, dataset_id): current_user, _ = current_account_with_tenant() dataset_id_str = str(dataset_id) @@ -724,7 +823,7 @@ class DatasetRelatedAppListApi(Resource): if app_model: related_apps.append(app_model) - return {"data": related_apps, "total": len(related_apps)}, 200 + return dump_response(RelatedAppListResponse, {"data": related_apps, "total": len(related_apps)}), 200 @console_ns.route("/datasets//indexing-status") @@ -732,7 +831,11 @@ class DatasetIndexingStatusApi(Resource): @console_ns.doc("get_dataset_indexing_status") @console_ns.doc(description="Get dataset indexing status") @console_ns.doc(params={"dataset_id": "Dataset ID"}) - @console_ns.response(200, "Indexing status retrieved successfully") + @console_ns.response( + 200, + "Indexing status retrieved successfully", + console_ns.models[DocumentStatusListResponse.__name__], + ) @setup_required @login_required @account_initialization_required @@ -778,9 +881,8 @@ class DatasetIndexingStatusApi(Resource): "completed_segments": completed_segments, "total_segments": total_segments, } - documents_status.append(marshal(document_dict, document_status_fields)) - data = {"data": documents_status} - return data, 200 + documents_status.append(document_dict) + return dump_response(DocumentStatusListResponse, {"data": documents_status}), 200 @console_ns.route("/datasets/api-keys") @@ -907,13 +1009,18 @@ class DatasetApiBaseUrlApi(Resource): class DatasetRetrievalSettingApi(Resource): @console_ns.doc("get_dataset_retrieval_setting") @console_ns.doc(description="Get dataset retrieval settings") - @console_ns.response(200, "Retrieval settings retrieved successfully") + @console_ns.response( + 200, "Retrieval settings retrieved successfully", console_ns.models[RetrievalSettingResponse.__name__] + ) @setup_required @login_required @account_initialization_required def get(self): vector_type = dify_config.VECTOR_STORE - return _get_retrieval_methods_by_vector_type(vector_type, is_mock=False) + return dump_response( + RetrievalSettingResponse, + _get_retrieval_methods_by_vector_type(vector_type, is_mock=False), + ) @console_ns.route("/datasets/retrieval-setting/") @@ -921,12 +1028,19 @@ class DatasetRetrievalSettingMockApi(Resource): @console_ns.doc("get_dataset_retrieval_setting_mock") @console_ns.doc(description="Get mock dataset retrieval settings by vector type") @console_ns.doc(params={"vector_type": "Vector store type"}) - @console_ns.response(200, "Mock retrieval settings retrieved successfully") + @console_ns.response( + 200, + "Mock retrieval settings retrieved successfully", + console_ns.models[RetrievalSettingResponse.__name__], + ) @setup_required @login_required @account_initialization_required def get(self, vector_type): - return _get_retrieval_methods_by_vector_type(vector_type, is_mock=True) + return dump_response( + RetrievalSettingResponse, + _get_retrieval_methods_by_vector_type(vector_type, is_mock=True), + ) @console_ns.route("/datasets//error-docs") @@ -934,7 +1048,7 @@ class DatasetErrorDocs(Resource): @console_ns.doc("get_dataset_error_docs") @console_ns.doc(description="Get dataset error documents") @console_ns.doc(params={"dataset_id": "Dataset ID"}) - @console_ns.response(200, "Error documents retrieved successfully") + @console_ns.response(200, "Error documents retrieved successfully", console_ns.models[ErrorDocsResponse.__name__]) @console_ns.response(404, "Dataset not found") @setup_required @login_required @@ -946,7 +1060,7 @@ class DatasetErrorDocs(Resource): raise NotFound("Dataset not found.") results = DocumentService.get_error_documents_by_dataset_id(dataset_id_str) - return {"data": [marshal(item, document_status_fields) for item in results], "total": len(results)}, 200 + return dump_response(ErrorDocsResponse, {"data": results, "total": len(results)}), 200 @console_ns.route("/datasets//permission-part-users") @@ -954,7 +1068,11 @@ class DatasetPermissionUserListApi(Resource): @console_ns.doc("get_dataset_permission_users") @console_ns.doc(description="Get dataset permission user list") @console_ns.doc(params={"dataset_id": "Dataset ID"}) - @console_ns.response(200, "Permission users retrieved successfully") + @console_ns.response( + 200, + "Permission users retrieved successfully", + console_ns.models[PartialMemberListResponse.__name__], + ) @console_ns.response(404, "Dataset not found") @console_ns.response(403, "Permission denied") @setup_required @@ -973,9 +1091,7 @@ class DatasetPermissionUserListApi(Resource): partial_members_list = DatasetPermissionService.get_dataset_partial_member_list(dataset_id_str) - return { - "data": partial_members_list, - }, 200 + return dump_response(PartialMemberListResponse, {"data": partial_members_list}), 200 @console_ns.route("/datasets//auto-disable-logs") @@ -983,7 +1099,11 @@ class DatasetAutoDisableLogApi(Resource): @console_ns.doc("get_dataset_auto_disable_logs") @console_ns.doc(description="Get dataset auto disable logs") @console_ns.doc(params={"dataset_id": "Dataset ID"}) - @console_ns.response(200, "Auto disable logs retrieved successfully") + @console_ns.response( + 200, + "Auto disable logs retrieved successfully", + console_ns.models[AutoDisableLogsResponse.__name__], + ) @console_ns.response(404, "Dataset not found") @setup_required @login_required @@ -993,4 +1113,4 @@ class DatasetAutoDisableLogApi(Resource): dataset = DatasetService.get_dataset(dataset_id_str) if dataset is None: raise NotFound("Dataset not found.") - return DatasetService.get_dataset_auto_disable_logs(dataset_id_str), 200 + return dump_response(AutoDisableLogsResponse, DatasetService.get_dataset_auto_disable_logs(dataset_id_str)), 200 diff --git a/api/controllers/service_api/dataset/dataset.py b/api/controllers/service_api/dataset/dataset.py index 3e4ea0a0ba..4745ca1275 100644 --- a/api/controllers/service_api/dataset/dataset.py +++ b/api/controllers/service_api/dataset/dataset.py @@ -1,13 +1,17 @@ -from typing import Any, Literal, cast +from typing import Any, Literal from flask import request -from flask_restx import marshal -from pydantic import BaseModel, Field, TypeAdapter, field_validator, model_validator +from pydantic import BaseModel, ConfigDict, Field, RootModel, field_validator, model_validator from werkzeug.exceptions import Forbidden, NotFound import services from controllers.common.fields import SimpleResultResponse -from controllers.common.schema import register_enum_models, register_response_schema_models, register_schema_models +from controllers.common.schema import ( + query_params_from_model, + register_enum_models, + register_response_schema_models, + register_schema_models, +) from controllers.console.wraps import edit_permission_required from controllers.service_api import service_api_ns from controllers.service_api.dataset.error import DatasetInUseError, DatasetNameDuplicateError, InvalidActionError @@ -17,9 +21,10 @@ from controllers.service_api.wraps import ( ) from core.plugin.impl.model_runtime_factory import create_plugin_provider_manager from core.rag.index_processor.constant.index_type import IndexTechniqueType -from fields.dataset_fields import dataset_detail_fields -from fields.tag_fields import DataSetTag +from fields.base import ResponseModel +from fields.dataset_fields import DatasetDetailResponse from graphon.model_runtime.entities.model_entities import ModelType +from libs.helper import dump_response from libs.login import current_user from models.account import Account from models.dataset import DatasetPermissionEnum @@ -119,6 +124,21 @@ class TagUnbindingPayload(BaseModel): return self +class KnowledgeTagResponse(ResponseModel): + model_config = ConfigDict(coerce_numbers_to_str=True) + + id: str + name: str + type: str + # TODO: The public Service API docs expose binding_count as string|null. + # Keep matching the old RESTX fields.String coercion until that contract is intentionally migrated. + binding_count: str | None = None + + +class KnowledgeTagListResponse(RootModel[list[KnowledgeTagResponse]]): + pass + + class DatasetListQuery(BaseModel): page: int = Field(default=1, description="Page number") limit: int = Field(default=20, description="Number of items per page") @@ -127,6 +147,29 @@ class DatasetListQuery(BaseModel): tag_ids: list[str] = Field(default_factory=list, description="Filter by tag IDs") +class DatasetDetailWithPartialMembersResponse(DatasetDetailResponse): + partial_member_list: list[str] | None = None + + +# todo: duplicate code, but the partial_member_list has different nullability +class DatasetListResponse(ResponseModel): + data: list[DatasetDetailResponse] + has_more: bool + limit: int + total: int + page: int + + +class DatasetBoundTagResponse(ResponseModel): + id: str + name: str + + +class DatasetBoundTagListResponse(ResponseModel): + data: list[DatasetBoundTagResponse] + total: int + + register_schema_models( service_api_ns, DatasetCreatePayload, @@ -137,9 +180,17 @@ register_schema_models( TagBindingPayload, TagUnbindingPayload, DatasetListQuery, - DataSetTag, ) -register_response_schema_models(service_api_ns, SimpleResultResponse) +register_response_schema_models( + service_api_ns, + SimpleResultResponse, + KnowledgeTagResponse, + KnowledgeTagListResponse, + DatasetDetailResponse, + DatasetDetailWithPartialMembersResponse, + DatasetListResponse, + DatasetBoundTagListResponse, +) @service_api_ns.route("/datasets") @@ -154,9 +205,18 @@ class DatasetListApi(DatasetApiResource): 401: "Unauthorized - invalid API token", } ) + @service_api_ns.doc(params=query_params_from_model(DatasetListQuery)) + @service_api_ns.response( + 200, + "Datasets retrieved successfully", + service_api_ns.models[DatasetListResponse.__name__], + ) def get(self, tenant_id): """Resource for getting datasets.""" - query = DatasetListQuery.model_validate(request.args.to_dict()) + query_params: dict[str, str | list[str]] = dict(request.args.to_dict()) + if "tag_ids" in request.args: + query_params["tag_ids"] = request.args.getlist("tag_ids") + query = DatasetListQuery.model_validate(query_params) # provider = request.args.get("provider", default="vendor") datasets, total = DatasetService.get_datasets( @@ -175,17 +235,17 @@ class DatasetListApi(DatasetApiResource): for embedding_model in embedding_models: model_names.append(f"{embedding_model.model}:{embedding_model.provider.provider}") - data = marshal(datasets, dataset_detail_fields) + data = [dump_response(DatasetDetailResponse, dataset) for dataset in datasets] for item in data: if item["indexing_technique"] == IndexTechniqueType.HIGH_QUALITY and item["embedding_model_provider"]: item["embedding_model_provider"] = str(ModelProviderID(item["embedding_model_provider"])) item_model = f"{item['embedding_model']}:{item['embedding_model_provider']}" if item_model in model_names: - item["embedding_available"] = True # type: ignore + item["embedding_available"] = True else: - item["embedding_available"] = False # type: ignore + item["embedding_available"] = False else: - item["embedding_available"] = True # type: ignore + item["embedding_available"] = True response = { "data": data, "has_more": len(datasets) == query.limit, @@ -193,7 +253,7 @@ class DatasetListApi(DatasetApiResource): "total": total, "page": query.page, } - return response, 200 + return dump_response(DatasetListResponse, response), 200 @service_api_ns.expect(service_api_ns.models[DatasetCreatePayload.__name__]) @service_api_ns.doc("create_dataset") @@ -205,6 +265,11 @@ class DatasetListApi(DatasetApiResource): 400: "Bad request - invalid parameters", } ) + @service_api_ns.response( + 200, + "Dataset created successfully", + service_api_ns.models[DatasetDetailResponse.__name__], + ) @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id): """Resource for creating datasets.""" @@ -248,7 +313,7 @@ class DatasetListApi(DatasetApiResource): except services.errors.dataset.DatasetNameDuplicateError: raise DatasetNameDuplicateError() - return marshal(dataset, dataset_detail_fields), 200 + return dump_response(DatasetDetailResponse, dataset), 200 @service_api_ns.route("/datasets/") @@ -266,6 +331,11 @@ class DatasetApi(DatasetApiResource): 404: "Dataset not found", } ) + @service_api_ns.response( + 200, + "Dataset retrieved successfully", + service_api_ns.models[DatasetDetailWithPartialMembersResponse.__name__], + ) def get(self, _, dataset_id): dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) @@ -275,7 +345,7 @@ class DatasetApi(DatasetApiResource): DatasetService.check_dataset_permission(dataset, current_user) except services.errors.account.NoPermissionError as e: raise Forbidden(str(e)) - data = cast(dict[str, Any], marshal(dataset, dataset_detail_fields)) + data = dump_response(DatasetDetailResponse, dataset) # check embedding setting assert isinstance(current_user, Account) cid = current_user.current_tenant_id @@ -307,7 +377,13 @@ class DatasetApi(DatasetApiResource): part_users_list = DatasetPermissionService.get_dataset_partial_member_list(dataset_id_str) data.update({"partial_member_list": part_users_list}) - return data, 200 + return ( + DatasetDetailWithPartialMembersResponse.model_validate(data).model_dump( + mode="json", + exclude={"partial_member_list"} if "partial_member_list" not in data else set(), + ), + 200, + ) @service_api_ns.expect(service_api_ns.models[DatasetUpdatePayload.__name__]) @service_api_ns.doc("update_dataset") @@ -321,6 +397,11 @@ class DatasetApi(DatasetApiResource): 404: "Dataset not found", } ) + @service_api_ns.response( + 200, + "Dataset updated successfully", + service_api_ns.models[DatasetDetailWithPartialMembersResponse.__name__], + ) @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def patch(self, _, dataset_id): dataset_id_str = str(dataset_id) @@ -371,7 +452,7 @@ class DatasetApi(DatasetApiResource): if dataset is None: raise NotFound("Dataset not found.") - result_data = cast(dict[str, Any], marshal(dataset, dataset_detail_fields)) + result_data = dump_response(DatasetDetailResponse, dataset) assert isinstance(current_user, Account) tenant_id = current_user.current_tenant_id @@ -384,7 +465,7 @@ class DatasetApi(DatasetApiResource): partial_member_list = DatasetPermissionService.get_dataset_partial_member_list(dataset_id_str) result_data.update({"partial_member_list": partial_member_list}) - return result_data, 200 + return DatasetDetailWithPartialMembersResponse.model_validate(result_data).model_dump(mode="json"), 200 @service_api_ns.doc("delete_dataset") @service_api_ns.doc(description="Delete a dataset") @@ -497,7 +578,7 @@ class DocumentStatusApi(DatasetApiResource): except ValueError as e: raise InvalidActionError(str(e)) - return {"result": "success"}, 200 + return dump_response(SimpleResultResponse, {"result": "success"}), 200 @service_api_ns.route("/datasets/tags") @@ -510,14 +591,18 @@ class DatasetTagsApi(DatasetApiResource): 401: "Unauthorized - invalid API token", } ) + @service_api_ns.response( + 200, + "Tags retrieved successfully", + service_api_ns.models[KnowledgeTagListResponse.__name__], + ) def get(self, _): """Get all knowledge type tags.""" assert isinstance(current_user, Account) cid = current_user.current_tenant_id assert cid is not None tags = TagService.get_tags("knowledge", cid) - tag_models = TypeAdapter(list[DataSetTag]).validate_python(tags, from_attributes=True) - return [tag.model_dump(mode="json") for tag in tag_models], 200 + return dump_response(KnowledgeTagListResponse, tags), 200 @service_api_ns.expect(service_api_ns.models[TagCreatePayload.__name__]) @service_api_ns.doc("create_dataset_tag") @@ -529,6 +614,11 @@ class DatasetTagsApi(DatasetApiResource): 403: "Forbidden - insufficient permissions", } ) + @service_api_ns.response( + 200, + "Tag created successfully", + service_api_ns.models[KnowledgeTagResponse.__name__], + ) def post(self, _): """Add a knowledge type tag.""" assert isinstance(current_user, Account) @@ -538,9 +628,10 @@ class DatasetTagsApi(DatasetApiResource): payload = TagCreatePayload.model_validate(service_api_ns.payload or {}) tag = TagService.save_tags(SaveTagPayload(name=payload.name, type=TagType.KNOWLEDGE)) - response = DataSetTag.model_validate( - {"id": tag.id, "name": tag.name, "type": tag.type, "binding_count": 0} - ).model_dump(mode="json") + response = dump_response( + KnowledgeTagResponse, + {"id": tag.id, "name": tag.name, "type": tag.type, "binding_count": 0}, + ) return response, 200 @service_api_ns.expect(service_api_ns.models[TagUpdatePayload.__name__]) @@ -553,6 +644,11 @@ class DatasetTagsApi(DatasetApiResource): 403: "Forbidden - insufficient permissions", } ) + @service_api_ns.response( + 200, + "Tag updated successfully", + service_api_ns.models[KnowledgeTagResponse.__name__], + ) def patch(self, _): assert isinstance(current_user, Account) if not (current_user.has_edit_permission or current_user.is_dataset_editor): @@ -564,9 +660,10 @@ class DatasetTagsApi(DatasetApiResource): binding_count = TagService.get_tag_binding_count(tag_id) - response = DataSetTag.model_validate( - {"id": tag.id, "name": tag.name, "type": tag.type, "binding_count": binding_count} - ).model_dump(mode="json") + response = dump_response( + KnowledgeTagResponse, + {"id": tag.id, "name": tag.name, "type": tag.type, "binding_count": binding_count}, + ) return response, 200 @service_api_ns.expect(service_api_ns.models[TagDeletePayload.__name__]) @@ -651,6 +748,11 @@ class DatasetTagsBindingStatusApi(DatasetApiResource): 401: "Unauthorized - invalid API token", } ) + @service_api_ns.response( + 200, + "Tags retrieved successfully", + service_api_ns.models[DatasetBoundTagListResponse.__name__], + ) def get(self, _, *args, **kwargs): """Get all knowledge type tags.""" dataset_id = kwargs.get("dataset_id") @@ -658,5 +760,4 @@ class DatasetTagsBindingStatusApi(DatasetApiResource): assert current_user.current_tenant_id is not None tags = TagService.get_tags_by_target_id("knowledge", current_user.current_tenant_id, str(dataset_id)) tags_list = [{"id": tag.id, "name": tag.name} for tag in tags] - response = {"data": tags_list, "total": len(tags)} - return response, 200 + return dump_response(DatasetBoundTagListResponse, {"data": tags_list, "total": len(tags)}), 200 diff --git a/api/fields/dataset_fields.py b/api/fields/dataset_fields.py index 4411ee0465..b44e6a1365 100644 --- a/api/fields/dataset_fields.py +++ b/api/fields/dataset_fields.py @@ -1,7 +1,10 @@ +from datetime import datetime + from flask_restx import fields +from pydantic import field_validator from fields.base import ResponseModel -from libs.helper import TimestampField +from libs.helper import TimestampField, to_timestamp dataset_fields = { "id": fields.String, @@ -142,27 +145,116 @@ dataset_detail_fields = { "is_multimodal": fields.Boolean, } -file_info_fields = { - "id": fields.String, - "name": fields.String, - "size": fields.Integer, - "extension": fields.String, - "mime_type": fields.String, - "source_url": fields.String, -} -content_fields = { - "content_type": fields.String, - "content": fields.String, - "file_info": fields.Nested(file_info_fields, allow_null=True), -} +class DatasetRerankingModelResponse(ResponseModel): + reranking_provider_name: str | None = None + reranking_model_name: str | None = None -dataset_query_detail_fields = { - "id": fields.String, - "queries": fields.Nested(content_fields), - "source": fields.String, - "source_app_id": fields.String, - "created_by_role": fields.String, - "created_by": fields.String, - "created_at": TimestampField, -} + +class DatasetKeywordSettingResponse(ResponseModel): + keyword_weight: float + + +class DatasetVectorSettingResponse(ResponseModel): + vector_weight: float + embedding_model_name: str + embedding_provider_name: str + + +class DatasetWeightedScoreResponse(ResponseModel): + weight_type: str | None + keyword_setting: DatasetKeywordSettingResponse | None + vector_setting: DatasetVectorSettingResponse | None + + +class DatasetRetrievalModelResponse(ResponseModel): + search_method: str + reranking_enable: bool + reranking_mode: str | None = None + reranking_model: DatasetRerankingModelResponse | None + weights: DatasetWeightedScoreResponse | None = None + top_k: int + score_threshold_enabled: bool + score_threshold: float | None = None + + +class DatasetSummaryIndexSettingResponse(ResponseModel): + enable: bool | None = None + model_name: str | None = None + model_provider_name: str | None = None + summary_prompt: str | None = None + + +class DatasetTagResponse(ResponseModel): + id: str + name: str + type: str + + +class DatasetExternalKnowledgeInfoResponse(ResponseModel): + external_knowledge_id: str + external_knowledge_api_id: str + external_knowledge_api_name: str + external_knowledge_api_endpoint: str + + +class DatasetExternalRetrievalModelResponse(ResponseModel): + top_k: int + score_threshold: float + score_threshold_enabled: bool | None = None + + +class DatasetDocMetadataResponse(ResponseModel): + id: str + name: str + type: str + + +class DatasetIconInfoResponse(ResponseModel): + icon_type: str | None + icon: str | None + icon_background: str | None = None + icon_url: str | None = None + + +class DatasetDetailResponse(ResponseModel): + id: str + name: str + description: str | None + provider: str + permission: str + data_source_type: str | None + indexing_technique: str | None + app_count: int + document_count: int + word_count: int + created_by: str + author_name: str | None + created_at: int + updated_by: str | None + updated_at: int + embedding_model: str | None + embedding_model_provider: str | None + embedding_available: bool | None = None + retrieval_model_dict: DatasetRetrievalModelResponse + summary_index_setting: DatasetSummaryIndexSettingResponse | None + tags: list[DatasetTagResponse] + doc_form: str | None + external_knowledge_info: DatasetExternalKnowledgeInfoResponse | None + external_retrieval_model: DatasetExternalRetrievalModelResponse | None + doc_metadata: list[DatasetDocMetadataResponse] + built_in_field_enabled: bool + pipeline_id: str | None + runtime_mode: str | None + chunk_structure: str | None + icon_info: DatasetIconInfoResponse | None + is_published: bool + total_documents: int + total_available_documents: int + enable_api: bool + is_multimodal: bool + + @field_validator("created_at", "updated_at", mode="before") + @classmethod + def _normalize_timestamp(cls, value: datetime | int | None) -> int | None: + return to_timestamp(value) diff --git a/api/fields/tag_fields.py b/api/fields/tag_fields.py deleted file mode 100644 index a3629f477a..0000000000 --- a/api/fields/tag_fields.py +++ /dev/null @@ -1,10 +0,0 @@ -from __future__ import annotations - -from fields.base import ResponseModel - - -class DataSetTag(ResponseModel): - id: str - name: str - type: str - binding_count: str | None = None diff --git a/api/openapi/markdown/console-swagger.md b/api/openapi/markdown/console-swagger.md index 8536cc93ae..188e1a4a9f 100644 --- a/api/openapi/markdown/console-swagger.md +++ b/api/openapi/markdown/console-swagger.md @@ -4135,18 +4135,18 @@ Get list of datasets | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| ids | query | Filter by dataset IDs (list) | No | string | -| include_all | query | Include all datasets (default: false) | No | string | +| ids | query | Filter by dataset IDs | No | [ string ] | +| include_all | query | Include all datasets | No | boolean | | keyword | query | Search keyword | No | string | -| limit | query | Number of items per page (default: 20) | No | string | -| page | query | Page number (default: 1) | No | string | -| tag_ids | query | Filter by tag IDs (list) | No | string | +| limit | query | Number of items per page | No | integer | +| page | query | Page number | No | integer | +| tag_ids | query | Filter by tag IDs | No | [ string ] | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Datasets retrieved successfully | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Datasets retrieved successfully | [DatasetListResponse](#datasetlistresponse) | #### POST ##### Description @@ -4161,10 +4161,10 @@ Create a new dataset ##### Responses -| Code | Description | -| ---- | ----------- | -| 201 | Dataset created successfully | -| 400 | Invalid request parameters | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 201 | Dataset created successfully | [DatasetDetailResponse](#datasetdetailresponse) | +| 400 | Invalid request parameters | | ### /datasets/api-base-info @@ -4384,9 +4384,9 @@ Estimate dataset indexing cost ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Indexing estimate calculated successfully | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Indexing estimate calculated successfully | [IndexingEstimateResponse](#indexingestimateresponse) | ### /datasets/init @@ -4467,9 +4467,9 @@ Get dataset retrieval settings ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Retrieval settings retrieved successfully | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Retrieval settings retrieved successfully | [RetrievalSettingResponse](#retrievalsettingresponse) | ### /datasets/retrieval-setting/{vector_type} @@ -4486,9 +4486,9 @@ Get mock dataset retrieval settings by vector type ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Mock retrieval settings retrieved successfully | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Mock retrieval settings retrieved successfully | [RetrievalSettingResponse](#retrievalsettingresponse) | ### /datasets/{dataset_id} @@ -4520,7 +4520,7 @@ Get dataset details | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | Dataset retrieved successfully | [DatasetDetail](#datasetdetail) | +| 200 | Dataset retrieved successfully | [DatasetDetailWithPartialMembersResponse](#datasetdetailwithpartialmembersresponse) | | 403 | Permission denied | | | 404 | Dataset not found | | @@ -4540,7 +4540,7 @@ Update dataset details | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | Dataset updated successfully | [DatasetDetail](#datasetdetail) | +| 200 | Dataset updated successfully | [DatasetDetailWithPartialMembersResponse](#datasetdetailwithpartialmembersresponse) | | 403 | Permission denied | | | 404 | Dataset not found | | @@ -4575,10 +4575,10 @@ Get dataset auto disable logs ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Auto disable logs retrieved successfully | -| 404 | Dataset not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Auto disable logs retrieved successfully | [AutoDisableLogsResponse](#autodisablelogsresponse) | +| 404 | Dataset not found | | ### /datasets/{dataset_id}/batch/{batch}/indexing-estimate @@ -5263,10 +5263,10 @@ Get dataset error documents ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Error documents retrieved successfully | -| 404 | Dataset not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Error documents retrieved successfully | [ErrorDocsResponse](#errordocsresponse) | +| 404 | Dataset not found | | ### /datasets/{dataset_id}/external-hit-testing @@ -5327,9 +5327,9 @@ Get dataset indexing status ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Indexing status retrieved successfully | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Indexing status retrieved successfully | [DocumentStatusListResponse](#documentstatuslistresponse) | ### /datasets/{dataset_id}/metadata @@ -5437,11 +5437,11 @@ Get dataset permission user list ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Permission users retrieved successfully | -| 403 | Permission denied | -| 404 | Dataset not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Permission users retrieved successfully | [PartialMemberListResponse](#partialmemberlistresponse) | +| 403 | Permission denied | | +| 404 | Dataset not found | | ### /datasets/{dataset_id}/queries @@ -5460,7 +5460,7 @@ Get dataset query history | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | Query history retrieved successfully | [DatasetQueryDetail](#datasetquerydetail) | +| 200 | Query history retrieved successfully | [DatasetQueryListResponse](#datasetquerylistresponse) | ### /datasets/{dataset_id}/related-apps @@ -5479,7 +5479,7 @@ Get applications related to dataset | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | Related apps retrieved successfully | [RelatedAppList](#relatedapplist) | +| 200 | Related apps retrieved successfully | [RelatedAppListResponse](#relatedapplistresponse) | ### /datasets/{dataset_id}/retry @@ -10863,19 +10863,6 @@ Enum class for api provider schema type. | use_icon_as_answer_icon | boolean | | No | | workflow | [WorkflowPartial](#workflowpartial) | | No | -#### AppDetailKernel - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| description | string | | No | -| icon | string | | No | -| icon_background | string | | No | -| icon_type | string | | No | -| icon_url | object | | No | -| id | string | | No | -| mode | string | | No | -| name | string | | No | - #### AppDetailWithSite | Name | Type | Description | Required | @@ -11081,6 +11068,13 @@ AppMCPServer Status Enum | ---- | ---- | ----------- | -------- | | text | string | Transcribed text from audio | Yes | +#### AutoDisableLogsResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| count | integer | | Yes | +| document_ids | [ string ] | | Yes | + #### AvatarUrlResponse | Name | Type | Description | Required | @@ -11643,27 +11637,6 @@ Condition detail | dataset | [DatasetResponse](#datasetresponse) | | Yes | | documents | [ [DocumentResponse](#documentresponse) ] | | Yes | -#### DatasetBase - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| created_at | object | | No | -| created_by | string | | No | -| data_source_type | string | | No | -| description | string | | No | -| id | string | | No | -| indexing_technique | string | | No | -| name | string | | No | -| permission | string | | No | - -#### DatasetContent - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| content | string | | No | -| content_type | string | | No | -| file_info | [DatasetFileInfo](#datasetfileinfo) | | No | - #### DatasetCreatePayload | Name | Type | Description | Required | @@ -11716,6 +11689,87 @@ Condition detail | updated_by | string | | No | | word_count | integer | | No | +#### DatasetDetailResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| app_count | integer | | Yes | +| author_name | string | | Yes | +| built_in_field_enabled | boolean | | Yes | +| chunk_structure | string | | Yes | +| created_at | integer | | Yes | +| created_by | string | | Yes | +| data_source_type | string | | Yes | +| description | string | | Yes | +| doc_form | string | | Yes | +| doc_metadata | [ [DatasetDocMetadataResponse](#datasetdocmetadataresponse) ] | | Yes | +| document_count | integer | | Yes | +| embedding_available | boolean | | No | +| embedding_model | string | | Yes | +| embedding_model_provider | string | | Yes | +| enable_api | boolean | | Yes | +| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | Yes | +| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes | +| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | Yes | +| id | string | | Yes | +| indexing_technique | string | | Yes | +| is_multimodal | boolean | | Yes | +| is_published | boolean | | Yes | +| name | string | | Yes | +| permission | string | | Yes | +| pipeline_id | string | | Yes | +| provider | string | | Yes | +| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes | +| runtime_mode | string | | Yes | +| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | Yes | +| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes | +| total_available_documents | integer | | Yes | +| total_documents | integer | | Yes | +| updated_at | integer | | Yes | +| updated_by | string | | Yes | +| word_count | integer | | Yes | + +#### DatasetDetailWithPartialMembersResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| app_count | integer | | Yes | +| author_name | string | | Yes | +| built_in_field_enabled | boolean | | Yes | +| chunk_structure | string | | Yes | +| created_at | integer | | Yes | +| created_by | string | | Yes | +| data_source_type | string | | Yes | +| description | string | | Yes | +| doc_form | string | | Yes | +| doc_metadata | [ [DatasetDocMetadataResponse](#datasetdocmetadataresponse) ] | | Yes | +| document_count | integer | | Yes | +| embedding_available | boolean | | No | +| embedding_model | string | | Yes | +| embedding_model_provider | string | | Yes | +| enable_api | boolean | | Yes | +| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | Yes | +| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes | +| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | Yes | +| id | string | | Yes | +| indexing_technique | string | | Yes | +| is_multimodal | boolean | | Yes | +| is_published | boolean | | Yes | +| name | string | | Yes | +| partial_member_list | [ string ] | | No | +| permission | string | | Yes | +| pipeline_id | string | | Yes | +| provider | string | | Yes | +| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes | +| runtime_mode | string | | Yes | +| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | Yes | +| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes | +| total_available_documents | integer | | Yes | +| total_documents | integer | | Yes | +| updated_at | integer | | Yes | +| updated_by | string | | Yes | +| word_count | integer | | Yes | + #### DatasetDocMetadata | Name | Type | Description | Required | @@ -11724,16 +11778,30 @@ Condition detail | name | string | | No | | type | string | | No | -#### DatasetFileInfo +#### DatasetDocMetadataResponse | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| extension | string | | No | -| id | string | | No | -| mime_type | string | | No | -| name | string | | No | -| size | integer | | No | -| source_url | string | | No | +| id | string | | Yes | +| name | string | | Yes | +| type | string | | Yes | + +#### DatasetExternalKnowledgeInfoResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| external_knowledge_api_endpoint | string | | Yes | +| external_knowledge_api_id | string | | Yes | +| external_knowledge_api_name | string | | Yes | +| external_knowledge_id | string | | Yes | + +#### DatasetExternalRetrievalModelResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| score_threshold | number | | Yes | +| score_threshold_enabled | boolean | | No | +| top_k | integer | | Yes | #### DatasetIconInfo @@ -11744,12 +11812,78 @@ Condition detail | icon_type | string | | No | | icon_url | string | | No | +#### DatasetIconInfoResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| icon | string | | Yes | +| icon_background | string | | No | +| icon_type | string | | Yes | +| icon_url | string | | No | + #### DatasetKeywordSetting | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | | keyword_weight | number | | No | +#### DatasetKeywordSettingResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| keyword_weight | number | | Yes | + +#### DatasetListItemResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| app_count | integer | | Yes | +| author_name | string | | Yes | +| built_in_field_enabled | boolean | | Yes | +| chunk_structure | string | | Yes | +| created_at | integer | | Yes | +| created_by | string | | Yes | +| data_source_type | string | | Yes | +| description | string | | Yes | +| doc_form | string | | Yes | +| doc_metadata | [ [DatasetDocMetadataResponse](#datasetdocmetadataresponse) ] | | Yes | +| document_count | integer | | Yes | +| embedding_available | boolean | | No | +| embedding_model | string | | Yes | +| embedding_model_provider | string | | Yes | +| enable_api | boolean | | Yes | +| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | Yes | +| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes | +| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | Yes | +| id | string | | Yes | +| indexing_technique | string | | Yes | +| is_multimodal | boolean | | Yes | +| is_published | boolean | | Yes | +| name | string | | Yes | +| partial_member_list | [ string ] | | Yes | +| permission | string | | Yes | +| pipeline_id | string | | Yes | +| provider | string | | Yes | +| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes | +| runtime_mode | string | | Yes | +| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | Yes | +| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes | +| total_available_documents | integer | | Yes | +| total_documents | integer | | Yes | +| updated_at | integer | | Yes | +| updated_by | string | | Yes | +| word_count | integer | | Yes | + +#### DatasetListResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ [DatasetListItemResponse](#datasetlistitemresponse) ] | | Yes | +| has_more | boolean | | Yes | +| limit | integer | | Yes | +| page | integer | | Yes | +| total | integer | | Yes | + #### DatasetMetadataBuiltInFieldResponse | Name | Type | Description | Required | @@ -11793,17 +11927,46 @@ Condition detail | ---- | ---- | ----------- | -------- | | DatasetPermissionEnum | string | | | -#### DatasetQueryDetail +#### DatasetQueryContentResponse | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| created_at | object | | No | -| created_by | string | | No | -| created_by_role | string | | No | -| id | string | | No | -| queries | [DatasetContent](#datasetcontent) | | No | -| source | string | | No | -| source_app_id | string | | No | +| content | string | | Yes | +| content_type | string | | Yes | +| file_info | [DatasetQueryFileInfoResponse](#datasetqueryfileinforesponse) | | No | + +#### DatasetQueryDetailResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| created_at | integer | | Yes | +| created_by | string | | Yes | +| created_by_role | string | | Yes | +| id | string | | Yes | +| queries | [ [DatasetQueryContentResponse](#datasetquerycontentresponse) ] | | Yes | +| source | string | | Yes | +| source_app_id | string | | Yes | + +#### DatasetQueryFileInfoResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| extension | string | | Yes | +| id | string | | Yes | +| mime_type | string | | Yes | +| name | string | | Yes | +| size | integer | | Yes | +| source_url | string | | Yes | + +#### DatasetQueryListResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ [DatasetQueryDetailResponse](#datasetquerydetailresponse) ] | | Yes | +| has_more | boolean | | Yes | +| limit | integer | | Yes | +| page | integer | | Yes | +| total | integer | | Yes | #### DatasetRerankingModel @@ -11812,6 +11975,13 @@ Condition detail | reranking_model_name | string | | No | | reranking_provider_name | string | | No | +#### DatasetRerankingModelResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| reranking_model_name | string | | No | +| reranking_provider_name | string | | No | + #### DatasetResponse | Name | Type | Description | Required | @@ -11838,6 +12008,36 @@ Condition detail | top_k | integer | | No | | weights | [DatasetWeightedScore](#datasetweightedscore) | | No | +#### DatasetRetrievalModelResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| reranking_enable | boolean | | Yes | +| reranking_mode | string | | No | +| reranking_model | [DatasetRerankingModelResponse](#datasetrerankingmodelresponse) | | Yes | +| score_threshold | number | | No | +| score_threshold_enabled | boolean | | Yes | +| search_method | string | | Yes | +| top_k | integer | | Yes | +| weights | [DatasetWeightedScoreResponse](#datasetweightedscoreresponse) | | No | + +#### DatasetSummaryIndexSettingResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| enable | boolean | | No | +| model_name | string | | No | +| model_provider_name | string | | No | +| summary_prompt | string | | No | + +#### DatasetTagResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| id | string | | Yes | +| name | string | | Yes | +| type | string | | Yes | + #### DatasetUpdatePayload | Name | Type | Description | Required | @@ -11865,6 +12065,14 @@ Condition detail | embedding_provider_name | string | | No | | vector_weight | number | | No | +#### DatasetVectorSettingResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| embedding_model_name | string | | Yes | +| embedding_provider_name | string | | Yes | +| vector_weight | number | | Yes | + #### DatasetWeightedScore | Name | Type | Description | Required | @@ -11873,6 +12081,14 @@ Condition detail | vector_setting | [DatasetVectorSetting](#datasetvectorsetting) | | No | | weight_type | string | | No | +#### DatasetWeightedScoreResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| keyword_setting | [DatasetKeywordSettingResponse](#datasetkeywordsettingresponse) | | Yes | +| vector_setting | [DatasetVectorSettingResponse](#datasetvectorsettingresponse) | | Yes | +| weight_type | string | | Yes | + #### DatasourceCredentialDeletePayload | Name | Type | Description | Required | @@ -12066,6 +12282,29 @@ Request payload for bulk downloading documents as a zip archive. | ---- | ---- | ----------- | -------- | | document_ids | [ string ] | | Yes | +#### DocumentStatusListResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ [DocumentStatusResponse](#documentstatusresponse) ] | | Yes | + +#### DocumentStatusResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| cleaning_completed_at | integer | | Yes | +| completed_at | integer | | Yes | +| completed_segments | integer | | No | +| error | string | | Yes | +| id | string | | Yes | +| indexing_status | string | | Yes | +| parsing_completed_at | integer | | Yes | +| paused_at | integer | | Yes | +| processing_started_at | integer | | Yes | +| splitting_completed_at | integer | | Yes | +| stopped_at | integer | | Yes | +| total_segments | integer | | No | + #### DocumentWithSegmentsResponse | Name | Type | Description | Required | @@ -12310,6 +12549,13 @@ Request payload for bulk downloading documents as a zip archive. | ---- | ---- | ----------- | -------- | | environment_variables | [ object ] | Environment variables for the draft workflow | Yes | +#### ErrorDocsResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ [DocumentStatusResponse](#documentstatusresponse) ] | | Yes | +| total | integer | | Yes | + #### ExecutionContentType | Name | Type | Description | Required | @@ -12746,6 +12992,29 @@ Request payload for bulk downloading documents as a zip archive. | info_list | object | | Yes | | process_rule | object | | Yes | +#### IndexingEstimatePreviewItemResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| child_chunks | [ string ] | | No | +| content | string | | Yes | +| summary | string | | No | + +#### IndexingEstimateQaPreviewItemResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| answer | string | | Yes | +| question | string | | Yes | + +#### IndexingEstimateResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| preview | [ [IndexingEstimatePreviewItemResponse](#indexingestimatepreviewitemresponse) ] | | Yes | +| qa_preview | [ [IndexingEstimateQaPreviewItemResponse](#indexingestimateqapreviewitemresponse) ] | | No | +| total_segments | integer | | Yes | + #### InfoList | Name | Type | Description | Required | @@ -13649,6 +13918,12 @@ Form input definition. | model | string | | Yes | | model_type | [ModelType](#modeltype) | | Yes | +#### PartialMemberListResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ string ] | | Yes | + #### PartnerTenantsPayload | Name | Type | Description | Required | @@ -13901,12 +14176,25 @@ Form input definition. | ---- | ---- | ----------- | -------- | | redirect_url | string | | Yes | -#### RelatedAppList +#### RelatedAppListResponse | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| data | [ [AppDetailKernel](#appdetailkernel) ] | | No | -| total | integer | | No | +| data | [ [RelatedAppResponse](#relatedappresponse) ] | | Yes | +| total | integer | | Yes | + +#### RelatedAppResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| description | string | | Yes | +| icon | string | | Yes | +| icon_background | string | | Yes | +| icon_type | string | | Yes | +| icon_url | string | | No | +| id | string | | Yes | +| mode | string | | Yes | +| name | string | | Yes | #### RemoteFileInfo @@ -13954,6 +14242,12 @@ Form input definition. | top_k | integer | | Yes | | weights | [WeightModel](#weightmodel) | | No | +#### RetrievalSettingResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| retrieval_method | [ string ] | | Yes | + #### RosterAgentCreatePayload | Name | Type | Description | Required | diff --git a/api/openapi/markdown/service-swagger.md b/api/openapi/markdown/service-swagger.md index 17058e5e62..7f5591e32e 100644 --- a/api/openapi/markdown/service-swagger.md +++ b/api/openapi/markdown/service-swagger.md @@ -469,12 +469,22 @@ Resource for getting datasets List all datasets +##### Parameters + +| Name | Located in | Description | Required | Schema | +| ---- | ---------- | ----------- | -------- | ------ | +| include_all | query | Include all datasets | No | boolean | +| keyword | query | Search keyword | No | string | +| limit | query | Number of items per page | No | integer | +| page | query | Page number | No | integer | +| tag_ids | query | Filter by tag IDs | No | [ string ] | + ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Datasets retrieved successfully | -| 401 | Unauthorized - invalid API token | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Datasets retrieved successfully | [DatasetListResponse](#datasetlistresponse) | +| 401 | Unauthorized - invalid API token | | #### POST ##### Summary @@ -493,11 +503,11 @@ Create a new dataset ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Dataset created successfully | -| 400 | Bad request - invalid parameters | -| 401 | Unauthorized - invalid API token | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Dataset created successfully | [DatasetDetailResponse](#datasetdetailresponse) | +| 400 | Bad request - invalid parameters | | +| 401 | Unauthorized - invalid API token | | ### /datasets/pipeline/file-upload @@ -557,10 +567,10 @@ Get all knowledge type tags ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Tags retrieved successfully | -| 401 | Unauthorized - invalid API token | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Tags retrieved successfully | [KnowledgeTagListResponse](#knowledgetaglistresponse) | +| 401 | Unauthorized - invalid API token | | #### PATCH ##### Description @@ -575,11 +585,11 @@ Update a knowledge type tag ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Tag updated successfully | -| 401 | Unauthorized - invalid API token | -| 403 | Forbidden - insufficient permissions | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Tag updated successfully | [KnowledgeTagResponse](#knowledgetagresponse) | +| 401 | Unauthorized - invalid API token | | +| 403 | Forbidden - insufficient permissions | | #### POST ##### Summary @@ -598,11 +608,11 @@ Add a knowledge type tag ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Tag created successfully | -| 401 | Unauthorized - invalid API token | -| 403 | Forbidden - insufficient permissions | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Tag created successfully | [KnowledgeTagResponse](#knowledgetagresponse) | +| 401 | Unauthorized - invalid API token | | +| 403 | Forbidden - insufficient permissions | | ### /datasets/tags/binding @@ -696,12 +706,12 @@ Get a specific dataset by ID ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Dataset retrieved successfully | -| 401 | Unauthorized - invalid API token | -| 403 | Forbidden - insufficient permissions | -| 404 | Dataset not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Dataset retrieved successfully | [DatasetDetailWithPartialMembersResponse](#datasetdetailwithpartialmembersresponse) | +| 401 | Unauthorized - invalid API token | | +| 403 | Forbidden - insufficient permissions | | +| 404 | Dataset not found | | #### PATCH ##### Description @@ -717,12 +727,12 @@ Update an existing dataset ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Dataset updated successfully | -| 401 | Unauthorized - invalid API token | -| 403 | Forbidden - insufficient permissions | -| 404 | Dataset not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Dataset updated successfully | [DatasetDetailWithPartialMembersResponse](#datasetdetailwithpartialmembersresponse) | +| 401 | Unauthorized - invalid API token | | +| 403 | Forbidden - insufficient permissions | | +| 404 | Dataset not found | | ### /datasets/{dataset_id}/document/create-by-file @@ -1629,10 +1639,10 @@ Get tags bound to a specific dataset ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Tags retrieved successfully | -| 401 | Unauthorized - invalid API token | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Tags retrieved successfully | [DatasetBoundTagListResponse](#datasetboundtaglistresponse) | +| 401 | Unauthorized - invalid API token | | ### /end-users/{end_user_id} @@ -2279,14 +2289,19 @@ Condition detail | limit | integer | Number of variables to return | No | | variable_name | string | Filter variables by name | No | -#### DataSetTag +#### DatasetBoundTagListResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ [DatasetBoundTagResponse](#datasetboundtagresponse) ] | | Yes | +| total | integer | | Yes | + +#### DatasetBoundTagResponse | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| binding_count | string | | No | | id | string | | Yes | | name | string | | Yes | -| type | string | | Yes | #### DatasetCreatePayload @@ -2304,6 +2319,127 @@ Condition detail | retrieval_model | [RetrievalModel](#retrievalmodel) | | No | | summary_index_setting | object | | No | +#### DatasetDetailResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| app_count | integer | | Yes | +| author_name | string | | Yes | +| built_in_field_enabled | boolean | | Yes | +| chunk_structure | string | | Yes | +| created_at | integer | | Yes | +| created_by | string | | Yes | +| data_source_type | string | | Yes | +| description | string | | Yes | +| doc_form | string | | Yes | +| doc_metadata | [ [DatasetDocMetadataResponse](#datasetdocmetadataresponse) ] | | Yes | +| document_count | integer | | Yes | +| embedding_available | boolean | | No | +| embedding_model | string | | Yes | +| embedding_model_provider | string | | Yes | +| enable_api | boolean | | Yes | +| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | Yes | +| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes | +| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | Yes | +| id | string | | Yes | +| indexing_technique | string | | Yes | +| is_multimodal | boolean | | Yes | +| is_published | boolean | | Yes | +| name | string | | Yes | +| permission | string | | Yes | +| pipeline_id | string | | Yes | +| provider | string | | Yes | +| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes | +| runtime_mode | string | | Yes | +| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | Yes | +| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes | +| total_available_documents | integer | | Yes | +| total_documents | integer | | Yes | +| updated_at | integer | | Yes | +| updated_by | string | | Yes | +| word_count | integer | | Yes | + +#### DatasetDetailWithPartialMembersResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| app_count | integer | | Yes | +| author_name | string | | Yes | +| built_in_field_enabled | boolean | | Yes | +| chunk_structure | string | | Yes | +| created_at | integer | | Yes | +| created_by | string | | Yes | +| data_source_type | string | | Yes | +| description | string | | Yes | +| doc_form | string | | Yes | +| doc_metadata | [ [DatasetDocMetadataResponse](#datasetdocmetadataresponse) ] | | Yes | +| document_count | integer | | Yes | +| embedding_available | boolean | | No | +| embedding_model | string | | Yes | +| embedding_model_provider | string | | Yes | +| enable_api | boolean | | Yes | +| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | Yes | +| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes | +| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | Yes | +| id | string | | Yes | +| indexing_technique | string | | Yes | +| is_multimodal | boolean | | Yes | +| is_published | boolean | | Yes | +| name | string | | Yes | +| partial_member_list | [ string ] | | No | +| permission | string | | Yes | +| pipeline_id | string | | Yes | +| provider | string | | Yes | +| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes | +| runtime_mode | string | | Yes | +| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | Yes | +| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes | +| total_available_documents | integer | | Yes | +| total_documents | integer | | Yes | +| updated_at | integer | | Yes | +| updated_by | string | | Yes | +| word_count | integer | | Yes | + +#### DatasetDocMetadataResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| id | string | | Yes | +| name | string | | Yes | +| type | string | | Yes | + +#### DatasetExternalKnowledgeInfoResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| external_knowledge_api_endpoint | string | | Yes | +| external_knowledge_api_id | string | | Yes | +| external_knowledge_api_name | string | | Yes | +| external_knowledge_id | string | | Yes | + +#### DatasetExternalRetrievalModelResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| score_threshold | number | | Yes | +| score_threshold_enabled | boolean | | No | +| top_k | integer | | Yes | + +#### DatasetIconInfoResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| icon | string | | Yes | +| icon_background | string | | No | +| icon_type | string | | Yes | +| icon_url | string | | No | + +#### DatasetKeywordSettingResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| keyword_weight | number | | Yes | + #### DatasetListQuery | Name | Type | Description | Required | @@ -2314,6 +2450,16 @@ Condition detail | page | integer | Page number | No | | tag_ids | [ string ] | Filter by tag IDs | No | +#### DatasetListResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ [DatasetDetailResponse](#datasetdetailresponse) ] | | Yes | +| has_more | boolean | | Yes | +| limit | integer | | Yes | +| page | integer | | Yes | +| total | integer | | Yes | + #### DatasetMetadataActionResponse | Name | Type | Description | Required | @@ -2363,6 +2509,43 @@ Condition detail | ---- | ---- | ----------- | -------- | | DatasetPermissionEnum | string | | | +#### DatasetRerankingModelResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| reranking_model_name | string | | No | +| reranking_provider_name | string | | No | + +#### DatasetRetrievalModelResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| reranking_enable | boolean | | Yes | +| reranking_mode | string | | No | +| reranking_model | [DatasetRerankingModelResponse](#datasetrerankingmodelresponse) | | Yes | +| score_threshold | number | | No | +| score_threshold_enabled | boolean | | Yes | +| search_method | string | | Yes | +| top_k | integer | | Yes | +| weights | [DatasetWeightedScoreResponse](#datasetweightedscoreresponse) | | No | + +#### DatasetSummaryIndexSettingResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| enable | boolean | | No | +| model_name | string | | No | +| model_provider_name | string | | No | +| summary_prompt | string | | No | + +#### DatasetTagResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| id | string | | Yes | +| name | string | | Yes | +| type | string | | Yes | + #### DatasetUpdatePayload | Name | Type | Description | Required | @@ -2379,6 +2562,22 @@ Condition detail | permission | [DatasetPermissionEnum](#datasetpermissionenum) | | No | | retrieval_model | [RetrievalModel](#retrievalmodel) | | No | +#### DatasetVectorSettingResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| embedding_model_name | string | | Yes | +| embedding_provider_name | string | | Yes | +| vector_weight | number | | Yes | + +#### DatasetWeightedScoreResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| keyword_setting | [DatasetKeywordSettingResponse](#datasetkeywordsettingresponse) | | Yes | +| vector_setting | [DatasetVectorSettingResponse](#datasetvectorsettingresponse) | | Yes | +| weight_type | string | | Yes | + #### DatasourceNodeRunPayload | Name | Type | Description | Required | @@ -2522,6 +2721,21 @@ Note: The SQLAlchemy model defines an `is_anonymous` property for Flask-Login se | ---- | ---- | ----------- | -------- | | JsonValue | | | | +#### KnowledgeTagListResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| KnowledgeTagListResponse | array | | | + +#### KnowledgeTagResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| binding_count | string | | No | +| id | string | | Yes | +| name | string | | Yes | +| type | string | | Yes | + #### MessageFeedbackPayload | Name | Type | Description | Required | diff --git a/api/tests/test_containers_integration_tests/controllers/service_api/dataset/test_dataset.py b/api/tests/test_containers_integration_tests/controllers/service_api/dataset/test_dataset.py index b73d28e4c4..6d35655817 100644 --- a/api/tests/test_containers_integration_tests/controllers/service_api/dataset/test_dataset.py +++ b/api/tests/test_containers_integration_tests/controllers/service_api/dataset/test_dataset.py @@ -14,8 +14,9 @@ since these test controller-level behavior. """ import uuid -from types import SimpleNamespace -from unittest.mock import Mock, patch +from contextlib import ExitStack +from datetime import UTC, datetime +from unittest.mock import Mock, PropertyMock, patch import pytest from flask import Flask @@ -35,7 +36,7 @@ from controllers.service_api.dataset.dataset import ( ) from controllers.service_api.dataset.error import DatasetInUseError, DatasetNameDuplicateError, InvalidActionError from models.account import Account -from models.dataset import DatasetPermissionEnum +from models.dataset import Dataset, DatasetPermissionEnum from models.enums import TagType from models.model import Tag @@ -116,6 +117,7 @@ class TestDatasetUpdatePayload: partial_member_list=[{"user_id": "user_123", "role": "editor"}], ) assert payload.permission == DatasetPermissionEnum.PARTIAL_TEAM + assert payload.partial_member_list is not None assert len(payload.partial_member_list) == 1 def test_payload_name_length_validation(self): @@ -181,7 +183,7 @@ class TestTagUpdatePayload: def test_payload_requires_tag_id(self): with pytest.raises(ValueError): - TagUpdatePayload(name="Updated Tag") + TagUpdatePayload.model_validate({"name": "Updated Tag"}) class TestTagDeletePayload: @@ -193,7 +195,7 @@ class TestTagDeletePayload: def test_payload_requires_tag_id(self): with pytest.raises(ValueError): - TagDeletePayload() + TagDeletePayload.model_validate({}) class TestTagBindingPayload: @@ -264,13 +266,134 @@ def mock_tenant(): @pytest.fixture def mock_dataset(): - dataset = Mock() - dataset.id = str(uuid.uuid4()) - dataset.tenant_id = str(uuid.uuid4()) - dataset.indexing_technique = "economy" - dataset.embedding_model_provider = None - dataset.embedding_model = None - return dataset + return make_dataset(id=str(uuid.uuid4()), tenant_id=str(uuid.uuid4())) + + +@pytest.fixture(autouse=True) +def dataset_model_property_defaults(): + properties: dict[str, object] = { + "app_count": 0, + "document_count": 0, + "word_count": 0, + "author_name": None, + "tags": [], + "doc_form": None, + "external_knowledge_info": None, + "doc_metadata": [], + "is_published": False, + "total_documents": 0, + "total_available_documents": 0, + } + + with ExitStack() as stack: + for name, value in properties.items(): + property_mock = stack.enter_context(patch.object(Dataset, name, new_callable=PropertyMock)) + property_mock.return_value = value + yield + + +def make_dataset(**overrides) -> Dataset: + base = { + "id": "ds-1", + "tenant_id": "tenant-1", + "name": "Dataset", + "description": "desc", + "provider": "vendor", + "permission": "only_me", + "data_source_type": None, + "indexing_technique": "economy", + "created_by": "account-1", + "created_at": datetime(2024, 1, 1, 12, 0, 0, tzinfo=UTC), + "updated_by": None, + "updated_at": datetime(2024, 1, 1, 12, 0, 0, tzinfo=UTC), + "embedding_model": None, + "embedding_model_provider": None, + "retrieval_model": None, + "summary_index_setting": None, + "built_in_field_enabled": False, + "pipeline_id": None, + "runtime_mode": "general", + "chunk_structure": None, + "icon_info": None, + "enable_api": False, + "is_multimodal": False, + } + base.update(overrides) + return Dataset(**base) + + +def make_tag(*, id: str, name: str, binding_count: int | None = None) -> Tag: + tag = Tag(tenant_id="tenant-1", type=TagType.KNOWLEDGE, name=name, created_by="account-1") + tag.id = id + if binding_count is not None: + tag.__dict__["binding_count"] = binding_count + return tag + + +DATASET_DETAIL_KEYS = { + "id", + "name", + "description", + "provider", + "permission", + "data_source_type", + "indexing_technique", + "app_count", + "document_count", + "word_count", + "created_by", + "author_name", + "created_at", + "updated_by", + "updated_at", + "embedding_model", + "embedding_model_provider", + "embedding_available", + "retrieval_model_dict", + "summary_index_setting", + "tags", + "doc_form", + "external_knowledge_info", + "external_retrieval_model", + "doc_metadata", + "built_in_field_enabled", + "pipeline_id", + "runtime_mode", + "chunk_structure", + "icon_info", + "is_published", + "total_documents", + "total_available_documents", + "enable_api", + "is_multimodal", +} + + +def assert_dataset_detail_shape(response: dict, *, with_partial_members: bool = False) -> None: + expected_keys = set(DATASET_DETAIL_KEYS) + if with_partial_members: + expected_keys.add("partial_member_list") + assert set(response) == expected_keys + assert isinstance(response["created_at"], int) + assert isinstance(response["updated_at"], int) + assert set(response["retrieval_model_dict"]) == { + "search_method", + "reranking_enable", + "reranking_mode", + "reranking_model", + "weights", + "top_k", + "score_threshold_enabled", + "score_threshold", + } + if response["external_retrieval_model"] is not None: + assert set(response["external_retrieval_model"]) == { + "top_k", + "score_threshold", + "score_threshold_enabled", + } + if not with_partial_members: + assert "partial_member_list" not in response # --------------------------------------------------------------------------- @@ -281,7 +404,6 @@ def mock_dataset(): class TestDatasetListApiGet: """Test suite for DatasetListApi.get() endpoint.""" - @patch("controllers.service_api.dataset.dataset.marshal") @patch("controllers.service_api.dataset.dataset.create_plugin_provider_manager") @patch("controllers.service_api.dataset.dataset.current_user") @patch("controllers.service_api.dataset.dataset.DatasetService") @@ -290,7 +412,6 @@ class TestDatasetListApiGet: mock_dataset_svc, mock_current_user, mock_provider_mgr, - mock_marshal, app: Flask, mock_tenant, ): @@ -298,42 +419,79 @@ class TestDatasetListApiGet: mock_current_user.__class__ = Account mock_current_user.current_tenant_id = mock_tenant.id - mock_dataset_svc.get_datasets.return_value = ([Mock()], 1) + mock_dataset_svc.get_datasets.return_value = ([make_dataset()], 1) mock_configs = Mock() mock_configs.get_models.return_value = [] mock_provider_mgr.return_value.get_configurations.return_value = mock_configs - mock_marshal.return_value = [{"indexing_technique": "economy", "embedding_model_provider": None}] - with app.test_request_context("/datasets?page=1&limit=20", method="GET"): api = DatasetListApi() response, status = api.get(tenant_id=mock_tenant.id) assert status == 200 - assert "data" in response - assert "total" in response + assert set(response) == {"data", "has_more", "limit", "total", "page"} + assert response["has_more"] is False + assert response["limit"] == 20 + assert response["total"] == 1 + assert response["page"] == 1 + assert len(response["data"]) == 1 + assert_dataset_detail_shape(response["data"][0]) - -class TestDatasetListApiPost: - """Test suite for DatasetListApi.post() endpoint.""" - - @patch("controllers.service_api.dataset.dataset.marshal") + @patch("controllers.service_api.dataset.dataset.create_plugin_provider_manager") @patch("controllers.service_api.dataset.dataset.current_user") @patch("controllers.service_api.dataset.dataset.DatasetService") - def test_create_dataset_success( + def test_list_datasets_preserves_repeated_tag_ids( self, mock_dataset_svc, mock_current_user, - mock_marshal, + mock_provider_mgr, app: Flask, mock_tenant, ): from controllers.service_api.dataset.dataset import DatasetListApi mock_current_user.__class__ = Account - mock_dataset_svc.create_empty_dataset.return_value = Mock() - mock_marshal.return_value = {"id": "ds-1", "name": "New Dataset"} + mock_current_user.current_tenant_id = mock_tenant.id + mock_dataset_svc.get_datasets.return_value = ([make_dataset()], 1) + + mock_configs = Mock() + mock_configs.get_models.return_value = [] + mock_provider_mgr.return_value.get_configurations.return_value = mock_configs + + with app.test_request_context("/datasets?tag_ids=tag-a&tag_ids=tag-b", method="GET"): + api = DatasetListApi() + response, status = api.get(tenant_id=mock_tenant.id) + + assert status == 200 + assert response["total"] == 1 + mock_dataset_svc.get_datasets.assert_called_once_with( + 1, + 20, + mock_tenant.id, + mock_current_user, + None, + ["tag-a", "tag-b"], + False, + ) + + +class TestDatasetListApiPost: + """Test suite for DatasetListApi.post() endpoint.""" + + @patch("controllers.service_api.dataset.dataset.current_user") + @patch("controllers.service_api.dataset.dataset.DatasetService") + def test_create_dataset_success( + self, + mock_dataset_svc, + mock_current_user, + app: Flask, + mock_tenant, + ): + from controllers.service_api.dataset.dataset import DatasetListApi + + mock_current_user.__class__ = Account + mock_dataset_svc.create_empty_dataset.return_value = make_dataset(name="New Dataset") with app.test_request_context( "/datasets", @@ -344,6 +502,8 @@ class TestDatasetListApiPost: response, status = _unwrap(api.post)(api, tenant_id=mock_tenant.id) assert status == 200 + assert_dataset_detail_shape(response) + assert response["name"] == "New Dataset" mock_dataset_svc.create_empty_dataset.assert_called_once() @patch("controllers.service_api.dataset.dataset.current_user") @@ -379,7 +539,6 @@ class TestDatasetApiGet: """Test suite for DatasetApi.get() endpoint.""" @patch("controllers.service_api.dataset.dataset.DatasetPermissionService") - @patch("controllers.service_api.dataset.dataset.marshal") @patch("controllers.service_api.dataset.dataset.create_plugin_provider_manager") @patch("controllers.service_api.dataset.dataset.current_user") @patch("controllers.service_api.dataset.dataset.DatasetService") @@ -388,7 +547,6 @@ class TestDatasetApiGet: mock_dataset_svc, mock_current_user, mock_provider_mgr, - mock_marshal, mock_perm_svc, app: Flask, mock_dataset, @@ -404,11 +562,43 @@ class TestDatasetApiGet: mock_configs.get_models.return_value = [] mock_provider_mgr.return_value.get_configurations.return_value = mock_configs - mock_marshal.return_value = { - "indexing_technique": "economy", - "embedding_model_provider": None, - "permission": "only_me", - } + with app.test_request_context( + f"/datasets/{mock_dataset.id}", + method="GET", + ): + api = DatasetApi() + response, status = api.get(_=mock_dataset.tenant_id, dataset_id=mock_dataset.id) + + assert status == 200 + assert_dataset_detail_shape(response) + assert response["embedding_available"] is True + assert response["retrieval_model_dict"]["search_method"] == "keyword_search" + + @patch("controllers.service_api.dataset.dataset.DatasetPermissionService") + @patch("controllers.service_api.dataset.dataset.create_plugin_provider_manager") + @patch("controllers.service_api.dataset.dataset.current_user") + @patch("controllers.service_api.dataset.dataset.DatasetService") + def test_get_dataset_partial_members_shape( + self, + mock_dataset_svc, + mock_current_user, + mock_provider_mgr, + mock_perm_svc, + app: Flask, + mock_dataset, + ): + from controllers.service_api.dataset.dataset import DatasetApi + + mock_dataset.permission = "partial_members" + mock_dataset_svc.get_dataset.return_value = mock_dataset + mock_dataset_svc.check_dataset_permission.return_value = None + mock_current_user.__class__ = Account + mock_current_user.current_tenant_id = mock_dataset.tenant_id + mock_perm_svc.get_dataset_partial_member_list.return_value = ["user-1", "user-2"] + + mock_configs = Mock() + mock_configs.get_models.return_value = [] + mock_provider_mgr.return_value.get_configurations.return_value = mock_configs with app.test_request_context( f"/datasets/{mock_dataset.id}", @@ -418,7 +608,45 @@ class TestDatasetApiGet: response, status = api.get(_=mock_dataset.tenant_id, dataset_id=mock_dataset.id) assert status == 200 - assert response["embedding_available"] is True + assert_dataset_detail_shape(response, with_partial_members=True) + assert response["partial_member_list"] == ["user-1", "user-2"] + + @patch("controllers.service_api.dataset.dataset.DatasetPermissionService") + @patch("controllers.service_api.dataset.dataset.create_plugin_provider_manager") + @patch("controllers.service_api.dataset.dataset.current_user") + @patch("controllers.service_api.dataset.dataset.DatasetService") + def test_get_dataset_uses_default_external_retrieval_model( + self, + mock_dataset_svc, + mock_current_user, + mock_provider_mgr, + mock_perm_svc, + app: Flask, + mock_dataset, + ): + from controllers.service_api.dataset.dataset import DatasetApi + + mock_dataset.retrieval_model = None + mock_dataset_svc.get_dataset.return_value = mock_dataset + mock_dataset_svc.check_dataset_permission.return_value = None + mock_current_user.__class__ = Account + mock_current_user.current_tenant_id = mock_dataset.tenant_id + + mock_configs = Mock() + mock_configs.get_models.return_value = [] + mock_provider_mgr.return_value.get_configurations.return_value = mock_configs + + with app.test_request_context(f"/datasets/{mock_dataset.id}", method="GET"): + api = DatasetApi() + response, status = api.get(_=mock_dataset.tenant_id, dataset_id=mock_dataset.id) + + assert status == 200 + assert_dataset_detail_shape(response) + assert response["external_retrieval_model"] == { + "top_k": 2, + "score_threshold": 0.0, + "score_threshold_enabled": None, + } @patch("controllers.service_api.dataset.dataset.DatasetService") def test_get_dataset_not_found(self, mock_dataset_svc, app, mock_dataset): @@ -457,6 +685,58 @@ class TestDatasetApiGet: api.get(_=mock_dataset.tenant_id, dataset_id=mock_dataset.id) +class TestDatasetApiPatch: + """Test suite for DatasetApi.patch() endpoint.""" + + @patch("controllers.service_api.dataset.dataset.DatasetPermissionService") + @patch("controllers.service_api.dataset.dataset.current_user") + @patch("controllers.service_api.dataset.dataset.DatasetService") + def test_patch_dataset_success_shape( + self, + mock_dataset_svc, + mock_current_user, + mock_perm_svc, + app: Flask, + mock_dataset, + ): + from controllers.service_api.dataset.dataset import DatasetApi + + updated_dataset = make_dataset(id=mock_dataset.id, tenant_id=mock_dataset.tenant_id, name="Updated Dataset") + mock_dataset_svc.get_dataset.return_value = mock_dataset + mock_dataset_svc.update_dataset.return_value = updated_dataset + mock_perm_svc.check_permission.return_value = None + mock_perm_svc.get_dataset_partial_member_list.return_value = ["user-1"] + mock_current_user.__class__ = Account + mock_current_user.current_tenant_id = mock_dataset.tenant_id + + payload = { + "name": "Updated Dataset", + "permission": "partial_members", + "partial_member_list": [{"user_id": "user-1", "role": "editor"}], + } + with app.test_request_context( + f"/datasets/{mock_dataset.id}", + method="PATCH", + json=payload, + ): + api = DatasetApi() + response, status = _unwrap(api.patch)(api, _=mock_dataset.tenant_id, dataset_id=mock_dataset.id) + + assert status == 200 + assert_dataset_detail_shape(response, with_partial_members=True) + assert response["name"] == "Updated Dataset" + assert response["partial_member_list"] == ["user-1"] + mock_dataset_svc.update_dataset.assert_called_once() + _, update_data, _ = mock_dataset_svc.update_dataset.call_args.args + assert update_data["name"] == "Updated Dataset" + assert update_data["permission"] == "partial_members" + mock_perm_svc.update_partial_member_list.assert_called_once_with( + mock_dataset.tenant_id, + mock_dataset.id, + [{"user_id": "user-1", "role": "editor"}], + ) + + class TestDatasetApiDelete: """Test suite for DatasetApi.delete() endpoint.""" @@ -715,7 +995,7 @@ class TestDatasetTagsApiGet: mock_current_user.__class__ = Account mock_current_user.current_tenant_id = "tenant-1" - mock_tag = SimpleNamespace(id="tag-1", name="Test Tag", type="knowledge", binding_count="0") + mock_tag = make_tag(id="tag-1", name="Test Tag", binding_count=0) mock_tag_svc.get_tags.return_value = [mock_tag] with app.test_request_context("/datasets/tags", method="GET"): @@ -723,10 +1003,9 @@ class TestDatasetTagsApiGet: response, status = api.get(_=None) assert status == 200 - assert len(response) == 1 + assert response == [{"id": "tag-1", "name": "Test Tag", "type": "knowledge", "binding_count": "0"}] mock_tag_svc.get_tags.assert_called_once_with("knowledge", "tenant-1") - @pytest.mark.skip(reason="Production bug: DataSetTag.binding_count is str|None but DB COUNT() returns int") @patch("controllers.service_api.dataset.dataset.current_user") def test_list_tags_from_db( self, @@ -762,12 +1041,13 @@ class TestDatasetTagsApiGet: assert status == 200 assert any(t["name"] == "Integration Tag" for t in response) + assert all(set(t) == {"id", "name", "type", "binding_count"} for t in response) + assert all(isinstance(t["binding_count"], str) for t in response) class TestDatasetTagsApiPost: """Test suite for DatasetTagsApi.post() endpoint.""" - @pytest.mark.skip(reason="Production bug: DataSetTag.binding_count is str|None but dataset.py passes int 0") @patch("controllers.service_api.dataset.dataset.TagService") @patch("controllers.service_api.dataset.dataset.current_user") def test_create_tag_success( @@ -781,7 +1061,7 @@ class TestDatasetTagsApiPost: mock_current_user.__class__ = Account mock_current_user.has_edit_permission = True mock_current_user.is_dataset_editor = True - mock_tag = SimpleNamespace(id="tag-new", name="New Tag", type="knowledge") + mock_tag = make_tag(id="tag-new", name="New Tag") mock_tag_svc.save_tags.return_value = mock_tag with app.test_request_context( @@ -793,7 +1073,7 @@ class TestDatasetTagsApiPost: response, status = api.post(_=None) assert status == 200 - assert response["name"] == "New Tag" + assert response == {"id": "tag-new", "name": "New Tag", "type": "knowledge", "binding_count": "0"} mock_tag_svc.save_tags.assert_called_once() @patch("controllers.service_api.dataset.dataset.current_user") @@ -817,7 +1097,6 @@ class TestDatasetTagsApiPost: class TestDatasetTagsApiPatch: """Test suite for DatasetTagsApi.patch() endpoint.""" - @pytest.mark.skip(reason="Production bug: DataSetTag.binding_count is str|None but dataset.py passes int 0") @patch("controllers.service_api.dataset.dataset.TagService") @patch("controllers.service_api.dataset.dataset.service_api_ns") @patch("controllers.service_api.dataset.dataset.current_user") @@ -834,7 +1113,7 @@ class TestDatasetTagsApiPatch: mock_current_user.has_edit_permission = True mock_current_user.is_dataset_editor = True - mock_tag = SimpleNamespace(id="tag-1", name="Updated Tag", type="knowledge") + mock_tag = make_tag(id="tag-1", name="Updated Tag") mock_tag_svc.update_tags.return_value = mock_tag mock_tag_svc.get_tag_binding_count.return_value = 5 mock_service_api_ns.payload = {"name": "Updated Tag", "tag_id": "tag-1"} @@ -848,8 +1127,11 @@ class TestDatasetTagsApiPatch: response, status = api.patch(_=None) assert status == 200 - assert response["name"] == "Updated Tag" - mock_tag_svc.update_tags.assert_called_once_with({"name": "Updated Tag", "type": "knowledge"}, "tag-1") + assert response == {"id": "tag-1", "name": "Updated Tag", "type": "knowledge", "binding_count": "5"} + mock_tag_svc.update_tags.assert_called_once() + update_payload, tag_id = mock_tag_svc.update_tags.call_args.args + assert update_payload.name == "Updated Tag" + assert tag_id == "tag-1" @patch("controllers.service_api.dataset.dataset.current_user") def test_update_tag_forbidden(self, mock_current_user, app: Flask): @@ -984,7 +1266,7 @@ class TestDatasetTagBindingApiPost: from services.tag_service import TagBindingCreatePayload mock_tag_svc.save_tag_binding.assert_called_once_with( - TagBindingCreatePayload(tag_ids=["tag-1"], target_id="ds-1", type="knowledge") + TagBindingCreatePayload(tag_ids=["tag-1"], target_id="ds-1", type=TagType.KNOWLEDGE) ) @patch("controllers.service_api.dataset.dataset.current_user") @@ -1035,7 +1317,7 @@ class TestDatasetTagUnbindingApiPost: from services.tag_service import TagBindingDeletePayload mock_tag_svc.delete_tag_binding.assert_called_once_with( - TagBindingDeletePayload(tag_ids=["tag-1"], target_id="ds-1", type="knowledge") + TagBindingDeletePayload(tag_ids=["tag-1"], target_id="ds-1", type=TagType.KNOWLEDGE) ) @patch("controllers.service_api.dataset.dataset.TagService") @@ -1065,7 +1347,7 @@ class TestDatasetTagUnbindingApiPost: from services.tag_service import TagBindingDeletePayload mock_tag_svc.delete_tag_binding.assert_called_once_with( - TagBindingDeletePayload(tag_ids=["tag-1"], target_id="ds-1", type="knowledge") + TagBindingDeletePayload(tag_ids=["tag-1"], target_id="ds-1", type=TagType.KNOWLEDGE) ) @patch("controllers.service_api.dataset.dataset.current_user") diff --git a/api/tests/unit_tests/controllers/console/datasets/test_datasets.py b/api/tests/unit_tests/controllers/console/datasets/test_datasets.py index 4b0dff037f..3de2260c42 100644 --- a/api/tests/unit_tests/controllers/console/datasets/test_datasets.py +++ b/api/tests/unit_tests/controllers/console/datasets/test_datasets.py @@ -1,4 +1,6 @@ import datetime +import json +from contextlib import ExitStack from unittest.mock import MagicMock, PropertyMock, patch import pytest @@ -31,8 +33,9 @@ from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.provider_manager import ProviderManager from core.rag.index_processor.constant.index_type import IndexStructureType from extensions.storage.storage_type import StorageType -from models.enums import CreatorUserRole -from models.model import ApiToken, UploadFile +from models.dataset import Dataset, DatasetQuery, Document +from models.enums import CreatorUserRole, DataSourceType, DocumentCreatedFrom, IndexingStatus +from models.model import ApiToken, App, AppMode, IconType, UploadFile from services.dataset_service import DatasetPermissionService, DatasetService @@ -42,18 +45,107 @@ def unwrap(func): return func -class TestDatasetList: - def _mock_dataset_dict(self, **overrides): - base = { - "id": "ds-1", - "indexing_technique": "economy", - "embedding_model": None, - "embedding_model_provider": None, - "permission": "only_me", - } - base.update(overrides) - return base +@pytest.fixture(autouse=True) +def dataset_model_property_defaults(): + properties: dict[str, object] = { + "app_count": 0, + "document_count": 0, + "word_count": 0, + "author_name": None, + "tags": [], + "doc_form": None, + "external_knowledge_info": None, + "doc_metadata": [], + "is_published": False, + "total_documents": 0, + "total_available_documents": 0, + } + with ExitStack() as stack: + for name, value in properties.items(): + property_mock = stack.enter_context(patch.object(Dataset, name, new_callable=PropertyMock)) + property_mock.return_value = value + yield + + +def make_dataset(**overrides) -> Dataset: + base = { + "id": "ds-1", + "tenant_id": "tenant-1", + "name": "Dataset", + "description": "desc", + "provider": "vendor", + "permission": "only_me", + "data_source_type": None, + "indexing_technique": "economy", + "created_by": "account-1", + "created_at": datetime.datetime(2024, 1, 1, 12, 0, 0, tzinfo=datetime.UTC), + "updated_by": None, + "updated_at": datetime.datetime(2024, 1, 1, 12, 0, 0, tzinfo=datetime.UTC), + "embedding_model": None, + "embedding_model_provider": None, + "retrieval_model": None, + "summary_index_setting": None, + "built_in_field_enabled": False, + "pipeline_id": None, + "runtime_mode": "general", + "chunk_structure": None, + "icon_info": None, + "enable_api": False, + "is_multimodal": False, + } + base.update(overrides) + return Dataset(**base) + + +def make_related_app(**overrides) -> App: + base = { + "id": "app-1", + "tenant_id": "tenant-1", + "name": "App", + "description": "desc", + "mode": AppMode.CHAT, + "icon_type": IconType.EMOJI, + "icon": "🤖", + "icon_background": "#fff", + "app_model_config_id": None, + "workflow_id": None, + "enable_site": False, + "enable_api": False, + "created_by": "account-1", + } + base.update(overrides) + return App(**base) + + +def make_document_status(**overrides) -> Document: + base = { + "id": "doc-1", + "tenant_id": "tenant-1", + "dataset_id": "dataset-1", + "position": 1, + "data_source_type": DataSourceType.UPLOAD_FILE, + "batch": "batch-1", + "name": "doc.txt", + "created_from": DocumentCreatedFrom.WEB, + "created_by": "account-1", + "indexing_status": IndexingStatus.COMPLETED, + "enabled": True, + "archived": False, + "processing_started_at": None, + "parsing_completed_at": None, + "cleaning_completed_at": None, + "splitting_completed_at": None, + "completed_at": None, + "paused_at": None, + "error": None, + "stopped_at": None, + } + base.update(overrides) + return Document(**base) + + +class TestDatasetList: def _mock_user(self): user = MagicMock() user.is_dataset_editor = True @@ -64,8 +156,7 @@ class TestDatasetList: method = unwrap(api.get) current_user = self._mock_user() - datasets = [MagicMock()] - marshaled = [self._mock_dataset_dict()] + datasets = [make_dataset(icon_info={"icon": "📙", "icon_type": "emoji"})] with app.test_request_context("/datasets"): with ( @@ -78,10 +169,6 @@ class TestDatasetList: "get_datasets", return_value=(datasets, 1), ), - patch( - "controllers.console.datasets.datasets.marshal", - return_value=marshaled, - ), patch.object( ProviderManager, "get_configurations", @@ -93,14 +180,19 @@ class TestDatasetList: assert status == 200 assert resp["total"] == 1 assert resp["data"][0]["embedding_available"] is True + assert resp["data"][0]["icon_info"] == { + "icon": "📙", + "icon_background": None, + "icon_type": "emoji", + "icon_url": None, + } def test_get_with_ids_filter(self, app: Flask): api = DatasetListApi() method = unwrap(api.get) current_user = self._mock_user() - datasets = [MagicMock()] - marshaled = [self._mock_dataset_dict()] + datasets = [make_dataset()] with app.test_request_context("/datasets?ids=1&ids=2"): with ( @@ -113,10 +205,6 @@ class TestDatasetList: "get_datasets_by_ids", return_value=(datasets, 2), ) as by_ids_mock, - patch( - "controllers.console.datasets.datasets.marshal", - return_value=marshaled, - ), patch.object( ProviderManager, "get_configurations", @@ -134,8 +222,7 @@ class TestDatasetList: method = unwrap(api.get) current_user = self._mock_user() - datasets = [MagicMock()] - marshaled = [self._mock_dataset_dict()] + datasets = [make_dataset()] with app.test_request_context("/datasets?tag_ids=tag1"): with ( @@ -148,10 +235,6 @@ class TestDatasetList: "get_datasets", return_value=(datasets, 1), ), - patch( - "controllers.console.datasets.datasets.marshal", - return_value=marshaled, - ), patch.object( ProviderManager, "get_configurations", @@ -167,9 +250,8 @@ class TestDatasetList: method = unwrap(api.get) current_user = self._mock_user() - datasets = [MagicMock()] - marshaled = [ - self._mock_dataset_dict( + datasets = [ + make_dataset( indexing_technique="high_quality", embedding_model="text-embed", embedding_model_provider="openai", @@ -190,10 +272,6 @@ class TestDatasetList: "get_datasets", return_value=(datasets, 1), ), - patch( - "controllers.console.datasets.datasets.marshal", - return_value=marshaled, - ), patch.object( ProviderManager, "get_configurations", @@ -209,8 +287,7 @@ class TestDatasetList: method = unwrap(api.get) current_user = self._mock_user() - datasets = [MagicMock()] - marshaled = [self._mock_dataset_dict(permission="partial_members")] + datasets = [make_dataset(permission="partial_members")] with app.test_request_context("/datasets"): with ( @@ -227,10 +304,6 @@ class TestDatasetList: "controllers.console.datasets.datasets.db.session.execute", return_value=MagicMock(all=lambda: [("ds-1", "u1")]), ), - patch( - "controllers.console.datasets.datasets.marshal", - return_value=marshaled, - ), patch.object( ProviderManager, "get_configurations", @@ -257,22 +330,7 @@ class TestDatasetListApiPost: user = MagicMock() user.is_dataset_editor = True - dataset = MagicMock() - # ---- minimal required fields for marshal ---- - dataset.embedding_available = True - dataset.built_in_field_enabled = False - dataset.is_published = False - dataset.enable_api = False - dataset.is_multimodal = False - dataset.documents = [] - dataset.retrieval_model_dict = {} - dataset.tags = [] - dataset.external_knowledge_info = None - dataset.external_retrieval_model = None - dataset.doc_metadata = [] - dataset.icon_info = None - dataset.summary_index_setting = MagicMock() - dataset.summary_index_setting.enable = False + dataset = make_dataset(name=payload["name"], description=payload["description"]) with ( app.test_request_context("/datasets", json=payload), @@ -381,26 +439,7 @@ class TestDatasetApiGet: user = MagicMock() tenant_id = "tenant-1" - dataset = MagicMock() - dataset.id = dataset_id - dataset.indexing_technique = "economy" - dataset.embedding_model_provider = None - - dataset.embedding_available = True - dataset.built_in_field_enabled = False - dataset.is_published = False - dataset.enable_api = False - dataset.is_multimodal = False - dataset.documents = [] - dataset.retrieval_model_dict = {} - dataset.tags = [] - dataset.external_knowledge_info = None - dataset.external_retrieval_model = None - dataset.doc_metadata = [] - dataset.icon_info = None - dataset.summary_index_setting = MagicMock() - dataset.summary_index_setting.enable = False - dataset.permission = "only_me" + dataset = make_dataset(id=dataset_id) with ( app.test_request_context(f"/datasets/{dataset_id}"), @@ -428,6 +467,42 @@ class TestDatasetApiGet: assert status == 200 assert data["embedding_available"] is True + def test_get_uses_default_external_retrieval_model(self, app: Flask): + api = DatasetApi() + method = unwrap(api.get) + + dataset_id = "dataset-id" + dataset = make_dataset(id=dataset_id, retrieval_model=None) + + with ( + app.test_request_context(f"/datasets/{dataset_id}"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant"), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetService, + "check_dataset_permission", + return_value=None, + ), + patch("controllers.console.datasets.datasets.create_plugin_provider_manager") as provider_manager_mock, + ): + provider_manager_mock.return_value.get_configurations.return_value.get_models.return_value = [] + + data, status = method(api, dataset_id) + + assert status == 200 + assert data["external_retrieval_model"] == { + "top_k": 2, + "score_threshold": 0.0, + "score_threshold_enabled": None, + } + def test_get_dataset_not_found(self, app: Flask): api = DatasetApi() method = unwrap(api.get) @@ -484,27 +559,12 @@ class TestDatasetApiGet: user = MagicMock() tenant_id = "tenant-1" - dataset = MagicMock() - dataset.id = dataset_id - dataset.indexing_technique = "high_quality" - dataset.embedding_model = "text-embedding" - dataset.embedding_model_provider = "openai" - - dataset.embedding_available = True - dataset.built_in_field_enabled = False - dataset.is_published = False - dataset.enable_api = False - dataset.is_multimodal = False - dataset.documents = [] - dataset.retrieval_model_dict = {} - dataset.tags = [] - dataset.external_knowledge_info = None - dataset.external_retrieval_model = None - dataset.doc_metadata = [] - dataset.icon_info = None - dataset.summary_index_setting = MagicMock() - dataset.summary_index_setting.enable = False - dataset.permission = "only_me" + dataset = make_dataset( + id=dataset_id, + indexing_technique="high_quality", + embedding_model="text-embedding", + embedding_model_provider="openai", + ) with ( app.test_request_context(f"/datasets/{dataset_id}"), @@ -537,28 +597,9 @@ class TestDatasetApiGet: dataset_id = "dataset-id" - dataset = MagicMock() - dataset.id = dataset_id - dataset.indexing_technique = "economy" - dataset.embedding_model_provider = None - dataset.permission = "partial_members" + dataset = make_dataset(id=dataset_id, permission="partial_members") - dataset.embedding_available = True - dataset.built_in_field_enabled = False - dataset.is_published = False - dataset.enable_api = False - dataset.is_multimodal = False - dataset.documents = [] - dataset.retrieval_model_dict = {} - dataset.tags = [] - dataset.external_knowledge_info = None - dataset.external_retrieval_model = None - dataset.doc_metadata = [] - dataset.icon_info = None - dataset.summary_index_setting = MagicMock() - dataset.summary_index_setting.enable = False - - partial_members = [{"id": "u1"}, {"id": "u2"}] + partial_members = ["u1", "u2"] with ( app.test_request_context(f"/datasets/{dataset_id}"), @@ -605,27 +646,7 @@ class TestDatasetApiPatch: user = MagicMock() tenant_id = "tenant-1" - dataset = MagicMock() - dataset.id = dataset_id - dataset.tenant_id = tenant_id - dataset.permission = "only_me" - dataset.indexing_technique = "economy" - dataset.embedding_model_provider = None - - dataset.embedding_available = True - dataset.built_in_field_enabled = False - dataset.is_published = False - dataset.enable_api = False - dataset.is_multimodal = False - dataset.documents = [] - dataset.retrieval_model_dict = {} - dataset.tags = [] - dataset.external_knowledge_info = None - dataset.external_retrieval_model = None - dataset.doc_metadata = [] - dataset.icon_info = None - dataset.summary_index_setting = MagicMock() - dataset.summary_index_setting.enable = False + dataset = make_dataset(id=dataset_id, tenant_id=tenant_id) with ( app.test_request_context(f"/datasets/{dataset_id}"), @@ -713,29 +734,10 @@ class TestDatasetApiPatch: payload = { "permission": "partial_members", - "partial_member_list": [{"id": "u1"}, {"id": "u2"}], + "partial_member_list": [{"user_id": "u1"}, {"user_id": "u2"}], } - dataset = MagicMock() - dataset.id = dataset_id - dataset.permission = "partial_members" - dataset.indexing_technique = "economy" - dataset.embedding_model_provider = None - - dataset.embedding_available = True - dataset.built_in_field_enabled = False - dataset.is_published = False - dataset.enable_api = False - dataset.is_multimodal = False - dataset.documents = [] - dataset.retrieval_model_dict = {} - dataset.tags = [] - dataset.external_knowledge_info = None - dataset.external_retrieval_model = None - dataset.doc_metadata = [] - dataset.icon_info = None - dataset.summary_index_setting = MagicMock() - dataset.summary_index_setting.enable = False + dataset = make_dataset(id=dataset_id, permission="partial_members") with ( app.test_request_context(f"/datasets/{dataset_id}"), @@ -767,12 +769,12 @@ class TestDatasetApiPatch: patch.object( DatasetPermissionService, "get_dataset_partial_member_list", - return_value=payload["partial_member_list"], + return_value=["u1", "u2"], ), ): result, _ = method(api, dataset_id) - assert result["partial_member_list"] == payload["partial_member_list"] + assert result["partial_member_list"] == ["u1", "u2"] def test_patch_clear_partial_members(self, app: Flask): api = DatasetApi() @@ -784,26 +786,7 @@ class TestDatasetApiPatch: "permission": "only_me", } - dataset = MagicMock() - dataset.id = dataset_id - dataset.permission = "only_me" - dataset.indexing_technique = "economy" - dataset.embedding_model_provider = None - - dataset.embedding_available = True - dataset.built_in_field_enabled = False - dataset.is_published = False - dataset.enable_api = False - dataset.is_multimodal = False - dataset.documents = [] - dataset.retrieval_model_dict = {} - dataset.tags = [] - dataset.external_knowledge_info = None - dataset.external_retrieval_model = None - dataset.doc_metadata = [] - dataset.icon_info = None - dataset.summary_index_setting = MagicMock() - dataset.summary_index_setting.enable = False + dataset = make_dataset(id=dataset_id) with ( app.test_request_context(f"/datasets/{dataset_id}"), @@ -984,6 +967,27 @@ class TestDatasetUseCheckApi: class TestDatasetQueryApi: + def _query_record(self, index: int = 1) -> DatasetQuery: + query = DatasetQuery( + dataset_id="dataset-id", + content=json.dumps( + [ + { + "content_type": "text_query", + "content": f"question {index}", + "file_info": None, + } + ] + ), + source="hit_testing", + source_app_id=None, + created_by_role=CreatorUserRole.ACCOUNT, + created_by=f"account-{index}", + ) + query.id = f"query-{index}" + query.created_at = datetime.datetime(2024, 1, index, 12, 0, 0, tzinfo=datetime.UTC) + return query + def test_get_queries_success(self, app: Flask): api = DatasetQueryApi() method = unwrap(api.get) @@ -995,7 +999,7 @@ class TestDatasetQueryApi: dataset = MagicMock() dataset.id = dataset_id - queries = [MagicMock(), MagicMock()] + queries = [self._query_record(1), self._query_record(2)] with ( app.test_request_context("/datasets/queries?page=1&limit=20"), @@ -1027,6 +1031,21 @@ class TestDatasetQueryApi: assert response["limit"] == 20 assert response["has_more"] is False assert len(response["data"]) == 2 + assert response["data"][0] == { + "id": "query-1", + "queries": [ + { + "content_type": "text_query", + "content": "question 1", + "file_info": None, + } + ], + "source": "hit_testing", + "source_app_id": None, + "created_by_role": "account", + "created_by": "account-1", + "created_at": 1704110400, + } def test_get_queries_dataset_not_found(self, app: Flask): api = DatasetQueryApi() @@ -1089,7 +1108,7 @@ class TestDatasetQueryApi: dataset = MagicMock() dataset.id = dataset_id - queries = [MagicMock() for _ in range(20)] + queries = [self._query_record(index) for index in range(1, 21)] with ( app.test_request_context("/datasets/queries?page=1&limit=20"), @@ -1338,8 +1357,8 @@ class TestDatasetRelatedAppListApi: dataset = MagicMock() dataset.id = "dataset-1" - app1 = MagicMock() - app2 = MagicMock() + app1 = make_related_app(id="app-1", name="App 1") + app2 = make_related_app(id="app-2", name="App 2") join1 = MagicMock(app=app1) join2 = MagicMock(app=app2) @@ -1367,7 +1386,28 @@ class TestDatasetRelatedAppListApi: assert status == 200 assert response["total"] == 2 - assert response["data"] == [app1, app2] + assert response["data"] == [ + { + "id": "app-1", + "name": "App 1", + "description": "desc", + "mode": "chat", + "icon_type": "emoji", + "icon": "🤖", + "icon_background": "#fff", + "icon_url": None, + }, + { + "id": "app-2", + "name": "App 2", + "description": "desc", + "mode": "chat", + "icon_type": "emoji", + "icon": "🤖", + "icon_background": "#fff", + "icon_url": None, + }, + ] def test_get_dataset_not_found(self, app: Flask): api = DatasetRelatedAppListApi() @@ -1418,7 +1458,7 @@ class TestDatasetRelatedAppListApi: dataset = MagicMock() dataset.id = "dataset-1" - app1 = MagicMock() + app1 = make_related_app() join1 = MagicMock(app=app1) join2 = MagicMock(app=None) @@ -1446,7 +1486,18 @@ class TestDatasetRelatedAppListApi: assert status == 200 assert response["total"] == 1 - assert response["data"] == [app1] + assert response["data"] == [ + { + "id": "app-1", + "name": "App", + "description": "desc", + "mode": "chat", + "icon_type": "emoji", + "icon": "🤖", + "icon_background": "#fff", + "icon_url": None, + } + ] class TestDatasetIndexingStatusApi: @@ -1652,7 +1703,7 @@ class TestDatasetApiKeyApi: method(api) assert exc_info.value.code == 400 - assert exc_info.value.data == { + assert vars(exc_info.value)["data"] == { "message": "Cannot create more than 10 API keys for this resource type.", "custom": "max_keys_exceeded", } @@ -1833,7 +1884,7 @@ class TestDatasetErrorDocs: method = unwrap(api.get) dataset = MagicMock() - error_doc = MagicMock() + error_doc = make_document_status(id="error-doc", indexing_status=IndexingStatus.ERROR, error="failed") with ( app.test_request_context("/"), @@ -1872,7 +1923,7 @@ class TestDatasetPermissionUserListApi: method = unwrap(api.get) dataset = MagicMock() - users = [{"id": "u1"}, {"id": "u2"}] + users = ["u1", "u2"] with ( app.test_request_context("/"), @@ -1929,7 +1980,7 @@ class TestDatasetAutoDisableLogApi: method = unwrap(api.get) dataset = MagicMock() - logs = [{"reason": "quota"}] + logs = {"document_ids": ["doc-1"], "count": 1} with ( app.test_request_context("/"), diff --git a/packages/contracts/generated/api/console/datasets/orpc.gen.ts b/packages/contracts/generated/api/console/datasets/orpc.gen.ts index baec823590..c926c82620 100644 --- a/packages/contracts/generated/api/console/datasets/orpc.gen.ts +++ b/packages/contracts/generated/api/console/datasets/orpc.gen.ts @@ -574,16 +574,10 @@ export const processRule = { /** * Get mock dataset retrieval settings by vector type - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get10 = oc .route({ - deprecated: true, - description: - 'Get mock dataset retrieval settings by vector type\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get mock dataset retrieval settings by vector type', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsRetrievalSettingByVectorType', @@ -599,16 +593,10 @@ export const byVectorType = { /** * Get dataset retrieval settings - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get11 = oc .route({ - deprecated: true, - description: - 'Get dataset retrieval settings\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get dataset retrieval settings', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsRetrievalSetting', @@ -643,16 +631,10 @@ export const apiKeys2 = { /** * Get dataset auto disable logs - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get12 = oc .route({ - deprecated: true, - description: - 'Get dataset auto disable logs\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get dataset auto disable logs', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdAutoDisableLogs', @@ -1522,16 +1504,10 @@ export const documents = { /** * Get dataset error documents - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get27 = oc .route({ - deprecated: true, - description: - 'Get dataset error documents\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get dataset error documents', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdErrorDocs', @@ -1607,16 +1583,10 @@ export const hitTesting = { /** * Get dataset indexing status - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get28 = oc .route({ - deprecated: true, - description: - 'Get dataset indexing status\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get dataset indexing status', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdIndexingStatus', @@ -1739,16 +1709,10 @@ export const notion2 = { /** * Get dataset permission user list - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get31 = oc .route({ - deprecated: true, - description: - 'Get dataset permission user list\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get dataset permission user list', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdPermissionPartUsers', @@ -1764,16 +1728,10 @@ export const permissionPartUsers = { /** * Get dataset query history - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get32 = oc .route({ - deprecated: true, - description: - 'Get dataset query history\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get dataset query history', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdQueries', @@ -1789,16 +1747,10 @@ export const queries = { /** * Get applications related to dataset - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get33 = oc .route({ - deprecated: true, - description: - 'Get applications related to dataset\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get applications related to dataset', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdRelatedApps', @@ -1870,16 +1822,10 @@ export const delete9 = oc /** * Get dataset details - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get35 = oc .route({ - deprecated: true, - description: - 'Get dataset details\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get dataset details', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetId', @@ -2003,16 +1949,10 @@ export const byResourceId = { /** * Get list of datasets - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get37 = oc .route({ - deprecated: true, - description: - 'Get list of datasets\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get list of datasets', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasets', @@ -2024,16 +1964,10 @@ export const get37 = oc /** * Create a new dataset - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const post23 = oc .route({ - deprecated: true, - description: - 'Create a new dataset\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Create a new dataset', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasets', diff --git a/packages/contracts/generated/api/console/datasets/types.gen.ts b/packages/contracts/generated/api/console/datasets/types.gen.ts index 92df8a52a5..b0cafa0a66 100644 --- a/packages/contracts/generated/api/console/datasets/types.gen.ts +++ b/packages/contracts/generated/api/console/datasets/types.gen.ts @@ -4,6 +4,14 @@ export type ClientOptions = { baseUrl: `${string}://${string}/console/api` | (string & {}) } +export type DatasetListResponse = { + data: Array + has_more: boolean + limit: number + page: number + total: number +} + export type DatasetCreatePayload = { description?: string external_knowledge_api_id?: string | null @@ -14,6 +22,44 @@ export type DatasetCreatePayload = { provider?: string } +export type DatasetDetailResponse = { + app_count: number + author_name: string | null + built_in_field_enabled: boolean + chunk_structure: string | null + created_at: number + created_by: string + data_source_type: string | null + description: string | null + doc_form: string | null + doc_metadata: Array + document_count: number + embedding_available?: boolean | null + embedding_model: string | null + embedding_model_provider: string | null + enable_api: boolean + external_knowledge_info: DatasetExternalKnowledgeInfoResponse + external_retrieval_model: DatasetExternalRetrievalModelResponse + icon_info: DatasetIconInfoResponse + id: string + indexing_technique: string | null + is_multimodal: boolean + is_published: boolean + name: string + permission: string + pipeline_id: string | null + provider: string + retrieval_model_dict: DatasetRetrievalModelResponse + runtime_mode: string | null + summary_index_setting: DatasetSummaryIndexSettingResponse + tags: Array + total_available_documents: number + total_documents: number + updated_at: number + updated_by: string | null + word_count: number +} + export type ApiBaseUrlResponse = { api_base_url: string } @@ -116,6 +162,12 @@ export type IndexingEstimatePayload = { } } +export type IndexingEstimateResponse = { + preview: Array + qa_preview?: Array | null + total_segments: number +} + export type KnowledgeConfig = { data_source?: DataSource doc_form?: string @@ -159,6 +211,49 @@ export type NotionEstimatePayload = { } } +export type RetrievalSettingResponse = { + retrieval_method: Array +} + +export type DatasetDetailWithPartialMembersResponse = { + app_count: number + author_name: string | null + built_in_field_enabled: boolean + chunk_structure: string | null + created_at: number + created_by: string + data_source_type: string | null + description: string | null + doc_form: string | null + doc_metadata: Array + document_count: number + embedding_available?: boolean | null + embedding_model: string | null + embedding_model_provider: string | null + enable_api: boolean + external_knowledge_info: DatasetExternalKnowledgeInfoResponse + external_retrieval_model: DatasetExternalRetrievalModelResponse + icon_info: DatasetIconInfoResponse + id: string + indexing_technique: string | null + is_multimodal: boolean + is_published: boolean + name: string + partial_member_list?: Array | null + permission: string + pipeline_id: string | null + provider: string + retrieval_model_dict: DatasetRetrievalModelResponse + runtime_mode: string | null + summary_index_setting: DatasetSummaryIndexSettingResponse + tags: Array + total_available_documents: number + total_documents: number + updated_at: number + updated_by: string | null + word_count: number +} + export type DatasetUpdatePayload = { description?: string | null embedding_model?: string | null @@ -190,6 +285,11 @@ export type SimpleResultResponse = { result: string } +export type AutoDisableLogsResponse = { + count: number + document_ids: Array +} + export type DocumentBatchDownloadZipPayload = { document_ids: Array } @@ -271,6 +371,11 @@ export type ChildChunkUpdatePayload = { content: string } +export type ErrorDocsResponse = { + data: Array + total: number +} + export type ExternalHitTestingPayload = { external_retrieval_model?: { [key: string]: unknown @@ -295,6 +400,10 @@ export type HitTestingResponse = { records?: Array } +export type DocumentStatusListResponse = { + data: Array +} + export type DatasetMetadataListResponse = { built_in_field_enabled: boolean doc_metadata: Array @@ -315,21 +424,21 @@ export type MetadataUpdatePayload = { name: string } -export type DatasetQueryDetail = { - created_at?: { - [key: string]: unknown - } - created_by?: string - created_by_role?: string - id?: string - queries?: DatasetContent - source?: string - source_app_id?: string +export type PartialMemberListResponse = { + data: Array } -export type RelatedAppList = { - data?: Array - total?: number +export type DatasetQueryListResponse = { + data: Array + has_more: boolean + limit: number + page: number + total: number +} + +export type RelatedAppListResponse = { + data: Array + total: number } export type DocumentRetryPayload = { @@ -340,8 +449,97 @@ export type UsageCheckResponse = { is_using: boolean } +export type DatasetListItemResponse = { + app_count: number + author_name: string | null + built_in_field_enabled: boolean + chunk_structure: string | null + created_at: number + created_by: string + data_source_type: string | null + description: string | null + doc_form: string | null + doc_metadata: Array + document_count: number + embedding_available?: boolean | null + embedding_model: string | null + embedding_model_provider: string | null + enable_api: boolean + external_knowledge_info: DatasetExternalKnowledgeInfoResponse + external_retrieval_model: DatasetExternalRetrievalModelResponse + icon_info: DatasetIconInfoResponse + id: string + indexing_technique: string | null + is_multimodal: boolean + is_published: boolean + name: string + partial_member_list: Array + permission: string + pipeline_id: string | null + provider: string + retrieval_model_dict: DatasetRetrievalModelResponse + runtime_mode: string | null + summary_index_setting: DatasetSummaryIndexSettingResponse + tags: Array + total_available_documents: number + total_documents: number + updated_at: number + updated_by: string | null + word_count: number +} + export type DatasetPermissionEnum = 'all_team_members' | 'only_me' | 'partial_members' +export type DatasetDocMetadataResponse = { + id: string + name: string + type: string +} + +export type DatasetExternalKnowledgeInfoResponse = { + external_knowledge_api_endpoint: string + external_knowledge_api_id: string + external_knowledge_api_name: string + external_knowledge_id: string +} + +export type DatasetExternalRetrievalModelResponse = { + score_threshold: number + score_threshold_enabled?: boolean | null + top_k: number +} + +export type DatasetIconInfoResponse = { + icon: string | null + icon_background?: string | null + icon_type: string | null + icon_url?: string | null +} + +export type DatasetRetrievalModelResponse = { + reranking_enable: boolean + reranking_mode?: string | null + reranking_model: DatasetRerankingModelResponse + score_threshold?: number | null + score_threshold_enabled: boolean + search_method: string + top_k: number + weights?: DatasetWeightedScoreResponse +} + +export type DatasetSummaryIndexSettingResponse = { + enable?: boolean | null + model_name?: string | null + model_provider_name?: string | null + summary_prompt?: string | null +} + +export type DatasetTagResponse = { + id: string + name: string + type: string +} + export type DatasetDocMetadata = { id?: string name?: string @@ -392,6 +590,17 @@ export type Tag = { type: string } +export type IndexingEstimatePreviewItemResponse = { + child_chunks?: Array | null + content: string + summary?: string | null +} + +export type IndexingEstimateQaPreviewItemResponse = { + answer: string + question: string +} + export type DataSource = { info_list: InfoList } @@ -442,6 +651,21 @@ export type DocumentMetadataResponse = { value?: string | null } +export type DocumentStatusResponse = { + cleaning_completed_at: number | null + completed_at: number | null + completed_segments?: number | null + error: string | null + id: string + indexing_status: string + parsing_completed_at: number | null + paused_at: number | null + processing_started_at: number | null + splitting_completed_at: number | null + stopped_at: number | null + total_segments?: number | null +} + export type HitTestingRecord = { child_chunks?: Array files?: Array @@ -458,23 +682,36 @@ export type DatasetMetadataListItemResponse = { type: string } -export type DatasetContent = { - content?: string - content_type?: string - file_info?: DatasetFileInfo +export type DatasetQueryDetailResponse = { + created_at: number + created_by: string + created_by_role: string + id: string + queries: Array + source: string + source_app_id: string | null } -export type AppDetailKernel = { - description?: string - icon?: string - icon_background?: string - icon_type?: string - icon_url?: { - [key: string]: unknown - } - id?: string - mode?: string - name?: string +export type RelatedAppResponse = { + description: string + icon: string | null + icon_background: string | null + icon_type: string | null + icon_url?: string | null + id: string + mode: string + name: string +} + +export type DatasetRerankingModelResponse = { + reranking_model_name?: string | null + reranking_provider_name?: string | null +} + +export type DatasetWeightedScoreResponse = { + keyword_setting: DatasetKeywordSettingResponse + vector_setting: DatasetVectorSettingResponse + weight_type: string | null } export type DatasetRerankingModel = { @@ -572,13 +809,20 @@ export type HitTestingSegment = { word_count?: number | null } -export type DatasetFileInfo = { - extension?: string - id?: string - mime_type?: string - name?: string - size?: number - source_url?: string +export type DatasetQueryContentResponse = { + content: string + content_type: string + file_info?: DatasetQueryFileInfoResponse +} + +export type DatasetKeywordSettingResponse = { + keyword_weight: number +} + +export type DatasetVectorSettingResponse = { + embedding_model_name: string + embedding_provider_name: string + vector_weight: number } export type DatasetKeywordSetting = { @@ -661,6 +905,15 @@ export type HitTestingDocument = { name?: string | null } +export type DatasetQueryFileInfoResponse = { + extension: string + id: string + mime_type: string + name: string + size: number + source_url: string +} + export type NotionPage = { page_icon?: NotionIcon page_id: string @@ -678,20 +931,18 @@ export type GetDatasetsData = { body?: never path?: never query?: { - ids?: string - include_all?: string + ids?: Array + include_all?: boolean keyword?: string - limit?: string - page?: string - tag_ids?: string + limit?: number + page?: number + tag_ids?: Array } url: '/datasets' } export type GetDatasetsResponses = { - 200: { - [key: string]: unknown - } + 200: DatasetListResponse } export type GetDatasetsResponse = GetDatasetsResponses[keyof GetDatasetsResponses] @@ -712,9 +963,7 @@ export type PostDatasetsErrors = { export type PostDatasetsError = PostDatasetsErrors[keyof PostDatasetsErrors] export type PostDatasetsResponses = { - 201: { - [key: string]: unknown - } + 201: DatasetDetailResponse } export type PostDatasetsResponse = PostDatasetsResponses[keyof PostDatasetsResponses] @@ -967,9 +1216,7 @@ export type PostDatasetsIndexingEstimateData = { } export type PostDatasetsIndexingEstimateResponses = { - 200: { - [key: string]: unknown - } + 200: IndexingEstimateResponse } export type PostDatasetsIndexingEstimateResponse @@ -1066,9 +1313,7 @@ export type GetDatasetsRetrievalSettingData = { } export type GetDatasetsRetrievalSettingResponses = { - 200: { - [key: string]: unknown - } + 200: RetrievalSettingResponse } export type GetDatasetsRetrievalSettingResponse @@ -1084,9 +1329,7 @@ export type GetDatasetsRetrievalSettingByVectorTypeData = { } export type GetDatasetsRetrievalSettingByVectorTypeResponses = { - 200: { - [key: string]: unknown - } + 200: RetrievalSettingResponse } export type GetDatasetsRetrievalSettingByVectorTypeResponse @@ -1132,7 +1375,7 @@ export type GetDatasetsByDatasetIdError = GetDatasetsByDatasetIdErrors[keyof GetDatasetsByDatasetIdErrors] export type GetDatasetsByDatasetIdResponses = { - 200: DatasetDetail + 200: DatasetDetailWithPartialMembersResponse } export type GetDatasetsByDatasetIdResponse @@ -1160,7 +1403,7 @@ export type PatchDatasetsByDatasetIdError = PatchDatasetsByDatasetIdErrors[keyof PatchDatasetsByDatasetIdErrors] export type PatchDatasetsByDatasetIdResponses = { - 200: DatasetDetail + 200: DatasetDetailWithPartialMembersResponse } export type PatchDatasetsByDatasetIdResponse @@ -1202,9 +1445,7 @@ export type GetDatasetsByDatasetIdAutoDisableLogsError = GetDatasetsByDatasetIdAutoDisableLogsErrors[keyof GetDatasetsByDatasetIdAutoDisableLogsErrors] export type GetDatasetsByDatasetIdAutoDisableLogsResponses = { - 200: { - [key: string]: unknown - } + 200: AutoDisableLogsResponse } export type GetDatasetsByDatasetIdAutoDisableLogsResponse @@ -1985,9 +2226,7 @@ export type GetDatasetsByDatasetIdErrorDocsError = GetDatasetsByDatasetIdErrorDocsErrors[keyof GetDatasetsByDatasetIdErrorDocsErrors] export type GetDatasetsByDatasetIdErrorDocsResponses = { - 200: { - [key: string]: unknown - } + 200: ErrorDocsResponse } export type GetDatasetsByDatasetIdErrorDocsResponse @@ -2061,9 +2300,7 @@ export type GetDatasetsByDatasetIdIndexingStatusData = { } export type GetDatasetsByDatasetIdIndexingStatusResponses = { - 200: { - [key: string]: unknown - } + 200: DocumentStatusListResponse } export type GetDatasetsByDatasetIdIndexingStatusResponse @@ -2194,9 +2431,7 @@ export type GetDatasetsByDatasetIdPermissionPartUsersError = GetDatasetsByDatasetIdPermissionPartUsersErrors[keyof GetDatasetsByDatasetIdPermissionPartUsersErrors] export type GetDatasetsByDatasetIdPermissionPartUsersResponses = { - 200: { - [key: string]: unknown - } + 200: PartialMemberListResponse } export type GetDatasetsByDatasetIdPermissionPartUsersResponse @@ -2212,7 +2447,7 @@ export type GetDatasetsByDatasetIdQueriesData = { } export type GetDatasetsByDatasetIdQueriesResponses = { - 200: DatasetQueryDetail + 200: DatasetQueryListResponse } export type GetDatasetsByDatasetIdQueriesResponse @@ -2228,7 +2463,7 @@ export type GetDatasetsByDatasetIdRelatedAppsData = { } export type GetDatasetsByDatasetIdRelatedAppsResponses = { - 200: RelatedAppList + 200: RelatedAppListResponse } export type GetDatasetsByDatasetIdRelatedAppsResponse diff --git a/packages/contracts/generated/api/console/datasets/zod.gen.ts b/packages/contracts/generated/api/console/datasets/zod.gen.ts index 984608ffcc..8f21000ace 100644 --- a/packages/contracts/generated/api/console/datasets/zod.gen.ts +++ b/packages/contracts/generated/api/console/datasets/zod.gen.ts @@ -98,6 +98,13 @@ export const zNotionEstimatePayload = z.object({ process_rule: z.record(z.string(), z.unknown()), }) +/** + * RetrievalSettingResponse + */ +export const zRetrievalSettingResponse = z.object({ + retrieval_method: z.array(z.string()), +}) + /** * SimpleResultResponse */ @@ -105,6 +112,14 @@ export const zSimpleResultResponse = z.object({ result: z.string(), }) +/** + * AutoDisableLogsResponse + */ +export const zAutoDisableLogsResponse = z.object({ + count: z.int(), + document_ids: z.array(z.string()), +}) + /** * DocumentBatchDownloadZipPayload * @@ -220,6 +235,13 @@ export const zMetadataUpdatePayload = z.object({ name: z.string(), }) +/** + * PartialMemberListResponse + */ +export const zPartialMemberListResponse = z.object({ + data: z.array(z.string()), +}) + /** * DocumentRetryPayload */ @@ -272,6 +294,63 @@ export const zDatasetUpdatePayload = z.object({ summary_index_setting: z.record(z.string(), z.unknown()).nullish(), }) +/** + * DatasetDocMetadataResponse + */ +export const zDatasetDocMetadataResponse = z.object({ + id: z.string(), + name: z.string(), + type: z.string(), +}) + +/** + * DatasetExternalKnowledgeInfoResponse + */ +export const zDatasetExternalKnowledgeInfoResponse = z.object({ + external_knowledge_api_endpoint: z.string(), + external_knowledge_api_id: z.string(), + external_knowledge_api_name: z.string(), + external_knowledge_id: z.string(), +}) + +/** + * DatasetExternalRetrievalModelResponse + */ +export const zDatasetExternalRetrievalModelResponse = z.object({ + score_threshold: z.number(), + score_threshold_enabled: z.boolean().nullish(), + top_k: z.int(), +}) + +/** + * DatasetIconInfoResponse + */ +export const zDatasetIconInfoResponse = z.object({ + icon: z.string().nullable(), + icon_background: z.string().nullish(), + icon_type: z.string().nullable(), + icon_url: z.string().nullish(), +}) + +/** + * DatasetSummaryIndexSettingResponse + */ +export const zDatasetSummaryIndexSettingResponse = z.object({ + enable: z.boolean().nullish(), + model_name: z.string().nullish(), + model_provider_name: z.string().nullish(), + summary_prompt: z.string().nullish(), +}) + +/** + * DatasetTagResponse + */ +export const zDatasetTagResponse = z.object({ + id: z.string(), + name: z.string(), + type: z.string(), +}) + export const zDatasetDocMetadata = z.object({ id: z.string().optional(), name: z.string().optional(), @@ -314,6 +393,32 @@ export const zTag = z.object({ type: z.string(), }) +/** + * IndexingEstimatePreviewItemResponse + */ +export const zIndexingEstimatePreviewItemResponse = z.object({ + child_chunks: z.array(z.string()).nullish(), + content: z.string(), + summary: z.string().nullish(), +}) + +/** + * IndexingEstimateQaPreviewItemResponse + */ +export const zIndexingEstimateQaPreviewItemResponse = z.object({ + answer: z.string(), + question: z.string(), +}) + +/** + * IndexingEstimateResponse + */ +export const zIndexingEstimateResponse = z.object({ + preview: z.array(zIndexingEstimatePreviewItemResponse), + qa_preview: z.array(zIndexingEstimateQaPreviewItemResponse).nullish(), + total_segments: z.int(), +}) + /** * DatasetResponse */ @@ -392,6 +497,39 @@ export const zDatasetAndDocumentResponse = z.object({ documents: z.array(zDocumentResponse), }) +/** + * DocumentStatusResponse + */ +export const zDocumentStatusResponse = z.object({ + cleaning_completed_at: z.int().nullable(), + completed_at: z.int().nullable(), + completed_segments: z.int().nullish(), + error: z.string().nullable(), + id: z.string(), + indexing_status: z.string(), + parsing_completed_at: z.int().nullable(), + paused_at: z.int().nullable(), + processing_started_at: z.int().nullable(), + splitting_completed_at: z.int().nullable(), + stopped_at: z.int().nullable(), + total_segments: z.int().nullish(), +}) + +/** + * ErrorDocsResponse + */ +export const zErrorDocsResponse = z.object({ + data: z.array(zDocumentStatusResponse), + total: z.int(), +}) + +/** + * DocumentStatusListResponse + */ +export const zDocumentStatusListResponse = z.object({ + data: z.array(zDocumentStatusResponse), +}) + /** * DatasetMetadataListItemResponse */ @@ -410,20 +548,34 @@ export const zDatasetMetadataListResponse = z.object({ doc_metadata: z.array(zDatasetMetadataListItemResponse), }) -export const zAppDetailKernel = z.object({ - description: z.string().optional(), - icon: z.string().optional(), - icon_background: z.string().optional(), - icon_type: z.string().optional(), - icon_url: z.record(z.string(), z.unknown()).optional(), - id: z.string().optional(), - mode: z.string().optional(), - name: z.string().optional(), +/** + * RelatedAppResponse + */ +export const zRelatedAppResponse = z.object({ + description: z.string(), + icon: z.string().nullable(), + icon_background: z.string().nullable(), + icon_type: z.string().nullable(), + icon_url: z.string().nullish(), + id: z.string(), + mode: z.string(), + name: z.string(), }) -export const zRelatedAppList = z.object({ - data: z.array(zAppDetailKernel).optional(), - total: z.int().optional(), +/** + * RelatedAppListResponse + */ +export const zRelatedAppListResponse = z.object({ + data: z.array(zRelatedAppResponse), + total: z.int(), +}) + +/** + * DatasetRerankingModelResponse + */ +export const zDatasetRerankingModelResponse = z.object({ + reranking_model_name: z.string().nullish(), + reranking_provider_name: z.string().nullish(), }) export const zDatasetRerankingModel = z.object({ @@ -498,29 +650,179 @@ export const zHitTestingFile = z.object({ source_url: z.string().nullish(), }) -export const zDatasetFileInfo = z.object({ - extension: z.string().optional(), - id: z.string().optional(), - mime_type: z.string().optional(), - name: z.string().optional(), - size: z.int().optional(), - source_url: z.string().optional(), +/** + * DatasetKeywordSettingResponse + */ +export const zDatasetKeywordSettingResponse = z.object({ + keyword_weight: z.number(), }) -export const zDatasetContent = z.object({ - content: z.string().optional(), - content_type: z.string().optional(), - file_info: zDatasetFileInfo.optional(), +/** + * DatasetVectorSettingResponse + */ +export const zDatasetVectorSettingResponse = z.object({ + embedding_model_name: z.string(), + embedding_provider_name: z.string(), + vector_weight: z.number(), }) -export const zDatasetQueryDetail = z.object({ - created_at: z.record(z.string(), z.unknown()).optional(), - created_by: z.string().optional(), - created_by_role: z.string().optional(), - id: z.string().optional(), - queries: zDatasetContent.optional(), - source: z.string().optional(), - source_app_id: z.string().optional(), +/** + * DatasetWeightedScoreResponse + */ +export const zDatasetWeightedScoreResponse = z.object({ + keyword_setting: zDatasetKeywordSettingResponse, + vector_setting: zDatasetVectorSettingResponse, + weight_type: z.string().nullable(), +}) + +/** + * DatasetRetrievalModelResponse + */ +export const zDatasetRetrievalModelResponse = z.object({ + reranking_enable: z.boolean(), + reranking_mode: z.string().nullish(), + reranking_model: zDatasetRerankingModelResponse, + score_threshold: z.number().nullish(), + score_threshold_enabled: z.boolean(), + search_method: z.string(), + top_k: z.int(), + weights: zDatasetWeightedScoreResponse.optional(), +}) + +/** + * DatasetDetailResponse + */ +export const zDatasetDetailResponse = z.object({ + app_count: z.int(), + author_name: z.string().nullable(), + built_in_field_enabled: z.boolean(), + chunk_structure: z.string().nullable(), + created_at: z.int(), + created_by: z.string(), + data_source_type: z.string().nullable(), + description: z.string().nullable(), + doc_form: z.string().nullable(), + doc_metadata: z.array(zDatasetDocMetadataResponse), + document_count: z.int(), + embedding_available: z.boolean().nullish(), + embedding_model: z.string().nullable(), + embedding_model_provider: z.string().nullable(), + enable_api: z.boolean(), + external_knowledge_info: zDatasetExternalKnowledgeInfoResponse, + external_retrieval_model: zDatasetExternalRetrievalModelResponse, + icon_info: zDatasetIconInfoResponse, + id: z.string(), + indexing_technique: z.string().nullable(), + is_multimodal: z.boolean(), + is_published: z.boolean(), + name: z.string(), + permission: z.string(), + pipeline_id: z.string().nullable(), + provider: z.string(), + retrieval_model_dict: zDatasetRetrievalModelResponse, + runtime_mode: z.string().nullable(), + summary_index_setting: zDatasetSummaryIndexSettingResponse, + tags: z.array(zDatasetTagResponse), + total_available_documents: z.int(), + total_documents: z.int(), + updated_at: z.int(), + updated_by: z.string().nullable(), + word_count: z.int(), +}) + +/** + * DatasetDetailWithPartialMembersResponse + */ +export const zDatasetDetailWithPartialMembersResponse = z.object({ + app_count: z.int(), + author_name: z.string().nullable(), + built_in_field_enabled: z.boolean(), + chunk_structure: z.string().nullable(), + created_at: z.int(), + created_by: z.string(), + data_source_type: z.string().nullable(), + description: z.string().nullable(), + doc_form: z.string().nullable(), + doc_metadata: z.array(zDatasetDocMetadataResponse), + document_count: z.int(), + embedding_available: z.boolean().nullish(), + embedding_model: z.string().nullable(), + embedding_model_provider: z.string().nullable(), + enable_api: z.boolean(), + external_knowledge_info: zDatasetExternalKnowledgeInfoResponse, + external_retrieval_model: zDatasetExternalRetrievalModelResponse, + icon_info: zDatasetIconInfoResponse, + id: z.string(), + indexing_technique: z.string().nullable(), + is_multimodal: z.boolean(), + is_published: z.boolean(), + name: z.string(), + partial_member_list: z.array(z.string()).nullish(), + permission: z.string(), + pipeline_id: z.string().nullable(), + provider: z.string(), + retrieval_model_dict: zDatasetRetrievalModelResponse, + runtime_mode: z.string().nullable(), + summary_index_setting: zDatasetSummaryIndexSettingResponse, + tags: z.array(zDatasetTagResponse), + total_available_documents: z.int(), + total_documents: z.int(), + updated_at: z.int(), + updated_by: z.string().nullable(), + word_count: z.int(), +}) + +/** + * DatasetListItemResponse + */ +export const zDatasetListItemResponse = z.object({ + app_count: z.int(), + author_name: z.string().nullable(), + built_in_field_enabled: z.boolean(), + chunk_structure: z.string().nullable(), + created_at: z.int(), + created_by: z.string(), + data_source_type: z.string().nullable(), + description: z.string().nullable(), + doc_form: z.string().nullable(), + doc_metadata: z.array(zDatasetDocMetadataResponse), + document_count: z.int(), + embedding_available: z.boolean().nullish(), + embedding_model: z.string().nullable(), + embedding_model_provider: z.string().nullable(), + enable_api: z.boolean(), + external_knowledge_info: zDatasetExternalKnowledgeInfoResponse, + external_retrieval_model: zDatasetExternalRetrievalModelResponse, + icon_info: zDatasetIconInfoResponse, + id: z.string(), + indexing_technique: z.string().nullable(), + is_multimodal: z.boolean(), + is_published: z.boolean(), + name: z.string(), + partial_member_list: z.array(z.string()), + permission: z.string(), + pipeline_id: z.string().nullable(), + provider: z.string(), + retrieval_model_dict: zDatasetRetrievalModelResponse, + runtime_mode: z.string().nullable(), + summary_index_setting: zDatasetSummaryIndexSettingResponse, + tags: z.array(zDatasetTagResponse), + total_available_documents: z.int(), + total_documents: z.int(), + updated_at: z.int(), + updated_by: z.string().nullable(), + word_count: z.int(), +}) + +/** + * DatasetListResponse + */ +export const zDatasetListResponse = z.object({ + data: z.array(zDatasetListItemResponse), + has_more: z.boolean(), + limit: z.int(), + page: z.int(), + total: z.int(), }) export const zDatasetKeywordSetting = z.object({ @@ -790,6 +1092,51 @@ export const zHitTestingResponse = z.object({ records: z.array(zHitTestingRecord).optional(), }) +/** + * DatasetQueryFileInfoResponse + */ +export const zDatasetQueryFileInfoResponse = z.object({ + extension: z.string(), + id: z.string(), + mime_type: z.string(), + name: z.string(), + size: z.int(), + source_url: z.string(), +}) + +/** + * DatasetQueryContentResponse + */ +export const zDatasetQueryContentResponse = z.object({ + content: z.string(), + content_type: z.string(), + file_info: zDatasetQueryFileInfoResponse.optional(), +}) + +/** + * DatasetQueryDetailResponse + */ +export const zDatasetQueryDetailResponse = z.object({ + created_at: z.int(), + created_by: z.string(), + created_by_role: z.string(), + id: z.string(), + queries: z.array(zDatasetQueryContentResponse), + source: z.string(), + source_app_id: z.string().nullable(), +}) + +/** + * DatasetQueryListResponse + */ +export const zDatasetQueryListResponse = z.object({ + data: z.array(zDatasetQueryDetailResponse), + has_more: z.boolean(), + limit: z.int(), + page: z.int(), + total: z.int(), +}) + /** * NotionIcon */ @@ -855,25 +1202,25 @@ export const zKnowledgeConfig = z.object({ }) export const zGetDatasetsQuery = z.object({ - ids: z.string().optional(), - include_all: z.string().optional(), + ids: z.array(z.string()).optional(), + include_all: z.boolean().optional().default(false), keyword: z.string().optional(), - limit: z.string().optional(), - page: z.string().optional(), - tag_ids: z.string().optional(), + limit: z.int().optional().default(20), + page: z.int().optional().default(1), + tag_ids: z.array(z.string()).optional(), }) /** * Datasets retrieved successfully */ -export const zGetDatasetsResponse = z.record(z.string(), z.unknown()) +export const zGetDatasetsResponse = zDatasetListResponse export const zPostDatasetsBody = zDatasetCreatePayload /** * Dataset created successfully */ -export const zPostDatasetsResponse = z.record(z.string(), z.unknown()) +export const zPostDatasetsResponse = zDatasetDetailResponse /** * API base info retrieved successfully @@ -998,7 +1345,7 @@ export const zPostDatasetsIndexingEstimateBody = zIndexingEstimatePayload /** * Indexing estimate calculated successfully */ -export const zPostDatasetsIndexingEstimateResponse = z.record(z.string(), z.unknown()) +export const zPostDatasetsIndexingEstimateResponse = zIndexingEstimateResponse export const zPostDatasetsInitBody = zKnowledgeConfig @@ -1036,7 +1383,7 @@ export const zGetDatasetsProcessRuleResponse = z.record(z.string(), z.unknown()) /** * Retrieval settings retrieved successfully */ -export const zGetDatasetsRetrievalSettingResponse = z.record(z.string(), z.unknown()) +export const zGetDatasetsRetrievalSettingResponse = zRetrievalSettingResponse export const zGetDatasetsRetrievalSettingByVectorTypePath = z.object({ vector_type: z.string(), @@ -1045,7 +1392,7 @@ export const zGetDatasetsRetrievalSettingByVectorTypePath = z.object({ /** * Mock retrieval settings retrieved successfully */ -export const zGetDatasetsRetrievalSettingByVectorTypeResponse = z.record(z.string(), z.unknown()) +export const zGetDatasetsRetrievalSettingByVectorTypeResponse = zRetrievalSettingResponse export const zDeleteDatasetsByDatasetIdPath = z.object({ dataset_id: z.string(), @@ -1063,7 +1410,7 @@ export const zGetDatasetsByDatasetIdPath = z.object({ /** * Dataset retrieved successfully */ -export const zGetDatasetsByDatasetIdResponse = zDatasetDetail +export const zGetDatasetsByDatasetIdResponse = zDatasetDetailWithPartialMembersResponse export const zPatchDatasetsByDatasetIdBody = zDatasetUpdatePayload @@ -1074,7 +1421,7 @@ export const zPatchDatasetsByDatasetIdPath = z.object({ /** * Dataset updated successfully */ -export const zPatchDatasetsByDatasetIdResponse = zDatasetDetail +export const zPatchDatasetsByDatasetIdResponse = zDatasetDetailWithPartialMembersResponse export const zPostDatasetsByDatasetIdApiKeysByStatusPath = z.object({ dataset_id: z.string(), @@ -1093,7 +1440,7 @@ export const zGetDatasetsByDatasetIdAutoDisableLogsPath = z.object({ /** * Auto disable logs retrieved successfully */ -export const zGetDatasetsByDatasetIdAutoDisableLogsResponse = z.record(z.string(), z.unknown()) +export const zGetDatasetsByDatasetIdAutoDisableLogsResponse = zAutoDisableLogsResponse export const zGetDatasetsByDatasetIdBatchByBatchIndexingEstimatePath = z.object({ batch: z.string(), @@ -1570,7 +1917,7 @@ export const zGetDatasetsByDatasetIdErrorDocsPath = z.object({ /** * Error documents retrieved successfully */ -export const zGetDatasetsByDatasetIdErrorDocsResponse = z.record(z.string(), z.unknown()) +export const zGetDatasetsByDatasetIdErrorDocsResponse = zErrorDocsResponse export const zPostDatasetsByDatasetIdExternalHitTestingBody = zExternalHitTestingPayload @@ -1601,7 +1948,7 @@ export const zGetDatasetsByDatasetIdIndexingStatusPath = z.object({ /** * Indexing status retrieved successfully */ -export const zGetDatasetsByDatasetIdIndexingStatusResponse = z.record(z.string(), z.unknown()) +export const zGetDatasetsByDatasetIdIndexingStatusResponse = zDocumentStatusListResponse export const zGetDatasetsByDatasetIdMetadataPath = z.object({ dataset_id: z.string(), @@ -1677,7 +2024,7 @@ export const zGetDatasetsByDatasetIdPermissionPartUsersPath = z.object({ /** * Permission users retrieved successfully */ -export const zGetDatasetsByDatasetIdPermissionPartUsersResponse = z.record(z.string(), z.unknown()) +export const zGetDatasetsByDatasetIdPermissionPartUsersResponse = zPartialMemberListResponse export const zGetDatasetsByDatasetIdQueriesPath = z.object({ dataset_id: z.string(), @@ -1686,7 +2033,7 @@ export const zGetDatasetsByDatasetIdQueriesPath = z.object({ /** * Query history retrieved successfully */ -export const zGetDatasetsByDatasetIdQueriesResponse = zDatasetQueryDetail +export const zGetDatasetsByDatasetIdQueriesResponse = zDatasetQueryListResponse export const zGetDatasetsByDatasetIdRelatedAppsPath = z.object({ dataset_id: z.string(), @@ -1695,7 +2042,7 @@ export const zGetDatasetsByDatasetIdRelatedAppsPath = z.object({ /** * Related apps retrieved successfully */ -export const zGetDatasetsByDatasetIdRelatedAppsResponse = zRelatedAppList +export const zGetDatasetsByDatasetIdRelatedAppsResponse = zRelatedAppListResponse export const zPostDatasetsByDatasetIdRetryBody = zDocumentRetryPayload diff --git a/packages/contracts/generated/api/service/orpc.gen.ts b/packages/contracts/generated/api/service/orpc.gen.ts index 33d2c47361..43b2d4402b 100644 --- a/packages/contracts/generated/api/service/orpc.gen.ts +++ b/packages/contracts/generated/api/service/orpc.gen.ts @@ -57,6 +57,7 @@ import { zGetDatasetsByDatasetIdResponse, zGetDatasetsByDatasetIdTagsPath, zGetDatasetsByDatasetIdTagsResponse, + zGetDatasetsQuery, zGetDatasetsResponse, zGetDatasetsTagsResponse, zGetEndUsersByEndUserIdPath, @@ -793,16 +794,10 @@ export const delete3 = oc * Get all knowledge type tags * * Get all knowledge type tags - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get7 = oc .route({ - deprecated: true, - description: - 'Get all knowledge type tags\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get all knowledge type tags', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsTags', @@ -814,16 +809,10 @@ export const get7 = oc /** * Update a knowledge type tag - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const patch = oc .route({ - deprecated: true, - description: - 'Update a knowledge type tag\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Update a knowledge type tag', inputStructure: 'detailed', method: 'PATCH', operationId: 'patchDatasetsTags', @@ -837,16 +826,10 @@ export const patch = oc * Add a knowledge type tag * * Add a knowledge type tag - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const post12 = oc .route({ - deprecated: true, - description: - 'Add a knowledge type tag\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Add a knowledge type tag', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasetsTags', @@ -1894,16 +1877,10 @@ export const retrieve = { * Get all knowledge type tags * * Get tags bound to a specific dataset - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get18 = oc .route({ - deprecated: true, - description: - 'Get tags bound to a specific dataset\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get tags bound to a specific dataset', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdTags', @@ -1951,16 +1928,10 @@ export const delete8 = oc /** * Get a specific dataset by ID - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get19 = oc .route({ - deprecated: true, - description: - 'Get a specific dataset by ID\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get a specific dataset by ID', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetId', @@ -2008,16 +1979,10 @@ export const byDatasetId = { * Resource for getting datasets * * List all datasets - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get20 = oc .route({ - deprecated: true, - description: - 'List all datasets\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'List all datasets', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasets', @@ -2025,6 +1990,7 @@ export const get20 = oc summary: 'Resource for getting datasets', tags: ['service_api'], }) + .input(z.object({ query: zGetDatasetsQuery.optional() })) .output(zGetDatasetsResponse) /** diff --git a/packages/contracts/generated/api/service/types.gen.ts b/packages/contracts/generated/api/service/types.gen.ts index 101be40c8c..88145a0a6e 100644 --- a/packages/contracts/generated/api/service/types.gen.ts +++ b/packages/contracts/generated/api/service/types.gen.ts @@ -141,11 +141,14 @@ export type ConversationVariablesQuery = { variable_name?: string | null } -export type DataSetTag = { - binding_count?: string | null +export type DatasetBoundTagListResponse = { + data: Array + total: number +} + +export type DatasetBoundTagResponse = { id: string name: string - type: string } export type DatasetCreatePayload = { @@ -164,6 +167,113 @@ export type DatasetCreatePayload = { } | null } +export type DatasetDetailResponse = { + app_count: number + author_name: string | null + built_in_field_enabled: boolean + chunk_structure: string | null + created_at: number + created_by: string + data_source_type: string | null + description: string | null + doc_form: string | null + doc_metadata: Array + document_count: number + embedding_available?: boolean | null + embedding_model: string | null + embedding_model_provider: string | null + enable_api: boolean + external_knowledge_info: DatasetExternalKnowledgeInfoResponse + external_retrieval_model: DatasetExternalRetrievalModelResponse + icon_info: DatasetIconInfoResponse + id: string + indexing_technique: string | null + is_multimodal: boolean + is_published: boolean + name: string + permission: string + pipeline_id: string | null + provider: string + retrieval_model_dict: DatasetRetrievalModelResponse + runtime_mode: string | null + summary_index_setting: DatasetSummaryIndexSettingResponse + tags: Array + total_available_documents: number + total_documents: number + updated_at: number + updated_by: string | null + word_count: number +} + +export type DatasetDetailWithPartialMembersResponse = { + app_count: number + author_name: string | null + built_in_field_enabled: boolean + chunk_structure: string | null + created_at: number + created_by: string + data_source_type: string | null + description: string | null + doc_form: string | null + doc_metadata: Array + document_count: number + embedding_available?: boolean | null + embedding_model: string | null + embedding_model_provider: string | null + enable_api: boolean + external_knowledge_info: DatasetExternalKnowledgeInfoResponse + external_retrieval_model: DatasetExternalRetrievalModelResponse + icon_info: DatasetIconInfoResponse + id: string + indexing_technique: string | null + is_multimodal: boolean + is_published: boolean + name: string + partial_member_list?: Array | null + permission: string + pipeline_id: string | null + provider: string + retrieval_model_dict: DatasetRetrievalModelResponse + runtime_mode: string | null + summary_index_setting: DatasetSummaryIndexSettingResponse + tags: Array + total_available_documents: number + total_documents: number + updated_at: number + updated_by: string | null + word_count: number +} + +export type DatasetDocMetadataResponse = { + id: string + name: string + type: string +} + +export type DatasetExternalKnowledgeInfoResponse = { + external_knowledge_api_endpoint: string + external_knowledge_api_id: string + external_knowledge_api_name: string + external_knowledge_id: string +} + +export type DatasetExternalRetrievalModelResponse = { + score_threshold: number + score_threshold_enabled?: boolean | null + top_k: number +} + +export type DatasetIconInfoResponse = { + icon: string | null + icon_background?: string | null + icon_type: string | null + icon_url?: string | null +} + +export type DatasetKeywordSettingResponse = { + keyword_weight: number +} + export type DatasetListQuery = { include_all?: boolean keyword?: string | null @@ -172,6 +282,14 @@ export type DatasetListQuery = { tag_ids?: Array } +export type DatasetListResponse = { + data: Array + has_more: boolean + limit: number + page: number + total: number +} + export type DatasetMetadataActionResponse = { result: string } @@ -205,6 +323,35 @@ export type DatasetMetadataResponse = { export type DatasetPermissionEnum = 'all_team_members' | 'only_me' | 'partial_members' +export type DatasetRerankingModelResponse = { + reranking_model_name?: string | null + reranking_provider_name?: string | null +} + +export type DatasetRetrievalModelResponse = { + reranking_enable: boolean + reranking_mode?: string | null + reranking_model: DatasetRerankingModelResponse + score_threshold?: number | null + score_threshold_enabled: boolean + search_method: string + top_k: number + weights?: DatasetWeightedScoreResponse +} + +export type DatasetSummaryIndexSettingResponse = { + enable?: boolean | null + model_name?: string | null + model_provider_name?: string | null + summary_prompt?: string | null +} + +export type DatasetTagResponse = { + id: string + name: string + type: string +} + export type DatasetUpdatePayload = { description?: string | null embedding_model?: string | null @@ -223,6 +370,18 @@ export type DatasetUpdatePayload = { retrieval_model?: RetrievalModel } +export type DatasetVectorSettingResponse = { + embedding_model_name: string + embedding_provider_name: string + vector_weight: number +} + +export type DatasetWeightedScoreResponse = { + keyword_setting: DatasetKeywordSettingResponse + vector_setting: DatasetVectorSettingResponse + weight_type: string | null +} + export type DatasourceNodeRunPayload = { credential_id?: string | null datasource_type: string @@ -334,6 +493,15 @@ export type IndexInfoResponse = { export type JsonValue = unknown +export type KnowledgeTagListResponse = Array + +export type KnowledgeTagResponse = { + binding_count?: string | null + id: string + name: string + type: string +} + export type MessageFeedbackPayload = { content?: string | null rating?: 'dislike' | 'like' | null @@ -1155,7 +1323,13 @@ export type PutConversationsByCIdVariablesByVariableIdResponse export type GetDatasetsData = { body?: never path?: never - query?: never + query?: { + include_all?: boolean + keyword?: string + limit?: number + page?: number + tag_ids?: Array + } url: '/datasets' } @@ -1168,9 +1342,7 @@ export type GetDatasetsErrors = { export type GetDatasetsError = GetDatasetsErrors[keyof GetDatasetsErrors] export type GetDatasetsResponses = { - 200: { - [key: string]: unknown - } + 200: DatasetListResponse } export type GetDatasetsResponse = GetDatasetsResponses[keyof GetDatasetsResponses] @@ -1194,9 +1366,7 @@ export type PostDatasetsErrors = { export type PostDatasetsError = PostDatasetsErrors[keyof PostDatasetsErrors] export type PostDatasetsResponses = { - 200: { - [key: string]: unknown - } + 200: DatasetDetailResponse } export type PostDatasetsResponse = PostDatasetsResponses[keyof PostDatasetsResponses] @@ -1278,9 +1448,7 @@ export type GetDatasetsTagsErrors = { export type GetDatasetsTagsError = GetDatasetsTagsErrors[keyof GetDatasetsTagsErrors] export type GetDatasetsTagsResponses = { - 200: { - [key: string]: unknown - } + 200: KnowledgeTagListResponse } export type GetDatasetsTagsResponse = GetDatasetsTagsResponses[keyof GetDatasetsTagsResponses] @@ -1304,9 +1472,7 @@ export type PatchDatasetsTagsErrors = { export type PatchDatasetsTagsError = PatchDatasetsTagsErrors[keyof PatchDatasetsTagsErrors] export type PatchDatasetsTagsResponses = { - 200: { - [key: string]: unknown - } + 200: KnowledgeTagResponse } export type PatchDatasetsTagsResponse = PatchDatasetsTagsResponses[keyof PatchDatasetsTagsResponses] @@ -1330,9 +1496,7 @@ export type PostDatasetsTagsErrors = { export type PostDatasetsTagsError = PostDatasetsTagsErrors[keyof PostDatasetsTagsErrors] export type PostDatasetsTagsResponses = { - 200: { - [key: string]: unknown - } + 200: KnowledgeTagResponse } export type PostDatasetsTagsResponse = PostDatasetsTagsResponses[keyof PostDatasetsTagsResponses] @@ -1451,9 +1615,7 @@ export type GetDatasetsByDatasetIdError = GetDatasetsByDatasetIdErrors[keyof GetDatasetsByDatasetIdErrors] export type GetDatasetsByDatasetIdResponses = { - 200: { - [key: string]: unknown - } + 200: DatasetDetailWithPartialMembersResponse } export type GetDatasetsByDatasetIdResponse @@ -1484,9 +1646,7 @@ export type PatchDatasetsByDatasetIdError = PatchDatasetsByDatasetIdErrors[keyof PatchDatasetsByDatasetIdErrors] export type PatchDatasetsByDatasetIdResponses = { - 200: { - [key: string]: unknown - } + 200: DatasetDetailWithPartialMembersResponse } export type PatchDatasetsByDatasetIdResponse @@ -2661,9 +2821,7 @@ export type GetDatasetsByDatasetIdTagsError = GetDatasetsByDatasetIdTagsErrors[keyof GetDatasetsByDatasetIdTagsErrors] export type GetDatasetsByDatasetIdTagsResponses = { - 200: { - [key: string]: unknown - } + 200: DatasetBoundTagListResponse } export type GetDatasetsByDatasetIdTagsResponse diff --git a/packages/contracts/generated/api/service/zod.gen.ts b/packages/contracts/generated/api/service/zod.gen.ts index 3bdaea3976..d183697811 100644 --- a/packages/contracts/generated/api/service/zod.gen.ts +++ b/packages/contracts/generated/api/service/zod.gen.ts @@ -189,15 +189,66 @@ export const zConversationVariablesQuery = z.object({ }) /** - * DataSetTag + * DatasetBoundTagResponse */ -export const zDataSetTag = z.object({ - binding_count: z.string().nullish(), +export const zDatasetBoundTagResponse = z.object({ + id: z.string(), + name: z.string(), +}) + +/** + * DatasetBoundTagListResponse + */ +export const zDatasetBoundTagListResponse = z.object({ + data: z.array(zDatasetBoundTagResponse), + total: z.int(), +}) + +/** + * DatasetDocMetadataResponse + */ +export const zDatasetDocMetadataResponse = z.object({ id: z.string(), name: z.string(), type: z.string(), }) +/** + * DatasetExternalKnowledgeInfoResponse + */ +export const zDatasetExternalKnowledgeInfoResponse = z.object({ + external_knowledge_api_endpoint: z.string(), + external_knowledge_api_id: z.string(), + external_knowledge_api_name: z.string(), + external_knowledge_id: z.string(), +}) + +/** + * DatasetExternalRetrievalModelResponse + */ +export const zDatasetExternalRetrievalModelResponse = z.object({ + score_threshold: z.number(), + score_threshold_enabled: z.boolean().nullish(), + top_k: z.int(), +}) + +/** + * DatasetIconInfoResponse + */ +export const zDatasetIconInfoResponse = z.object({ + icon: z.string().nullable(), + icon_background: z.string().nullish(), + icon_type: z.string().nullable(), + icon_url: z.string().nullish(), +}) + +/** + * DatasetKeywordSettingResponse + */ +export const zDatasetKeywordSettingResponse = z.object({ + keyword_weight: z.number(), +}) + /** * DatasetListQuery */ @@ -263,6 +314,159 @@ export const zDatasetMetadataResponse = z.object({ */ export const zDatasetPermissionEnum = z.enum(['all_team_members', 'only_me', 'partial_members']) +/** + * DatasetRerankingModelResponse + */ +export const zDatasetRerankingModelResponse = z.object({ + reranking_model_name: z.string().nullish(), + reranking_provider_name: z.string().nullish(), +}) + +/** + * DatasetSummaryIndexSettingResponse + */ +export const zDatasetSummaryIndexSettingResponse = z.object({ + enable: z.boolean().nullish(), + model_name: z.string().nullish(), + model_provider_name: z.string().nullish(), + summary_prompt: z.string().nullish(), +}) + +/** + * DatasetTagResponse + */ +export const zDatasetTagResponse = z.object({ + id: z.string(), + name: z.string(), + type: z.string(), +}) + +/** + * DatasetVectorSettingResponse + */ +export const zDatasetVectorSettingResponse = z.object({ + embedding_model_name: z.string(), + embedding_provider_name: z.string(), + vector_weight: z.number(), +}) + +/** + * DatasetWeightedScoreResponse + */ +export const zDatasetWeightedScoreResponse = z.object({ + keyword_setting: zDatasetKeywordSettingResponse, + vector_setting: zDatasetVectorSettingResponse, + weight_type: z.string().nullable(), +}) + +/** + * DatasetRetrievalModelResponse + */ +export const zDatasetRetrievalModelResponse = z.object({ + reranking_enable: z.boolean(), + reranking_mode: z.string().nullish(), + reranking_model: zDatasetRerankingModelResponse, + score_threshold: z.number().nullish(), + score_threshold_enabled: z.boolean(), + search_method: z.string(), + top_k: z.int(), + weights: zDatasetWeightedScoreResponse.optional(), +}) + +/** + * DatasetDetailResponse + */ +export const zDatasetDetailResponse = z.object({ + app_count: z.int(), + author_name: z.string().nullable(), + built_in_field_enabled: z.boolean(), + chunk_structure: z.string().nullable(), + created_at: z.int(), + created_by: z.string(), + data_source_type: z.string().nullable(), + description: z.string().nullable(), + doc_form: z.string().nullable(), + doc_metadata: z.array(zDatasetDocMetadataResponse), + document_count: z.int(), + embedding_available: z.boolean().nullish(), + embedding_model: z.string().nullable(), + embedding_model_provider: z.string().nullable(), + enable_api: z.boolean(), + external_knowledge_info: zDatasetExternalKnowledgeInfoResponse, + external_retrieval_model: zDatasetExternalRetrievalModelResponse, + icon_info: zDatasetIconInfoResponse, + id: z.string(), + indexing_technique: z.string().nullable(), + is_multimodal: z.boolean(), + is_published: z.boolean(), + name: z.string(), + permission: z.string(), + pipeline_id: z.string().nullable(), + provider: z.string(), + retrieval_model_dict: zDatasetRetrievalModelResponse, + runtime_mode: z.string().nullable(), + summary_index_setting: zDatasetSummaryIndexSettingResponse, + tags: z.array(zDatasetTagResponse), + total_available_documents: z.int(), + total_documents: z.int(), + updated_at: z.int(), + updated_by: z.string().nullable(), + word_count: z.int(), +}) + +/** + * DatasetDetailWithPartialMembersResponse + */ +export const zDatasetDetailWithPartialMembersResponse = z.object({ + app_count: z.int(), + author_name: z.string().nullable(), + built_in_field_enabled: z.boolean(), + chunk_structure: z.string().nullable(), + created_at: z.int(), + created_by: z.string(), + data_source_type: z.string().nullable(), + description: z.string().nullable(), + doc_form: z.string().nullable(), + doc_metadata: z.array(zDatasetDocMetadataResponse), + document_count: z.int(), + embedding_available: z.boolean().nullish(), + embedding_model: z.string().nullable(), + embedding_model_provider: z.string().nullable(), + enable_api: z.boolean(), + external_knowledge_info: zDatasetExternalKnowledgeInfoResponse, + external_retrieval_model: zDatasetExternalRetrievalModelResponse, + icon_info: zDatasetIconInfoResponse, + id: z.string(), + indexing_technique: z.string().nullable(), + is_multimodal: z.boolean(), + is_published: z.boolean(), + name: z.string(), + partial_member_list: z.array(z.string()).nullish(), + permission: z.string(), + pipeline_id: z.string().nullable(), + provider: z.string(), + retrieval_model_dict: zDatasetRetrievalModelResponse, + runtime_mode: z.string().nullable(), + summary_index_setting: zDatasetSummaryIndexSettingResponse, + tags: z.array(zDatasetTagResponse), + total_available_documents: z.int(), + total_documents: z.int(), + updated_at: z.int(), + updated_by: z.string().nullable(), + word_count: z.int(), +}) + +/** + * DatasetListResponse + */ +export const zDatasetListResponse = z.object({ + data: z.array(zDatasetDetailResponse), + has_more: z.boolean(), + limit: z.int(), + page: z.int(), + total: z.int(), +}) + /** * DatasourceNodeRunPayload */ @@ -368,6 +572,21 @@ export const zHumanInputFormSubmitPayload = z.object({ inputs: z.record(z.string(), zJsonValue), }) +/** + * KnowledgeTagResponse + */ +export const zKnowledgeTagResponse = z.object({ + binding_count: z.string().nullish(), + id: z.string(), + name: z.string(), + type: z.string(), +}) + +/** + * KnowledgeTagListResponse + */ +export const zKnowledgeTagListResponse = z.array(zKnowledgeTagResponse) + /** * MessageFeedbackPayload */ @@ -1039,17 +1258,25 @@ export const zPutConversationsByCIdVariablesByVariableIdPath = z.object({ */ export const zPutConversationsByCIdVariablesByVariableIdResponse = zConversationVariableResponse +export const zGetDatasetsQuery = z.object({ + include_all: z.boolean().optional().default(false), + keyword: z.string().optional(), + limit: z.int().optional().default(20), + page: z.int().optional().default(1), + tag_ids: z.array(z.string()).optional(), +}) + /** * Datasets retrieved successfully */ -export const zGetDatasetsResponse = z.record(z.string(), z.unknown()) +export const zGetDatasetsResponse = zDatasetListResponse export const zPostDatasetsBody = zDatasetCreatePayload /** * Dataset created successfully */ -export const zPostDatasetsResponse = z.record(z.string(), z.unknown()) +export const zPostDatasetsResponse = zDatasetDetailResponse /** * File uploaded successfully @@ -1066,21 +1293,21 @@ export const zDeleteDatasetsTagsResponse = z.record(z.string(), z.never()) /** * Tags retrieved successfully */ -export const zGetDatasetsTagsResponse = z.record(z.string(), z.unknown()) +export const zGetDatasetsTagsResponse = zKnowledgeTagListResponse export const zPatchDatasetsTagsBody = zTagUpdatePayload /** * Tag updated successfully */ -export const zPatchDatasetsTagsResponse = z.record(z.string(), z.unknown()) +export const zPatchDatasetsTagsResponse = zKnowledgeTagResponse export const zPostDatasetsTagsBody = zTagCreatePayload /** * Tag created successfully */ -export const zPostDatasetsTagsResponse = z.record(z.string(), z.unknown()) +export const zPostDatasetsTagsResponse = zKnowledgeTagResponse export const zPostDatasetsTagsBindingBody = zTagBindingPayload @@ -1112,7 +1339,7 @@ export const zGetDatasetsByDatasetIdPath = z.object({ /** * Dataset retrieved successfully */ -export const zGetDatasetsByDatasetIdResponse = z.record(z.string(), z.unknown()) +export const zGetDatasetsByDatasetIdResponse = zDatasetDetailWithPartialMembersResponse export const zPatchDatasetsByDatasetIdBody = zDatasetUpdatePayload @@ -1123,7 +1350,7 @@ export const zPatchDatasetsByDatasetIdPath = z.object({ /** * Dataset updated successfully */ -export const zPatchDatasetsByDatasetIdResponse = z.record(z.string(), z.unknown()) +export const zPatchDatasetsByDatasetIdResponse = zDatasetDetailWithPartialMembersResponse export const zPostDatasetsByDatasetIdDocumentCreateByFilePath = z.object({ dataset_id: z.string(), @@ -1616,7 +1843,7 @@ export const zGetDatasetsByDatasetIdTagsPath = z.object({ /** * Tags retrieved successfully */ -export const zGetDatasetsByDatasetIdTagsResponse = z.record(z.string(), z.unknown()) +export const zGetDatasetsByDatasetIdTagsResponse = zDatasetBoundTagListResponse export const zGetEndUsersByEndUserIdPath = z.object({ end_user_id: z.string(),