diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index bbfbe00a67..5b1588e2dc 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -9,7 +9,7 @@ from uuid import UUID import sqlalchemy as sa from flask import request, send_file -from flask_restx import Resource, marshal +from flask_restx import Resource from pydantic import BaseModel, Field, field_validator from sqlalchemy import asc, desc, func, select from werkzeug.exceptions import Forbidden, NotFound @@ -34,14 +34,16 @@ from core.rag.index_processor.constant.index_type import IndexTechniqueType from extensions.ext_database import db from fields.base import ResponseModel from fields.document_fields import ( - document_fields, - document_status_fields, - document_with_segments_fields, + DocumentMetadataResponse, + DocumentResponse, + DocumentStatusListResponse, + DocumentStatusResponse, + normalize_enum, ) from graphon.model_runtime.entities.model_entities import ModelType from graphon.model_runtime.errors.invoke import InvokeAuthorizationError from libs.datetime_utils import naive_utc_now -from libs.helper import to_timestamp +from libs.helper import dump_response, to_timestamp from libs.login import current_account_with_tenant, login_required from models import DatasetProcessRule, Document, DocumentSegment, UploadFile from models.dataset import DocumentPipelineExecutionLog @@ -74,12 +76,6 @@ from ..wraps import ( logger = logging.getLogger(__name__) -def _normalize_enum(value: Any) -> Any: - if isinstance(value, str) or value is None: - return value - return getattr(value, "value", value) - - class DatasetResponse(ResponseModel): id: str name: str @@ -93,7 +89,7 @@ class DatasetResponse(ResponseModel): @field_validator("data_source_type", "indexing_technique", mode="before") @classmethod def _normalize_enum_fields(cls, value: Any) -> Any: - return _normalize_enum(value) + return normalize_enum(value) @field_validator("created_at", mode="before") @classmethod @@ -101,61 +97,10 @@ class DatasetResponse(ResponseModel): return to_timestamp(value) -class DocumentMetadataResponse(ResponseModel): - id: str - name: str - type: str - value: str | None = None - - -class DocumentResponse(ResponseModel): - id: str - position: int | None = None - data_source_type: str | None = None - data_source_info: Any = Field(default=None, validation_alias="data_source_info_dict") - data_source_detail_dict: Any = None - dataset_process_rule_id: str | None = None - name: str - created_from: str | None = None - created_by: str | None = None - created_at: int | None = None - tokens: int | None = None - indexing_status: str | None = None - error: str | None = None - enabled: bool | None = None - disabled_at: int | None = None - disabled_by: str | None = None - archived: bool | None = None - display_status: str | None = None - word_count: int | None = None - hit_count: int | None = None - doc_form: str | None = None - doc_metadata: list[DocumentMetadataResponse] = Field(default_factory=list, validation_alias="doc_metadata_details") - summary_index_status: str | None = None - need_summary: bool | None = None - - @field_validator("data_source_type", "indexing_status", "display_status", "doc_form", mode="before") - @classmethod - def _normalize_enum_fields(cls, value: Any) -> Any: - return _normalize_enum(value) - - @field_validator("doc_metadata", mode="before") - @classmethod - def _normalize_doc_metadata(cls, value: Any) -> list[Any]: - if value is None: - return [] - return value - - @field_validator("created_at", "disabled_at", mode="before") - @classmethod - def _normalize_timestamp(cls, value: datetime | int | None) -> int | None: - return to_timestamp(value) - - class DocumentWithSegmentsResponse(DocumentResponse): process_rule_dict: Any = None - completed_segments: int | None = None - total_segments: int | None = None + completed_segments: int | None = Field(default=None, exclude_if=lambda value: value is None) + total_segments: int | None = Field(default=None, exclude_if=lambda value: value is None) class DatasetAndDocumentResponse(ResponseModel): @@ -190,6 +135,14 @@ class DocumentDatasetListParam(BaseModel): fetch_val: str = Field("false", alias="fetch") +class DocumentWithSegmentsListResponse(ResponseModel): + data: list[DocumentWithSegmentsResponse] + has_more: bool + limit: int + total: int + page: int + + register_schema_models( console_ns, KnowledgeConfig, @@ -200,13 +153,19 @@ register_schema_models( GenerateSummaryPayload, DocumentMetadataUpdatePayload, DocumentBatchDownloadZipPayload, +) +register_response_schema_models( + console_ns, + SimpleResultMessageResponse, + SimpleResultResponse, + UrlResponse, DatasetResponse, DocumentMetadataResponse, DocumentResponse, DocumentWithSegmentsResponse, DatasetAndDocumentResponse, + DocumentWithSegmentsListResponse, ) -register_response_schema_models(console_ns, SimpleResultMessageResponse, SimpleResultResponse, UrlResponse) class DocumentResource(Resource): @@ -312,7 +271,11 @@ class DatasetDocumentListApi(Resource): "status": "Filter documents by display status", } ) - @console_ns.response(200, "Documents retrieved successfully") + @console_ns.response( + 200, + "Documents retrieved successfully", + console_ns.models[DocumentWithSegmentsListResponse.__name__], + ) @setup_required @login_required @account_initialization_required @@ -425,18 +388,15 @@ class DatasetDocumentListApi(Resource): ) document.completed_segments = completed_segments document.total_segments = total_segments - data = marshal(documents, document_with_segments_fields) - else: - data = marshal(documents, document_fields) response = { - "data": data, + "data": documents, "has_more": len(documents) == limit, "limit": limit, "total": paginated_documents.total, "page": page, } - return response + return dump_response(DocumentWithSegmentsListResponse, response) @setup_required @login_required @@ -482,9 +442,7 @@ class DatasetDocumentListApi(Resource): except ModelCurrentlyNotSupportError: raise ProviderModelCurrentlyNotSupportError() - return DatasetAndDocumentResponse.model_validate( - {"dataset": dataset, "documents": documents, "batch": batch}, from_attributes=True - ).model_dump(mode="json") + return dump_response(DatasetAndDocumentResponse, {"dataset": dataset, "documents": documents, "batch": batch}) @setup_required @login_required @@ -567,9 +525,7 @@ class DatasetInitApi(Resource): except ModelCurrentlyNotSupportError: raise ProviderModelCurrentlyNotSupportError() - return DatasetAndDocumentResponse.model_validate( - {"dataset": dataset, "documents": documents, "batch": batch}, from_attributes=True - ).model_dump(mode="json") + return dump_response(DatasetAndDocumentResponse, {"dataset": dataset, "documents": documents, "batch": batch}) @console_ns.route("/datasets//documents//indexing-estimate") @@ -742,6 +698,9 @@ class DocumentBatchIndexingEstimateApi(DocumentResource): @console_ns.route("/datasets//batch//indexing-status") class DocumentBatchIndexingStatusApi(DocumentResource): + @console_ns.response( + 200, "Indexing status retrieved successfully", console_ns.models[DocumentStatusListResponse.__name__] + ) @setup_required @login_required @account_initialization_required @@ -784,9 +743,8 @@ class DocumentBatchIndexingStatusApi(DocumentResource): "completed_segments": completed_segments, "total_segments": total_segments, } - documents_status.append(marshal(document_dict, document_status_fields)) - data = {"data": documents_status} - return data + documents_status.append(document_dict) + return dump_response(DocumentStatusListResponse, {"data": documents_status}) @console_ns.route("/datasets//documents//indexing-status") @@ -794,7 +752,9 @@ class DocumentIndexingStatusApi(DocumentResource): @console_ns.doc("get_document_indexing_status") @console_ns.doc(description="Get document indexing status") @console_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) - @console_ns.response(200, "Indexing status retrieved successfully") + @console_ns.response( + 200, "Indexing status retrieved successfully", console_ns.models[DocumentStatusResponse.__name__] + ) @console_ns.response(404, "Document not found") @setup_required @login_required @@ -839,7 +799,7 @@ class DocumentIndexingStatusApi(DocumentResource): "completed_segments": completed_segments, "total_segments": total_segments, } - return marshal(document_dict, document_status_fields) + return dump_response(DocumentStatusResponse, document_dict) @console_ns.route("/datasets//documents/") @@ -1304,7 +1264,7 @@ class DocumentRenameApi(DocumentResource): except services.errors.document.DocumentIndexingError: raise DocumentIndexingError("Cannot delete document during indexing.") - return DocumentResponse.model_validate(document, from_attributes=True).model_dump(mode="json") + return dump_response(DocumentResponse, document) @console_ns.route("/datasets//documents//website-sync") diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index c1d1e1f0a0..d1b81e8162 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -12,7 +12,6 @@ from typing import Self from uuid import UUID from flask import request, send_file -from flask_restx import marshal from pydantic import BaseModel, Field, field_validator, model_validator from sqlalchemy import desc, func, select from werkzeug.exceptions import Forbidden, NotFound @@ -27,7 +26,12 @@ from controllers.common.errors import ( UnsupportedFileTypeError, ) from controllers.common.fields import UrlResponse -from controllers.common.schema import register_enum_models, register_response_schema_models, register_schema_models +from controllers.common.schema import ( + query_params_from_model, + register_enum_models, + register_response_schema_models, + register_schema_models, +) from controllers.service_api import service_api_ns from controllers.service_api.app.error import ProviderNotInitializeError from controllers.service_api.dataset.error import ( @@ -44,7 +48,13 @@ from core.errors.error import ProviderTokenNotInitError from core.rag.entities import PreProcessingRule, Rule, Segmentation from core.rag.retrieval.retrieval_methods import RetrievalMethod from extensions.ext_database import db -from fields.document_fields import document_fields, document_status_fields +from fields.base import ResponseModel +from fields.document_fields import ( + DocumentListResponse, + DocumentResponse, + DocumentStatusListResponse, +) +from libs.helper import dump_response from libs.login import current_user from models.dataset import Dataset, Document, DocumentSegment from models.enums import SegmentStatus @@ -107,6 +117,44 @@ class DocumentListQuery(BaseModel): status: str | None = Field(default=None, description="Document status filter") +DOCUMENT_CREATE_BY_FILE_PARAMS = { + "dataset_id": "Dataset ID", + "file": { + "in": "formData", + "type": "file", + "required": True, + "description": "Document file to upload.", + }, + "data": { + "in": "formData", + "type": "string", + "required": False, + "description": "Optional JSON string with document creation settings.", + }, +} +DOCUMENT_UPDATE_BY_FILE_PARAMS = { + "dataset_id": "Dataset ID", + "document_id": "Document ID", + "file": { + "in": "formData", + "type": "file", + "required": False, + "description": "Replacement document file.", + }, + "data": { + "in": "formData", + "type": "string", + "required": False, + "description": "Optional JSON string with document update settings.", + }, +} + + +class DocumentAndBatchResponse(ResponseModel): + document: DocumentResponse + batch: str + + register_enum_models(service_api_ns, RetrievalMethod) register_schema_models( @@ -121,7 +169,14 @@ register_schema_models( PreProcessingRule, Segmentation, ) -register_response_schema_models(service_api_ns, UrlResponse) +register_response_schema_models( + service_api_ns, + UrlResponse, + DocumentResponse, + DocumentAndBatchResponse, + DocumentListResponse, + DocumentStatusListResponse, +) def _create_document_by_text(tenant_id: str, dataset_id: UUID) -> tuple[Mapping[str, object], int]: @@ -188,8 +243,7 @@ def _create_document_by_text(tenant_id: str, dataset_id: UUID) -> tuple[Mapping[ raise ProviderNotInitializeError(ex.description) document = documents[0] - documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch} - return documents_and_batch_fields, 200 + return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200 def _update_document_by_text(tenant_id: str, dataset_id: UUID, document_id: UUID) -> tuple[Mapping[str, object], int]: @@ -248,8 +302,7 @@ def _update_document_by_text(tenant_id: str, dataset_id: UUID, document_id: UUID raise ProviderNotInitializeError(ex.description) document = documents[0] - documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch} - return documents_and_batch_fields, 200 + return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200 @service_api_ns.route("/datasets//document/create-by-text") @@ -267,6 +320,9 @@ class DocumentAddByTextApi(DatasetApiResource): 400: "Bad request - invalid parameters", } ) + @service_api_ns.response( + 200, "Document created successfully", service_api_ns.models[DocumentAndBatchResponse.__name__] + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_resource_check("documents", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") @@ -296,6 +352,9 @@ class DeprecatedDocumentAddByTextApi(DatasetApiResource): 400: "Bad request - invalid parameters", } ) + @service_api_ns.response( + 200, "Document created successfully", service_api_ns.models[DocumentAndBatchResponse.__name__] + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_resource_check("documents", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") @@ -319,6 +378,9 @@ class DocumentUpdateByTextApi(DatasetApiResource): 404: "Document not found", } ) + @service_api_ns.response( + 200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__] + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID): @@ -347,6 +409,9 @@ class DeprecatedDocumentUpdateByTextApi(DatasetApiResource): 404: "Document not found", } ) + @service_api_ns.response( + 200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__] + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID): @@ -363,7 +428,7 @@ class DocumentAddByFileApi(DatasetApiResource): @service_api_ns.doc("create_document_by_file") @service_api_ns.doc(description="Create a new document by uploading a file") - @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc(consumes=["multipart/form-data"], params=DOCUMENT_CREATE_BY_FILE_PARAMS) @service_api_ns.doc( responses={ 200: "Document created successfully", @@ -371,6 +436,9 @@ class DocumentAddByFileApi(DatasetApiResource): 400: "Bad request - invalid file or parameters", } ) + @service_api_ns.response( + 200, "Document created successfully", service_api_ns.models[DocumentAndBatchResponse.__name__] + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_resource_check("documents", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") @@ -462,8 +530,7 @@ class DocumentAddByFileApi(DatasetApiResource): except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) document = documents[0] - documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch} - return documents_and_batch_fields, 200 + return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200 def _update_document_by_file(tenant_id: str, dataset_id: UUID, document_id: UUID) -> tuple[Mapping[str, object], int]: @@ -539,8 +606,7 @@ def _update_document_by_file(tenant_id: str, dataset_id: UUID, document_id: UUID except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) document = documents[0] - documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": document.batch} - return documents_and_batch_fields, 200 + return dump_response(DocumentAndBatchResponse, {"document": document, "batch": document.batch}), 200 @service_api_ns.route( @@ -558,7 +624,7 @@ class DeprecatedDocumentUpdateByFileApi(DatasetApiResource): "Use PATCH /datasets/{dataset_id}/documents/{document_id} instead." ) ) - @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc(consumes=["multipart/form-data"], params=DOCUMENT_UPDATE_BY_FILE_PARAMS) @service_api_ns.doc( responses={ 200: "Document updated successfully", @@ -566,6 +632,9 @@ class DeprecatedDocumentUpdateByFileApi(DatasetApiResource): 404: "Document not found", } ) + @service_api_ns.response( + 200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__] + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID): @@ -577,7 +646,7 @@ class DeprecatedDocumentUpdateByFileApi(DatasetApiResource): class DocumentListApi(DatasetApiResource): @service_api_ns.doc("list_documents") @service_api_ns.doc(description="List all documents in a dataset") - @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc(params={"dataset_id": "Dataset ID", **query_params_from_model(DocumentListQuery)}) @service_api_ns.doc( responses={ 200: "Documents retrieved successfully", @@ -585,6 +654,9 @@ class DocumentListApi(DatasetApiResource): 404: "Dataset not found", } ) + @service_api_ns.response( + 200, "Documents retrieved successfully", service_api_ns.models[DocumentListResponse.__name__] + ) def get(self, tenant_id, dataset_id: UUID): dataset_id_str = str(dataset_id) tenant_id = str(tenant_id) @@ -618,14 +690,14 @@ class DocumentListApi(DatasetApiResource): ) response = { - "data": marshal(documents, document_fields), + "data": documents, "has_more": len(documents) == query_params.limit, "limit": query_params.limit, "total": paginated_documents.total, "page": query_params.page, } - return response + return dump_response(DocumentListResponse, response) @service_api_ns.route("/datasets//documents/download-zip") @@ -680,6 +752,11 @@ class DocumentIndexingStatusApi(DatasetApiResource): 404: "Dataset or documents not found", } ) + @service_api_ns.response( + 200, + "Indexing status retrieved successfully", + service_api_ns.models[DocumentStatusListResponse.__name__], + ) def get(self, tenant_id, dataset_id: UUID, batch: str): dataset_id_str = str(dataset_id) tenant_id = str(tenant_id) @@ -729,9 +806,8 @@ class DocumentIndexingStatusApi(DatasetApiResource): "completed_segments": completed_segments, "total_segments": total_segments, } - documents_status.append(marshal(document_dict, document_status_fields)) - data = {"data": documents_status} - return data + documents_status.append(document_dict) + return dump_response(DocumentStatusListResponse, {"data": documents_status}) @service_api_ns.route("/datasets//documents//download") @@ -890,7 +966,7 @@ class DocumentApi(DatasetApiResource): @service_api_ns.doc("update_document_by_file") @service_api_ns.doc(description="Update an existing document by uploading a file") - @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc(consumes=["multipart/form-data"], params=DOCUMENT_UPDATE_BY_FILE_PARAMS) @service_api_ns.doc( responses={ 200: "Document updated successfully", @@ -898,6 +974,9 @@ class DocumentApi(DatasetApiResource): 404: "Document not found", } ) + @service_api_ns.response( + 200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__] + ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def patch(self, tenant_id: str, dataset_id: UUID, document_id: UUID): diff --git a/api/dev/generate_swagger_markdown_docs.py b/api/dev/generate_swagger_markdown_docs.py index c87b25eeb0..75575b355b 100644 --- a/api/dev/generate_swagger_markdown_docs.py +++ b/api/dev/generate_swagger_markdown_docs.py @@ -103,7 +103,11 @@ def _replace_schema_table_type(markdown: str, definition_name: str, row_name: st lines[index] = "|".join(cells) break - return "\n".join(lines) + return "\n".join(lines) + ("\n" if markdown.endswith("\n") else "") + + +def _has_union_schema(schema: object) -> bool: + return isinstance(schema, dict) and (isinstance(schema.get("oneOf"), list) or isinstance(schema.get("anyOf"), list)) def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str: @@ -117,8 +121,20 @@ def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str: for definition_name, schema in definitions.items(): if not isinstance(definition_name, str) or not isinstance(schema, dict): continue - one_of = schema.get("oneOf") - if not isinstance(one_of, list): + + properties = schema.get("properties") + if isinstance(properties, dict): + for property_name, property_schema in properties.items(): + if isinstance(property_name, str) and _has_union_schema(property_schema): + markdown = _replace_schema_table_type( + markdown, + definition_name, + property_name, + _schema_markdown_type(property_schema), + ) + + union_variants = schema.get("oneOf") or schema.get("anyOf") + if not isinstance(union_variants, list): continue markdown = _replace_schema_table_type( @@ -128,7 +144,7 @@ def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str: _schema_markdown_type(schema), ) - for variant in one_of: + for variant in union_variants: variant_name = _definition_ref_name(variant) variant_schema = definitions.get(variant_name) if variant_name is not None else None if not isinstance(variant_name, str) or not isinstance(variant_schema, dict): @@ -150,7 +166,7 @@ def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str: def _convert_spec_to_markdown(spec_path: Path, markdown_path: Path) -> None: markdown_path.parent.mkdir(parents=True, exist_ok=True) - with tempfile.TemporaryDirectory(prefix=f"{markdown_path.stem}-", dir=markdown_path.parent) as temp_dir: + with tempfile.TemporaryDirectory(prefix=f"{markdown_path.stem}-") as temp_dir: temp_markdown_path = Path(temp_dir) / markdown_path.name result = subprocess.run( [ @@ -158,12 +174,13 @@ def _convert_spec_to_markdown(spec_path: Path, markdown_path: Path) -> None: "--yes", SWAGGER_MARKDOWN_PACKAGE, "-i", - str(spec_path), + str(spec_path.resolve()), "-o", - str(temp_markdown_path), + str(temp_markdown_path.resolve()), ], check=False, capture_output=True, + cwd=temp_dir, text=True, ) if result.returncode != 0: diff --git a/api/fields/document_fields.py b/api/fields/document_fields.py index 35a2a04f3e..a565d19ae6 100644 --- a/api/fields/document_fields.py +++ b/api/fields/document_fields.py @@ -1,95 +1,112 @@ -from flask_restx import fields +"""Response schemas for dataset document endpoints.""" -from fields.dataset_fields import dataset_fields -from libs.helper import TimestampField +from datetime import datetime +from typing import Any -document_metadata_fields = { - "id": fields.String, - "name": fields.String, - "type": fields.String, - "value": fields.String, -} +from pydantic import Field, field_validator -document_fields = { - "id": fields.String, - "position": fields.Integer, - "data_source_type": fields.String, - "data_source_info": fields.Raw(attribute="data_source_info_dict"), - "data_source_detail_dict": fields.Raw(attribute="data_source_detail_dict"), - "dataset_process_rule_id": fields.String, - "name": fields.String, - "created_from": fields.String, - "created_by": fields.String, - "created_at": TimestampField, - "tokens": fields.Integer, - "indexing_status": fields.String, - "error": fields.String, - "enabled": fields.Boolean, - "disabled_at": TimestampField, - "disabled_by": fields.String, - "archived": fields.Boolean, - "display_status": fields.String, - "word_count": fields.Integer, - "hit_count": fields.Integer, - "doc_form": fields.String, - "doc_metadata": fields.List(fields.Nested(document_metadata_fields), attribute="doc_metadata_details"), - # Summary index generation status: - # "SUMMARIZING" (when task is queued and generating) - "summary_index_status": fields.String, - # Whether this document needs summary index generation - "need_summary": fields.Boolean, -} +from fields.base import ResponseModel +from libs.helper import to_timestamp -document_with_segments_fields = { - "id": fields.String, - "position": fields.Integer, - "data_source_type": fields.String, - "data_source_info": fields.Raw(attribute="data_source_info_dict"), - "data_source_detail_dict": fields.Raw(attribute="data_source_detail_dict"), - "dataset_process_rule_id": fields.String, - "process_rule_dict": fields.Raw(attribute="process_rule_dict"), - "name": fields.String, - "created_from": fields.String, - "created_by": fields.String, - "created_at": TimestampField, - "tokens": fields.Integer, - "indexing_status": fields.String, - "error": fields.String, - "enabled": fields.Boolean, - "disabled_at": TimestampField, - "disabled_by": fields.String, - "archived": fields.Boolean, - "display_status": fields.String, - "word_count": fields.Integer, - "hit_count": fields.Integer, - "completed_segments": fields.Integer, - "total_segments": fields.Integer, - "doc_metadata": fields.List(fields.Nested(document_metadata_fields), attribute="doc_metadata_details"), - # Summary index generation status: - # "SUMMARIZING" (when task is queued and generating) - "summary_index_status": fields.String, - "need_summary": fields.Boolean, # Whether this document needs summary index generation -} -dataset_and_document_fields = { - "dataset": fields.Nested(dataset_fields), - "documents": fields.List(fields.Nested(document_fields)), - "batch": fields.String, -} +def normalize_enum(value: Any) -> Any: + if isinstance(value, str) or value is None: + return value + return getattr(value, "value", value) -document_status_fields = { - "id": fields.String, - "indexing_status": fields.String, - "processing_started_at": TimestampField, - "parsing_completed_at": TimestampField, - "cleaning_completed_at": TimestampField, - "splitting_completed_at": TimestampField, - "completed_at": TimestampField, - "paused_at": TimestampField, - "error": fields.String, - "stopped_at": TimestampField, - "completed_segments": fields.Integer, - "total_segments": fields.Integer, -} -document_status_fields_list = {"data": fields.List(fields.Nested(document_status_fields))} +class DocumentMetadataResponse(ResponseModel): + id: str + name: str + type: str + value: str | int | float | bool | None = None + + +class DocumentResponse(ResponseModel): + id: str + position: int | None = None + data_source_type: str | None = None + data_source_info: Any = Field(default=None, validation_alias="data_source_info_dict") + data_source_detail_dict: Any = None + dataset_process_rule_id: str | None = None + name: str + created_from: str | None = None + created_by: str | None = None + created_at: int | None = None + tokens: int | None = None + indexing_status: str | None = None + error: str | None = None + enabled: bool | None = None + disabled_at: int | None = None + disabled_by: str | None = None + archived: bool | None = None + display_status: str | None = None + word_count: int | None = None + hit_count: int | None = None + doc_form: str | None = None + doc_metadata: list[DocumentMetadataResponse] = Field(default_factory=list, validation_alias="doc_metadata_details") + summary_index_status: str | None = None + need_summary: bool | None = None + + @field_validator("data_source_type", "indexing_status", "display_status", "doc_form", mode="before") + @classmethod + def _normalize_enum_fields(cls, value: Any) -> Any: + return normalize_enum(value) + + @field_validator("doc_metadata", mode="before") + @classmethod + def _normalize_doc_metadata(cls, value: Any) -> list[Any]: + if value is None: + return [] + return value + + @field_validator("created_at", "disabled_at", mode="before") + @classmethod + def _normalize_timestamp(cls, value: datetime | int | None) -> int | None: + return to_timestamp(value) + + +class DocumentListResponse(ResponseModel): + data: list[DocumentResponse] + has_more: bool + limit: int + total: int + page: int + + +class DocumentStatusResponse(ResponseModel): + id: str + indexing_status: str + processing_started_at: int | None + parsing_completed_at: int | None + cleaning_completed_at: int | None + splitting_completed_at: int | None + completed_at: int | None + paused_at: int | None + error: str | None + stopped_at: int | None + completed_segments: int | None = None + total_segments: int | None = None + + @field_validator("indexing_status", mode="before") + @classmethod + def _normalize_indexing_status(cls, value: Any) -> Any: + return normalize_enum(value) + + @field_validator( + "processing_started_at", + "parsing_completed_at", + "cleaning_completed_at", + "splitting_completed_at", + "completed_at", + "paused_at", + "stopped_at", + mode="before", + ) + @classmethod + def _normalize_timestamp(cls, value: datetime | int | None) -> int | None: + return to_timestamp(value) + + +class DocumentStatusListResponse(ResponseModel): + data: list[DocumentStatusResponse] diff --git a/api/openapi/markdown/console-swagger.md b/api/openapi/markdown/console-swagger.md index d0341a5d1c..3bbbc75f71 100644 --- a/api/openapi/markdown/console-swagger.md +++ b/api/openapi/markdown/console-swagger.md @@ -4792,9 +4792,9 @@ Get dataset auto disable logs ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Success | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Indexing status retrieved successfully | [DocumentStatusListResponse](#documentstatuslistresponse) | ### /datasets/{dataset_id}/documents @@ -4830,9 +4830,9 @@ Get documents in a dataset ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Documents retrieved successfully | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Documents retrieved successfully | [DocumentWithSegmentsListResponse](#documentwithsegmentslistresponse) | #### POST ##### Parameters @@ -5028,10 +5028,10 @@ Get document indexing status ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Indexing status retrieved successfully | -| 404 | Document not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Indexing status retrieved successfully | [DocumentStatusResponse](#documentstatusresponse) | +| 404 | Document not found | | ### /datasets/{dataset_id}/documents/{document_id}/metadata @@ -11432,7 +11432,7 @@ Enum class for api provider schema type. | description | string | | Yes | | id | string | | Yes | | name | string | | Yes | -| parameters | | | Yes | +| parameters | object
[ object ]
string | | Yes | | server_code | string | | Yes | | status | [AppMCPServerStatus](#appmcpserverstatus) | | Yes | | updated_at | integer | | No | @@ -11903,7 +11903,7 @@ Condition detail | ---- | ---- | ----------- | -------- | | comparison_operator | string | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes | | name | string | | Yes | -| value | | | No | +| value | string
[ string ]
integer
number | | No | #### ConsoleDatasetListQuery @@ -12820,7 +12820,7 @@ Request payload for bulk downloading documents as a zip archive. | id | string | | Yes | | name | string | | Yes | | type | string | | Yes | -| value | string | | No | +| value | string
integer
number
boolean | | No | #### DocumentMetadataUpdatePayload @@ -12844,14 +12844,14 @@ Request payload for bulk downloading documents as a zip archive. | created_by | string | | No | | created_from | string | | No | | data_source_detail_dict | | | No | -| data_source_info_dict | | | No | +| data_source_info | | | No | | data_source_type | string | | No | | dataset_process_rule_id | string | | No | | disabled_at | integer | | No | | disabled_by | string | | No | | display_status | string | | No | | doc_form | string | | No | -| doc_metadata_details | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No | +| doc_metadata | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No | | enabled | boolean | | No | | error | string | | No | | hit_count | integer | | No | @@ -12893,6 +12893,16 @@ Request payload for bulk downloading documents as a zip archive. | stopped_at | integer | | Yes | | total_segments | integer | | No | +#### DocumentWithSegmentsListResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ [DocumentWithSegmentsResponse](#documentwithsegmentsresponse) ] | | Yes | +| has_more | boolean | | Yes | +| limit | integer | | Yes | +| page | integer | | Yes | +| total | integer | | Yes | + #### DocumentWithSegmentsResponse | Name | Type | Description | Required | @@ -12903,14 +12913,14 @@ Request payload for bulk downloading documents as a zip archive. | created_by | string | | No | | created_from | string | | No | | data_source_detail_dict | | | No | -| data_source_info_dict | | | No | +| data_source_info | | | No | | data_source_type | string | | No | | dataset_process_rule_id | string | | No | | disabled_at | integer | | No | | disabled_by | string | | No | | display_status | string | | No | | doc_form | string | | No | -| doc_metadata_details | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No | +| doc_metadata | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No | | enabled | boolean | | No | | error | string | | No | | hit_count | integer | | No | @@ -14000,7 +14010,7 @@ Enum class for large language model mode. | ---- | ---- | ----------- | -------- | | id | string | | Yes | | name | string | | Yes | -| value | | | No | +| value | string
integer
number | | No | #### MetadataFilteringCondition @@ -14595,7 +14605,7 @@ Form input definition. | ---- | ---- | ----------- | -------- | | current_identifier | string | | No | | type | [Type](#type) | | Yes | -| value | | | Yes | +| value | [Github](#github)
[Marketplace](#marketplace)
[Package](#package) | | Yes | #### PluginEndpointListResponse @@ -15130,7 +15140,7 @@ Form input definition. | description | string | | No | | icon | string | | No | | icon_background | string | | No | -| icon_type | | | No | +| icon_type | string
[IconType](#icontype) | | No | | privacy_policy | string | | No | | prompt_public | boolean | | No | | show_workflow_steps | boolean | | No | diff --git a/api/openapi/markdown/service-swagger.md b/api/openapi/markdown/service-swagger.md index cfb67eb3bd..5105112b5a 100644 --- a/api/openapi/markdown/service-swagger.md +++ b/api/openapi/markdown/service-swagger.md @@ -753,15 +753,17 @@ Create a new document by uploading a file | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | +| data | formData | Optional JSON string with document creation settings. | No | string | +| file | formData | Document file to upload. | Yes | file | | dataset_id | path | Dataset ID | Yes | string | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Document created successfully | -| 400 | Bad request - invalid file or parameters | -| 401 | Unauthorized - invalid API token | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) | +| 400 | Bad request - invalid file or parameters | | +| 401 | Unauthorized - invalid API token | | ### /datasets/{dataset_id}/document/create-by-text @@ -779,11 +781,11 @@ Create a new document by providing text content ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Document created successfully | -| 400 | Bad request - invalid parameters | -| 401 | Unauthorized - invalid API token | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) | +| 400 | Bad request - invalid parameters | | +| 401 | Unauthorized - invalid API token | | ### /datasets/{dataset_id}/document/create_by_file @@ -796,15 +798,17 @@ Create a new document by uploading a file | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | +| data | formData | Optional JSON string with document creation settings. | No | string | +| file | formData | Document file to upload. | Yes | file | | dataset_id | path | Dataset ID | Yes | string | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Document created successfully | -| 400 | Bad request - invalid file or parameters | -| 401 | Unauthorized - invalid API token | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) | +| 400 | Bad request - invalid file or parameters | | +| 401 | Unauthorized - invalid API token | | ### /datasets/{dataset_id}/document/create_by_text @@ -823,11 +827,11 @@ Deprecated legacy alias for creating a new document by providing text content. U ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Document created successfully | -| 400 | Bad request - invalid parameters | -| 401 | Unauthorized - invalid API token | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) | +| 400 | Bad request - invalid parameters | | +| 401 | Unauthorized - invalid API token | | ### /datasets/{dataset_id}/documents @@ -841,14 +845,18 @@ List all documents in a dataset | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | | dataset_id | path | Dataset ID | Yes | string | +| keyword | query | Search keyword | No | string | +| limit | query | Number of items per page | No | integer | +| page | query | Page number | No | integer | +| status | query | Document status filter | No | string | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Documents retrieved successfully | -| 401 | Unauthorized - invalid API token | -| 404 | Dataset not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Documents retrieved successfully | [DocumentListResponse](#documentlistresponse) | +| 401 | Unauthorized - invalid API token | | +| 404 | Dataset not found | | ### /datasets/{dataset_id}/documents/download-zip @@ -956,11 +964,11 @@ Get indexing status for documents in a batch ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Indexing status retrieved successfully | -| 401 | Unauthorized - invalid API token | -| 404 | Dataset or documents not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Indexing status retrieved successfully | [DocumentStatusListResponse](#documentstatuslistresponse) | +| 401 | Unauthorized - invalid API token | | +| 404 | Dataset or documents not found | | ### /datasets/{dataset_id}/documents/{document_id} @@ -1019,16 +1027,18 @@ Update an existing document by uploading a file | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | +| data | formData | Optional JSON string with document update settings. | No | string | +| file | formData | Replacement document file. | No | file | | dataset_id | path | Dataset ID | Yes | string | | document_id | path | Document ID | Yes | string | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Document updated successfully | -| 401 | Unauthorized - invalid API token | -| 404 | Document not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) | +| 401 | Unauthorized - invalid API token | | +| 404 | Document not found | | ### /datasets/{dataset_id}/documents/{document_id}/download @@ -1274,16 +1284,18 @@ Deprecated legacy alias for updating an existing document by uploading a file. U | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | +| data | formData | Optional JSON string with document update settings. | No | string | +| file | formData | Replacement document file. | No | file | | dataset_id | path | Dataset ID | Yes | string | | document_id | path | Document ID | Yes | string | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Document updated successfully | -| 401 | Unauthorized - invalid API token | -| 404 | Document not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) | +| 401 | Unauthorized - invalid API token | | +| 404 | Document not found | | ### /datasets/{dataset_id}/documents/{document_id}/update-by-text @@ -1302,11 +1314,11 @@ Update an existing document by providing text content ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Document updated successfully | -| 401 | Unauthorized - invalid API token | -| 404 | Document not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) | +| 401 | Unauthorized - invalid API token | | +| 404 | Document not found | | ### /datasets/{dataset_id}/documents/{document_id}/update_by_file @@ -1320,16 +1332,18 @@ Deprecated legacy alias for updating an existing document by uploading a file. U | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | +| data | formData | Optional JSON string with document update settings. | No | string | +| file | formData | Replacement document file. | No | file | | dataset_id | path | Dataset ID | Yes | string | | document_id | path | Document ID | Yes | string | ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Document updated successfully | -| 401 | Unauthorized - invalid API token | -| 404 | Document not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) | +| 401 | Unauthorized - invalid API token | | +| 404 | Document not found | | ### /datasets/{dataset_id}/documents/{document_id}/update_by_text @@ -1349,11 +1363,11 @@ Deprecated legacy alias for updating an existing document by providing text cont ##### Responses -| Code | Description | -| ---- | ----------- | -| 200 | Document updated successfully | -| 401 | Unauthorized - invalid API token | -| 404 | Document not found | +| Code | Description | Schema | +| ---- | ----------- | ------ | +| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) | +| 401 | Unauthorized - invalid API token | | +| 404 | Document not found | | ### /datasets/{dataset_id}/hit-testing @@ -2288,7 +2302,7 @@ Condition detail | ---- | ---- | ----------- | -------- | | comparison_operator | string | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes | | name | string | | Yes | -| value | | | No | +| value | string
[ string ]
integer
number | | No | #### ConversationListQuery @@ -2637,6 +2651,13 @@ Condition detail | inputs | object | | Yes | | is_published | boolean | | Yes | +#### DocumentAndBatchResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| batch | string | | Yes | +| document | [DocumentResponse](#documentresponse) | | Yes | + #### DocumentBatchDownloadZipPayload Request payload for bulk downloading documents as a zip archive. @@ -2654,6 +2675,16 @@ Request payload for bulk downloading documents as a zip archive. | page | integer | Page number | No | | status | string | Document status filter | No | +#### DocumentListResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ [DocumentResponse](#documentresponse) ] | | Yes | +| has_more | boolean | | Yes | +| limit | integer | | Yes | +| page | integer | | Yes | +| total | integer | | Yes | + #### DocumentMetadataOperation | Name | Type | Description | Required | @@ -2662,6 +2693,67 @@ Request payload for bulk downloading documents as a zip archive. | metadata_list | [ [MetadataDetail](#metadatadetail) ] | | Yes | | partial_update | boolean | | No | +#### DocumentMetadataResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| id | string | | Yes | +| name | string | | Yes | +| type | string | | Yes | +| value | string
integer
number
boolean | | No | + +#### DocumentResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| archived | boolean | | No | +| created_at | integer | | No | +| created_by | string | | No | +| created_from | string | | No | +| data_source_detail_dict | | | No | +| data_source_info | | | No | +| data_source_type | string | | No | +| dataset_process_rule_id | string | | No | +| disabled_at | integer | | No | +| disabled_by | string | | No | +| display_status | string | | No | +| doc_form | string | | No | +| doc_metadata | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No | +| enabled | boolean | | No | +| error | string | | No | +| hit_count | integer | | No | +| id | string | | Yes | +| indexing_status | string | | No | +| name | string | | Yes | +| need_summary | boolean | | No | +| position | integer | | No | +| summary_index_status | string | | No | +| tokens | integer | | No | +| word_count | integer | | No | + +#### DocumentStatusListResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| data | [ [DocumentStatusResponse](#documentstatusresponse) ] | | Yes | + +#### DocumentStatusResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| cleaning_completed_at | integer | | Yes | +| completed_at | integer | | Yes | +| completed_segments | integer | | No | +| error | string | | Yes | +| id | string | | Yes | +| indexing_status | string | | Yes | +| parsing_completed_at | integer | | Yes | +| paused_at | integer | | Yes | +| processing_started_at | integer | | Yes | +| splitting_completed_at | integer | | Yes | +| stopped_at | integer | | Yes | +| total_segments | integer | | No | + #### DocumentTextCreatePayload | Name | Type | Description | Required | @@ -2896,7 +2988,7 @@ Note: The SQLAlchemy model defines an `is_anonymous` property for Flask-Login se | ---- | ---- | ----------- | -------- | | id | string | | Yes | | name | string | | Yes | -| value | | | No | +| value | string
integer
number | | No | #### MetadataFilteringCondition @@ -3247,7 +3339,7 @@ Accept the legacy single-tag Service API payload while exposing a normalized tag | created_by_end_user | [SimpleEndUser](#simpleenduser) | | No | | created_by_role | string | | No | | created_from | string | | No | -| details | | | No | +| details | object
[ object ]
string
integer
number
boolean | | No | | id | string | | Yes | | workflow_run | [WorkflowRunForLogResponse](#workflowrunforlogresponse) | | No | @@ -3269,7 +3361,7 @@ Accept the legacy single-tag Service API payload while exposing a normalized tag | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | | created_at | integer | | No | -| elapsed_time | | | No | +| elapsed_time | number
integer | | No | | error | string | | No | | exceptions_count | integer | | No | | finished_at | integer | | No | @@ -3293,11 +3385,11 @@ Accept the legacy single-tag Service API payload while exposing a normalized tag | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | | created_at | integer | | No | -| elapsed_time | | | No | +| elapsed_time | number
integer | | No | | error | string | | No | | finished_at | integer | | No | | id | string | | Yes | -| inputs | | | No | +| inputs | object
[ object ]
string
integer
number
boolean | | No | | outputs | object | | No | | status | string | | Yes | | total_steps | integer | | No | diff --git a/api/tests/unit_tests/commands/test_generate_swagger_markdown_docs.py b/api/tests/unit_tests/commands/test_generate_swagger_markdown_docs.py index aa0a759ffa..4da03b2a88 100644 --- a/api/tests/unit_tests/commands/test_generate_swagger_markdown_docs.py +++ b/api/tests/unit_tests/commands/test_generate_swagger_markdown_docs.py @@ -188,6 +188,45 @@ def test_patch_union_schema_markdown_fills_converter_blank_schema_types(tmp_path assert "| allowed_file_types | [ [FileType](#filetype) ] | | No |" in patched +def test_patch_union_schema_markdown_fills_regular_definition_union_property(tmp_path): + module = _load_generate_swagger_markdown_docs_module() + spec_path = tmp_path / "service-swagger.json" + spec_path.write_text( + json.dumps( + { + "definitions": { + "DocumentMetadataResponse": { + "properties": { + "id": {"type": "string"}, + "value": { + "anyOf": [ + {"type": "string"}, + {"type": "integer"}, + {"type": "number"}, + {"type": "boolean"}, + {"type": "null"}, + ], + }, + }, + }, + } + } + ), + encoding="utf-8", + ) + markdown = """#### DocumentMetadataResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| id | string | | Yes | +| value | string | | No | +""" + + patched = module._patch_union_schema_markdown(markdown, spec_path) + + assert "| value | string
integer
number
boolean | | No |" in patched + + def test_patch_union_schema_markdown_ignores_specs_without_definitions(tmp_path): module = _load_generate_swagger_markdown_docs_module() spec_path = tmp_path / "console-swagger.json" @@ -236,7 +275,7 @@ def test_patch_union_schema_markdown_ignores_unrenderable_shapes(tmp_path): == "#### Definition\n| field |" ) - assert module._patch_union_schema_markdown("#### BrokenUnion\n", spec_path) == "#### BrokenUnion" + assert module._patch_union_schema_markdown("#### BrokenUnion\n", spec_path) == "#### BrokenUnion\n" def test_convert_spec_to_markdown_patches_generated_union_tables(tmp_path, monkeypatch): diff --git a/api/tests/unit_tests/controllers/console/datasets/test_datasets_document.py b/api/tests/unit_tests/controllers/console/datasets/test_datasets_document.py index c77895d940..16ead7c44f 100644 --- a/api/tests/unit_tests/controllers/console/datasets/test_datasets_document.py +++ b/api/tests/unit_tests/controllers/console/datasets/test_datasets_document.py @@ -1,4 +1,3 @@ -from types import SimpleNamespace from unittest.mock import MagicMock, patch import pytest @@ -9,6 +8,7 @@ import services from controllers.console import console_ns from controllers.console.datasets.datasets_document import ( DatasetDocumentListApi, + DatasetInitApi, DocumentApi, DocumentBatchDownloadZipApi, DocumentBatchIndexingEstimateApi, @@ -20,6 +20,7 @@ from controllers.console.datasets.datasets_document import ( DocumentMetadataApi, DocumentPipelineExecutionLogApi, DocumentProcessingApi, + DocumentRenameApi, DocumentRetryApi, DocumentStatusApi, DocumentSummaryStatusApi, @@ -33,7 +34,9 @@ from controllers.console.datasets.error import ( InvalidMetadataError, ) from core.rag.index_processor.constant.index_type import IndexStructureType -from models.enums import DataSourceType, IndexingStatus +from models.dataset import Dataset +from models.dataset import Document as DatasetDocument +from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus def unwrap(func): @@ -42,6 +45,79 @@ def unwrap(func): return func +def make_serializable_document(**overrides): + attrs = { + "id": "doc-1", + "position": 1, + "data_source_type": "upload_file", + "data_source_info_dict": {"upload_file_id": "file-1"}, + "data_source_detail_dict": {}, + "dataset_process_rule_id": None, + "name": "Document", + "created_from": "web", + "created_by": "u1", + "created_at": None, + "tokens": None, + "indexing_status": "completed", + "error": None, + "enabled": True, + "disabled_at": None, + "disabled_by": None, + "archived": False, + "display_status": "available", + "word_count": None, + "hit_count": 0, + "doc_form": "text_model", + "doc_metadata_details": None, + "summary_index_status": None, + "need_summary": False, + "process_rule_dict": None, + "completed_segments": None, + "total_segments": None, + } + attrs.update(overrides) + document = MagicMock(spec_set=list(attrs)) + for name, value in attrs.items(): + setattr(document, name, value) + return document + + +def make_dataset(**overrides): + attrs = { + "id": "ds-1", + "tenant_id": "tenant-1", + "name": "Dataset", + "indexing_technique": "economy", + "created_by": "u1", + "summary_index_setting": {"enable": True}, + } + attrs.update(overrides) + return Dataset(**attrs) + + +def make_document(**overrides): + attrs = { + "id": "doc-1", + "tenant_id": "tenant-1", + "dataset_id": "ds-1", + "position": 1, + "data_source_type": DataSourceType.UPLOAD_FILE, + "data_source_info": None, + "batch": "batch-1", + "name": "Document", + "created_from": DocumentCreatedFrom.WEB, + "created_by": "u1", + "indexing_status": IndexingStatus.COMPLETED, + "enabled": True, + "archived": False, + "doc_metadata": None, + "doc_form": IndexStructureType.PARAGRAPH_INDEX, + "need_summary": False, + } + attrs.update(overrides) + return DatasetDocument(**attrs) + + @pytest.fixture def tenant_ctx(): return (MagicMock(is_dataset_editor=True, id="u1"), "tenant-1") @@ -58,7 +134,7 @@ def patch_tenant(tenant_ctx): @pytest.fixture def dataset(): - return MagicMock(id="ds-1", indexing_technique="economy", summary_index_setting={"enable": True}) + return make_dataset() @pytest.fixture @@ -130,11 +206,9 @@ class TestDatasetDocumentListApi: api = DatasetDocumentListApi() method = unwrap(api.get) - doc = MagicMock(id="doc-1") + doc = make_serializable_document() pagination = MagicMock(items=[doc], total=1) - count_mock = MagicMock(return_value=2) - with ( app.test_request_context("/?fetch=true"), patch( @@ -149,14 +223,12 @@ class TestDatasetDocumentListApi: "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status", return_value=None, ), - patch( - "controllers.console.datasets.datasets_document.marshal", - return_value=[{"id": "doc-1"}], - ), ): resp = method(api, "ds-1") - assert resp["data"] + assert resp["data"][0]["id"] == "doc-1" + assert resp["data"][0]["completed_segments"] == 2 + assert resp["data"][0]["total_segments"] == 2 def test_get_with_search_status_and_created_at_sort( self, app: Flask, patch_tenant, patch_dataset, patch_permission @@ -164,7 +236,7 @@ class TestDatasetDocumentListApi: api = DatasetDocumentListApi() method = unwrap(api.get) - pagination = MagicMock(items=[MagicMock()], total=1) + pagination = MagicMock(items=[make_serializable_document()], total=1) with ( app.test_request_context("/?keyword=test&status=enabled&sort=created_at"), @@ -180,10 +252,6 @@ class TestDatasetDocumentListApi: "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status", return_value=None, ), - patch( - "controllers.console.datasets.datasets_document.marshal", - return_value=[{"id": "doc-1"}], - ), ): resp = method(api, "ds-1") @@ -193,7 +261,7 @@ class TestDatasetDocumentListApi: api = DatasetDocumentListApi() method = unwrap(api.get) - pagination = MagicMock(items=[MagicMock()], total=1) + pagination = MagicMock(items=[make_serializable_document()], total=1) with ( app.test_request_context("/"), @@ -205,22 +273,21 @@ class TestDatasetDocumentListApi: "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status", return_value=None, ), - patch( - "controllers.console.datasets.datasets_document.marshal", - return_value=[{"id": "doc-1"}], - ), ): response = method(api, "ds-1") assert response["total"] == 1 + assert response["data"][0]["id"] == "doc-1" + assert "completed_segments" not in response["data"][0] + assert "total_segments" not in response["data"][0] def test_post_success(self, app: Flask, patch_tenant, patch_dataset, patch_permission): api = DatasetDocumentListApi() method = unwrap(api.post) payload = {"indexing_technique": "economy"} - created_dataset = SimpleNamespace(id="ds-1", name="Dataset", indexing_technique="economy") - created_document = SimpleNamespace(id="doc-1", name="Document", doc_metadata_details=None) + created_dataset = make_dataset() + created_document = make_document() with ( app.test_request_context("/", json=payload), @@ -237,10 +304,17 @@ class TestDatasetDocumentListApi: "controllers.console.datasets.datasets_document.DocumentService.save_document_with_dataset_id", return_value=([created_document], "batch-1"), ), + patch("models.dataset.db.session.scalar", return_value=0), ): response = method(api, "ds-1") assert "documents" in response + assert response["dataset"]["id"] == "ds-1" + assert response["documents"][0]["id"] == "doc-1" + assert response["documents"][0]["data_source_info"] == {} + assert response["documents"][0]["doc_metadata"] == [] + assert "data_source_info_dict" not in response["documents"][0] + assert "doc_metadata_details" not in response["documents"][0] def test_post_forbidden(self, app: Flask): api = DatasetDocumentListApi() @@ -267,7 +341,7 @@ class TestDatasetDocumentListApi: api = DatasetDocumentListApi() method = unwrap(api.get) - pagination = MagicMock(items=[MagicMock()], total=1) + pagination = MagicMock(items=[make_serializable_document()], total=1) with ( app.test_request_context("/?fetch=maybe"), @@ -279,10 +353,6 @@ class TestDatasetDocumentListApi: "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status", return_value=None, ), - patch( - "controllers.console.datasets.datasets_document.marshal", - return_value=[{"id": "doc-1"}], - ), ): response = method(api, "ds-1") @@ -310,6 +380,37 @@ class TestDatasetDocumentListApi: assert response["total"] == 0 +class TestDatasetInitApi: + def test_post_success_serializes_created_dataset_and_documents(self, app: Flask, patch_tenant): + api = DatasetInitApi() + method = unwrap(api.post) + + payload = {"indexing_technique": "economy"} + created_dataset = make_dataset() + created_document = make_document(id="doc-init") + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_document.DocumentService.document_create_args_validate", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.save_document_without_dataset_id", + return_value=(created_dataset, [created_document], "batch-init"), + ), + patch("models.dataset.db.session.scalar", return_value=0), + ): + response = method(api) + + assert response["dataset"]["id"] == "ds-1" + assert response["documents"][0]["id"] == "doc-init" + assert response["documents"][0]["data_source_info"] == {} + assert response["documents"][0]["doc_metadata"] == [] + assert response["batch"] == "batch-init" + + class TestDocumentApi: def test_get_success(self, app: Flask, patch_tenant): api = DocumentApi() @@ -899,7 +1000,7 @@ class TestDocumentBatchDownloadZipApi: api = DocumentBatchDownloadZipApi() method = unwrap(api.post) - payload = {"document_ids": []} + payload: dict[str, list[str]] = {"document_ids": []} with app.test_request_context("/", json=payload), patch.object(type(console_ns), "payload", payload): with pytest.raises(ValueError): @@ -1046,6 +1147,53 @@ class TestDocumentBatchIndexingEstimateApi: class TestDocumentBatchIndexingStatusApi: + def test_get_batch_status_success_serializes_status_shape(self, app: Flask, patch_tenant): + api = DocumentBatchIndexingStatusApi() + method = unwrap(api.get) + + document = MagicMock( + id="doc-1", + indexing_status=IndexingStatus.COMPLETED, + is_paused=False, + processing_started_at=None, + parsing_completed_at=None, + cleaning_completed_at=None, + splitting_completed_at=None, + completed_at=None, + paused_at=None, + error=None, + stopped_at=None, + ) + + with ( + app.test_request_context("/"), + patch.object(api, "get_batch_documents", return_value=[document]), + patch( + "controllers.console.datasets.datasets_document.db.session.scalar", + side_effect=[2, 3], + ), + ): + response = method(api, "ds-1", "batch-1") + + assert response == { + "data": [ + { + "id": "doc-1", + "indexing_status": "completed", + "processing_started_at": None, + "parsing_completed_at": None, + "cleaning_completed_at": None, + "splitting_completed_at": None, + "completed_at": None, + "paused_at": None, + "error": None, + "stopped_at": None, + "completed_segments": 2, + "total_segments": 3, + } + ] + } + def test_get_batch_status_invalid_batch(self, app: Flask, patch_tenant): """Test batch status with invalid batch""" api = DocumentBatchIndexingStatusApi() @@ -1057,6 +1205,39 @@ class TestDocumentBatchIndexingStatusApi: class TestDocumentIndexingStatusApi: + def test_get_status_success_serializes_status_shape(self, app: Flask, patch_tenant): + api = DocumentIndexingStatusApi() + method = unwrap(api.get) + + document = MagicMock( + id="doc-1", + indexing_status=IndexingStatus.INDEXING, + is_paused=False, + processing_started_at=None, + parsing_completed_at=None, + cleaning_completed_at=None, + splitting_completed_at=None, + completed_at=None, + paused_at=None, + error=None, + stopped_at=None, + ) + + with ( + app.test_request_context("/"), + patch.object(api, "get_document", return_value=document), + patch( + "controllers.console.datasets.datasets_document.db.session.scalar", + side_effect=[1, 4], + ), + ): + response = method(api, "ds-1", "doc-1") + + assert response["id"] == "doc-1" + assert response["indexing_status"] == "indexing" + assert response["completed_segments"] == 1 + assert response["total_segments"] == 4 + def test_get_status_document_not_found(self, app: Flask, patch_tenant): """Test getting status for non-existent document""" api = DocumentIndexingStatusApi() @@ -1067,6 +1248,40 @@ class TestDocumentIndexingStatusApi: method(api, "ds-1", "invalid-doc") +class TestDocumentRenameApi: + def test_post_success_serializes_document_shape(self, app: Flask, patch_tenant): + api = DocumentRenameApi() + method = unwrap(api.post) + + payload = {"name": "Renamed Document"} + renamed_document = make_document(id="doc-renamed", name="Renamed Document") + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=make_dataset(), + ), + patch( + "controllers.console.datasets.datasets_document.DatasetService.check_dataset_operator_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.rename_document", + return_value=renamed_document, + ), + patch("models.dataset.db.session.scalar", return_value=0), + ): + response = method(api, "ds-1", "doc-1") + + assert response["id"] == "doc-renamed" + assert response["name"] == "Renamed Document" + assert response["data_source_info"] == {} + assert response["doc_metadata"] == [] + assert "data_source_info_dict" not in response + + class TestDocumentApiMetadata: def test_get_with_only_option(self, app: Flask, patch_tenant): """Test get with 'only' metadata option""" @@ -1291,7 +1506,7 @@ class TestDocumentListAdvancedCases: api = DatasetDocumentListApi() method = unwrap(api.get) - pagination = MagicMock(items=[MagicMock()], total=1) + pagination = MagicMock(items=[make_serializable_document()], total=1) with ( app.test_request_context("/?sort=updated_at"), @@ -1303,10 +1518,6 @@ class TestDocumentListAdvancedCases: "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status", return_value=None, ), - patch( - "controllers.console.datasets.datasets_document.marshal", - return_value=[{"id": "doc-1"}], - ), ): response = method(api, "ds-1") diff --git a/api/tests/unit_tests/controllers/service_api/dataset/test_document.py b/api/tests/unit_tests/controllers/service_api/dataset/test_document.py index 2185e65326..16b54acd8c 100644 --- a/api/tests/unit_tests/controllers/service_api/dataset/test_document.py +++ b/api/tests/unit_tests/controllers/service_api/dataset/test_document.py @@ -44,6 +44,41 @@ from services.dataset_service import DocumentService from services.entities.knowledge_entities.knowledge_entities import ProcessRule, RetrievalModel +def make_serializable_document(**overrides: object) -> Mock: + attrs: dict[str, object] = { + "id": str(uuid.uuid4()), + "position": 1, + "data_source_type": "upload_file", + "data_source_info_dict": {"upload_file_id": "file-1"}, + "data_source_detail_dict": {}, + "dataset_process_rule_id": None, + "batch": "batch-1", + "name": "Test Document", + "created_from": "api", + "created_by": "user-1", + "created_at": None, + "tokens": None, + "indexing_status": "completed", + "error": None, + "enabled": True, + "disabled_at": None, + "disabled_by": None, + "archived": False, + "display_status": "available", + "word_count": None, + "hit_count": 0, + "doc_form": "text_model", + "doc_metadata_details": None, + "summary_index_status": None, + "need_summary": False, + } + attrs.update(overrides) + document = Mock(spec_set=list(attrs)) + for name, value in attrs.items(): + setattr(document, name, value) + return document + + class TestDocumentTextCreatePayload: """Test suite for DocumentTextCreatePayload Pydantic model.""" @@ -226,7 +261,7 @@ class TestDocumentService: assert hasattr(DocumentService, "batch_update_document_status") @patch.object(DocumentService, "get_document") - def test_get_document_returns_document(self, mock_get): + def test_get_document_returns_document(self, mock_get: Mock) -> None: """Test get_document returns document object.""" mock_doc = Mock() mock_doc.id = str(uuid.uuid4()) @@ -235,6 +270,7 @@ class TestDocumentService: mock_get.return_value = mock_doc result = DocumentService.get_document(dataset_id="dataset_id", document_id="doc_id") + assert result is not None assert result.name == "Test Document" assert result.indexing_status == "completed" @@ -510,7 +546,7 @@ class TestDocumentApiGet: """ @pytest.fixture - def mock_doc_detail(self, mock_tenant): + def mock_doc_detail(self, mock_tenant: Mock) -> Mock: """A document mock with every attribute ``DocumentApi.get`` reads.""" doc = Mock() doc.id = str(uuid.uuid4()) @@ -551,8 +587,8 @@ class TestDocumentApiGet: @patch("controllers.service_api.dataset.document.DatasetService") @patch("controllers.service_api.dataset.document.DocumentService") def test_get_document_success_with_all_metadata( - self, mock_doc_svc, mock_dataset_svc, app: Flask, mock_tenant, mock_doc_detail - ): + self, mock_doc_svc: Mock, mock_dataset_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock + ) -> None: """Test successful document retrieval with metadata='all'.""" # Arrange dataset_id = str(uuid.uuid4()) @@ -569,8 +605,8 @@ class TestDocumentApiGet: method="GET", ): api = DocumentApi() - api.get_dataset = Mock(return_value=mock_dataset) - response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id) + with patch.object(api, "get_dataset", return_value=mock_dataset): + response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id) # Assert assert response["id"] == mock_doc_detail.id @@ -580,7 +616,7 @@ class TestDocumentApiGet: assert "doc_metadata" in response @patch("controllers.service_api.dataset.document.DocumentService") - def test_get_document_not_found(self, mock_doc_svc, app: Flask, mock_tenant): + def test_get_document_not_found(self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock) -> None: """Test 404 when document is not found.""" # Arrange dataset_id = str(uuid.uuid4()) @@ -595,12 +631,14 @@ class TestDocumentApiGet: method="GET", ): api = DocumentApi() - api.get_dataset = Mock(return_value=mock_dataset) - with pytest.raises(NotFound): - api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id="nonexistent") + with patch.object(api, "get_dataset", return_value=mock_dataset): + with pytest.raises(NotFound): + api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id="nonexistent") @patch("controllers.service_api.dataset.document.DocumentService") - def test_get_document_forbidden_wrong_tenant(self, mock_doc_svc, app: Flask, mock_tenant, mock_doc_detail): + def test_get_document_forbidden_wrong_tenant( + self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock + ) -> None: """Test 403 when document tenant doesn't match request tenant.""" # Arrange dataset_id = str(uuid.uuid4()) @@ -616,12 +654,14 @@ class TestDocumentApiGet: method="GET", ): api = DocumentApi() - api.get_dataset = Mock(return_value=mock_dataset) - with pytest.raises(Forbidden): - api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id) + with patch.object(api, "get_dataset", return_value=mock_dataset): + with pytest.raises(Forbidden): + api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id) @patch("controllers.service_api.dataset.document.DocumentService") - def test_get_document_metadata_only(self, mock_doc_svc, app: Flask, mock_tenant, mock_doc_detail): + def test_get_document_metadata_only( + self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock + ) -> None: """Test document retrieval with metadata='only'.""" # Arrange dataset_id = str(uuid.uuid4()) @@ -637,8 +677,8 @@ class TestDocumentApiGet: method="GET", ): api = DocumentApi() - api.get_dataset = Mock(return_value=mock_dataset) - response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id) + with patch.object(api, "get_dataset", return_value=mock_dataset): + response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id) # Assert — metadata='only' returns only id, doc_type, doc_metadata assert response["id"] == mock_doc_detail.id @@ -649,8 +689,8 @@ class TestDocumentApiGet: @patch("controllers.service_api.dataset.document.DatasetService") @patch("controllers.service_api.dataset.document.DocumentService") def test_get_document_metadata_without( - self, mock_doc_svc, mock_dataset_svc, app: Flask, mock_tenant, mock_doc_detail - ): + self, mock_doc_svc: Mock, mock_dataset_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock + ) -> None: """Test document retrieval with metadata='without'.""" # Arrange dataset_id = str(uuid.uuid4()) @@ -667,8 +707,8 @@ class TestDocumentApiGet: method="GET", ): api = DocumentApi() - api.get_dataset = Mock(return_value=mock_dataset) - response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id) + with patch.object(api, "get_dataset", return_value=mock_dataset): + response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id) # Assert — metadata='without' omits doc_type / doc_metadata assert response["id"] == mock_doc_detail.id @@ -677,7 +717,9 @@ class TestDocumentApiGet: assert "name" in response @patch("controllers.service_api.dataset.document.DocumentService") - def test_get_document_invalid_metadata_value(self, mock_doc_svc, app: Flask, mock_tenant, mock_doc_detail): + def test_get_document_invalid_metadata_value( + self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock + ) -> None: """Test error when metadata parameter has invalid value.""" # Arrange dataset_id = str(uuid.uuid4()) @@ -693,9 +735,9 @@ class TestDocumentApiGet: method="GET", ): api = DocumentApi() - api.get_dataset = Mock(return_value=mock_dataset) - with pytest.raises(InvalidMetadataError): - api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id) + with patch.object(api, "get_dataset", return_value=mock_dataset): + with pytest.raises(InvalidMetadataError): + api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id) class TestDocumentApiDelete: @@ -808,21 +850,26 @@ class TestDocumentApiDelete: class TestDocumentListApi: """Test suite for DocumentListApi endpoint.""" - @patch("controllers.service_api.dataset.document.marshal") @patch("controllers.service_api.dataset.document.DocumentService") @patch("controllers.service_api.dataset.document.db") - def test_list_documents_success(self, mock_db, mock_doc_svc, mock_marshal, app: Flask, mock_tenant, mock_dataset): + def test_list_documents_success(self, mock_db, mock_doc_svc, app: Flask, mock_tenant, mock_dataset): """Test successful document list retrieval.""" # Arrange mock_db.session.scalar.return_value = mock_dataset mock_pagination = Mock() - mock_pagination.items = [Mock(), Mock()] + mock_pagination.items = [ + make_serializable_document( + id="doc-1", + name="Document 1", + doc_metadata_details=[{"id": "meta-1", "name": "amount", "type": "number", "value": 42}], + ), + make_serializable_document(id="doc-2", name="Document 2"), + ] mock_pagination.total = 2 mock_db.paginate.return_value = mock_pagination mock_doc_svc.enrich_documents_with_summary_index_status.return_value = None - mock_marshal.return_value = [{"id": "doc1"}, {"id": "doc2"}] # Act with app.test_request_context( @@ -838,6 +885,11 @@ class TestDocumentListApi: assert response["page"] == 1 assert response["limit"] == 20 assert response["total"] == 2 + assert response["data"][0]["id"] == "doc-1" + assert response["data"][0]["data_source_info"] == {"upload_file_id": "file-1"} + assert response["data"][0]["doc_metadata"][0]["value"] == 42 + assert "data_source_info_dict" not in response["data"][0] + assert "doc_metadata_details" not in response["data"][0] @patch("controllers.service_api.dataset.document.db") def test_list_documents_dataset_not_found(self, mock_db, app: Flask, mock_tenant, mock_dataset): @@ -858,12 +910,9 @@ class TestDocumentListApi: class TestDocumentIndexingStatusApi: """Test suite for DocumentIndexingStatusApi endpoint.""" - @patch("controllers.service_api.dataset.document.marshal") @patch("controllers.service_api.dataset.document.DocumentService") @patch("controllers.service_api.dataset.document.db") - def test_get_indexing_status_success( - self, mock_db, mock_doc_svc, mock_marshal, app: Flask, mock_tenant, mock_dataset - ): + def test_get_indexing_status_success(self, mock_db, mock_doc_svc, app: Flask, mock_tenant, mock_dataset): """Test successful indexing status retrieval.""" # Arrange batch_id = "batch_123" @@ -884,7 +933,6 @@ class TestDocumentIndexingStatusApi: # scalar() called 3 times: dataset lookup, completed_segments count, total_segments count mock_db.session.scalar.side_effect = [mock_dataset, 5, 5] - mock_marshal.return_value = {"id": mock_doc.id, "indexing_status": "completed"} # Act with app.test_request_context( @@ -897,6 +945,12 @@ class TestDocumentIndexingStatusApi: # Assert assert "data" in response assert len(response["data"]) == 1 + item = response["data"][0] + assert item["id"] == mock_doc.id + assert item["indexing_status"] == "completed" + assert item["completed_segments"] == 5 + assert item["total_segments"] == 5 + assert item["processing_started_at"] is None @patch("controllers.service_api.dataset.document.db") def test_get_indexing_status_dataset_not_found(self, mock_db, app: Flask, mock_tenant, mock_dataset): @@ -973,7 +1027,6 @@ class TestDocumentAddByTextApi: mock_rate_limit.enabled = False mock_feature_svc.get_knowledge_rate_limit.return_value = mock_rate_limit - @patch("controllers.service_api.dataset.document.marshal") @patch("controllers.service_api.dataset.document.DocumentService") @patch("controllers.service_api.dataset.document.KnowledgeConfig") @patch("controllers.service_api.dataset.document.FileService") @@ -990,7 +1043,6 @@ class TestDocumentAddByTextApi: mock_file_svc_cls, mock_knowledge_config, mock_doc_svc, - mock_marshal, app: Flask, mock_tenant, mock_dataset, @@ -1012,11 +1064,9 @@ class TestDocumentAddByTextApi: mock_config = Mock() mock_knowledge_config.model_validate.return_value = mock_config - mock_doc = Mock() - mock_doc.id = str(uuid.uuid4()) + mock_doc = make_serializable_document(id="doc-create-text", name="Test Document") mock_doc_svc.save_document_with_dataset_id.return_value = ([mock_doc], "batch_123") mock_doc_svc.document_create_args_validate.return_value = None - mock_marshal.return_value = {"id": mock_doc.id, "name": "Test Document"} # Act with app.test_request_context( @@ -1037,6 +1087,10 @@ class TestDocumentAddByTextApi: assert "document" in response assert "batch" in response assert response["batch"] == "batch_123" + assert response["document"]["id"] == "doc-create-text" + assert response["document"]["data_source_info"] == {"upload_file_id": "file-1"} + assert response["document"]["doc_metadata"] == [] + assert "data_source_info_dict" not in response["document"] @patch("controllers.service_api.wraps.FeatureService") @patch("controllers.service_api.wraps.validate_and_get_api_token") @@ -1162,7 +1216,6 @@ class TestDocumentUpdateByTextApiPost: ``@cloud_edition_billing_rate_limit_check``. """ - @patch("controllers.service_api.dataset.document.marshal") @patch("controllers.service_api.dataset.document.DocumentService") @patch("controllers.service_api.dataset.document.FileService") @patch("controllers.service_api.dataset.document.current_user") @@ -1177,7 +1230,6 @@ class TestDocumentUpdateByTextApiPost: mock_current_user, mock_file_svc_cls, mock_doc_svc, - mock_marshal, app: Flask, mock_tenant, mock_dataset, @@ -1193,10 +1245,9 @@ class TestDocumentUpdateByTextApiPost: mock_upload.id = str(uuid.uuid4()) mock_file_svc_cls.return_value.upload_text.return_value = mock_upload - mock_document = Mock() + mock_document = make_serializable_document(id="doc-update-text", name="Updated Doc") mock_doc_svc.document_create_args_validate.return_value = None mock_doc_svc.save_document_with_dataset_id.return_value = ([mock_document], "batch-1") - mock_marshal.return_value = {"id": "doc-1"} doc_id = str(uuid.uuid4()) with app.test_request_context( @@ -1214,6 +1265,9 @@ class TestDocumentUpdateByTextApiPost: assert status == 200 assert "document" in response + assert response["batch"] == "batch-1" + assert response["document"]["id"] == "doc-update-text" + assert response["document"]["doc_metadata"] == [] @patch("controllers.service_api.dataset.document.db") @patch("controllers.service_api.wraps.FeatureService") @@ -1254,6 +1308,61 @@ class TestDocumentAddByFileApiPost: decorators and ``@cloud_edition_billing_rate_limit_check``. """ + @patch("controllers.service_api.dataset.document.DocumentService") + @patch("controllers.service_api.dataset.document.FileService") + @patch("controllers.service_api.dataset.document.current_user") + @patch("controllers.service_api.dataset.document.db") + @patch("controllers.service_api.wraps.FeatureService") + @patch("controllers.service_api.wraps.validate_and_get_api_token") + def test_add_by_file_success_serializes_document_and_batch_shape( + self, + mock_validate_token, + mock_feature_svc, + mock_db, + mock_current_user, + mock_file_svc_cls, + mock_doc_svc, + app: Flask, + mock_tenant, + mock_dataset, + ): + """Test successful document creation by file.""" + _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) + mock_dataset.provider = "vendor" + mock_dataset.indexing_technique = "economy" + mock_dataset.chunk_structure = None + mock_dataset.latest_process_rule = Mock() + mock_dataset.created_by_account = Mock() + mock_db.session.scalar.return_value = mock_dataset + + mock_current_user.id = "user-1" + mock_upload = Mock() + mock_upload.id = str(uuid.uuid4()) + mock_file_svc_cls.return_value.upload_file.return_value = mock_upload + + mock_document = make_serializable_document(id="doc-create-file", name="File Document") + mock_doc_svc.document_create_args_validate.return_value = None + mock_doc_svc.save_document_with_dataset_id.return_value = ([mock_document], "batch-file") + + from io import BytesIO + + data = {"file": (BytesIO(b"content"), "test.pdf", "application/pdf")} + with app.test_request_context( + f"/datasets/{mock_dataset.id}/document/create-by-file", + method="POST", + content_type="multipart/form-data", + data=data, + headers={"Authorization": "Bearer test_token"}, + ): + api = DocumentAddByFileApi() + response, status = api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id) + + assert status == 200 + assert response["batch"] == "batch-file" + assert response["document"]["id"] == "doc-create-file" + assert response["document"]["data_source_info"] == {"upload_file_id": "file-1"} + assert response["document"]["doc_metadata"] == [] + @patch("controllers.service_api.dataset.document.db") @patch("controllers.service_api.wraps.FeatureService") @patch("controllers.service_api.wraps.validate_and_get_api_token") @@ -1498,7 +1607,6 @@ class TestDocumentUpdateByFileApiPatch: document_id=doc_id, ) - @patch("controllers.service_api.dataset.document.marshal") @patch("controllers.service_api.dataset.document.DocumentService") @patch("controllers.service_api.dataset.document.FileService") @patch("controllers.service_api.dataset.document.current_user") @@ -1513,7 +1621,6 @@ class TestDocumentUpdateByFileApiPatch: mock_current_user, mock_file_svc_cls, mock_doc_svc, - mock_marshal, app: Flask, mock_tenant, mock_dataset, @@ -1532,11 +1639,9 @@ class TestDocumentUpdateByFileApiPatch: mock_upload.id = str(uuid.uuid4()) mock_file_svc_cls.return_value.upload_file.return_value = mock_upload - mock_document = Mock() - mock_document.batch = "batch-1" + mock_document = make_serializable_document(id="doc-update-file", name="File Document", batch="batch-1") mock_doc_svc.document_create_args_validate.return_value = None mock_doc_svc.save_document_with_dataset_id.return_value = ([mock_document], None) - mock_marshal.return_value = {"id": "doc-1"} from io import BytesIO @@ -1558,3 +1663,6 @@ class TestDocumentUpdateByFileApiPatch: assert status == 200 assert "document" in response + assert response["batch"] == "batch-1" + assert response["document"]["id"] == "doc-update-file" + assert response["document"]["data_source_info"] == {"upload_file_id": "file-1"} diff --git a/api/tests/unit_tests/controllers/test_swagger.py b/api/tests/unit_tests/controllers/test_swagger.py index 999f1ae78d..e45c2658d3 100644 --- a/api/tests/unit_tests/controllers/test_swagger.py +++ b/api/tests/unit_tests/controllers/test_swagger.py @@ -18,6 +18,19 @@ def _definition_refs(value: object) -> set[str]: return refs +def _parameters_by_name(operation: dict[str, object]) -> dict[str, dict[str, object]]: + parameters = operation.get("parameters", []) + assert isinstance(parameters, list) + result: dict[str, dict[str, object]] = {} + for parameter in parameters: + if not isinstance(parameter, dict): + continue + name = parameter.get("name") + if isinstance(name, str): + result[name] = parameter + return result + + @pytest.mark.parametrize( ("first_kwargs", "second_kwargs"), [ @@ -70,3 +83,60 @@ def test_swagger_json_endpoints_render(monkeypatch: pytest.MonkeyPatch): assert not sorted(ref for ref in missing_refs if ref.startswith("_AnonymousInlineModel")) assert app.config["RESTX_INCLUDE_ALL_MODELS"] is True + + +def test_service_document_file_routes_document_multipart_form_data(monkeypatch: pytest.MonkeyPatch): + from configs import dify_config + from controllers.service_api import bp as service_api_bp + + monkeypatch.setattr(dify_config, "SWAGGER_UI_ENABLED", True) + + app = Flask(__name__) + app.config["TESTING"] = True + app.config["RESTX_INCLUDE_ALL_MODELS"] = True + app.register_blueprint(service_api_bp) + + payload = app.test_client().get("/v1/swagger.json").get_json() + paths = payload["paths"] + + create_operation = paths["/datasets/{dataset_id}/document/create-by-file"]["post"] + create_params = _parameters_by_name(create_operation) + assert create_operation["consumes"] == ["multipart/form-data"] + assert create_params["file"]["in"] == "formData" + assert create_params["file"]["type"] == "file" + assert create_params["file"]["required"] is True + assert create_params["data"]["in"] == "formData" + assert create_params["data"]["type"] == "string" + + for path in ( + "/datasets/{dataset_id}/documents/{document_id}", + "/datasets/{dataset_id}/documents/{document_id}/update-by-file", + "/datasets/{dataset_id}/documents/{document_id}/update_by_file", + ): + update_operation = paths[path]["patch" if path.endswith("{document_id}") else "post"] + update_params = _parameters_by_name(update_operation) + assert update_operation["consumes"] == ["multipart/form-data"] + assert update_params["file"]["in"] == "formData" + assert update_params["file"]["type"] == "file" + assert update_params["file"]["required"] is False + assert update_params["data"]["in"] == "formData" + assert update_params["data"]["type"] == "string" + + +def test_service_document_list_documents_query_params_render(monkeypatch: pytest.MonkeyPatch): + from configs import dify_config + from controllers.service_api import bp as service_api_bp + + monkeypatch.setattr(dify_config, "SWAGGER_UI_ENABLED", True) + + app = Flask(__name__) + app.config["TESTING"] = True + app.config["RESTX_INCLUDE_ALL_MODELS"] = True + app.register_blueprint(service_api_bp) + + payload = app.test_client().get("/v1/swagger.json").get_json() + operation = payload["paths"]["/datasets/{dataset_id}/documents"]["get"] + params = _parameters_by_name(operation) + + for name in ("page", "limit", "keyword", "status"): + assert params[name]["in"] == "query" diff --git a/packages/contracts/generated/api/console/datasets/orpc.gen.ts b/packages/contracts/generated/api/console/datasets/orpc.gen.ts index 3a53a9a7f5..0b2c04d64b 100644 --- a/packages/contracts/generated/api/console/datasets/orpc.gen.ts +++ b/packages/contracts/generated/api/console/datasets/orpc.gen.ts @@ -676,16 +676,8 @@ export const indexingEstimate2 = { get: get13, } -/** - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated - */ export const get14 = oc .route({ - deprecated: true, - description: - 'Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdBatchByBatchIndexingStatus', @@ -862,16 +854,10 @@ export const indexingEstimate3 = { /** * Get document indexing status - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get17 = oc .route({ - deprecated: true, - description: - 'Get document indexing status\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get document indexing status', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatus', @@ -1410,16 +1396,10 @@ export const delete7 = oc /** * Get documents in a dataset - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get26 = oc .route({ - deprecated: true, - description: - 'Get documents in a dataset\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get documents in a dataset', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdDocuments', diff --git a/packages/contracts/generated/api/console/datasets/types.gen.ts b/packages/contracts/generated/api/console/datasets/types.gen.ts index 020e8fca76..b92f0b8754 100644 --- a/packages/contracts/generated/api/console/datasets/types.gen.ts +++ b/packages/contracts/generated/api/console/datasets/types.gen.ts @@ -290,6 +290,18 @@ export type AutoDisableLogsResponse = { document_ids: Array } +export type DocumentStatusListResponse = { + data: Array +} + +export type DocumentWithSegmentsListResponse = { + data: Array + has_more: boolean + limit: number + page: number + total: number +} + export type DocumentBatchDownloadZipPayload = { document_ids: Array } @@ -306,6 +318,21 @@ export type UrlResponse = { url: string } +export type DocumentStatusResponse = { + cleaning_completed_at: number | null + completed_at: number | null + completed_segments?: number | null + error: string | null + id: string + indexing_status: string + parsing_completed_at: number | null + paused_at: number | null + processing_started_at: number | null + splitting_completed_at: number | null + stopped_at: number | null + total_segments?: number | null +} + export type DocumentMetadataUpdatePayload = { doc_metadata?: unknown doc_type?: string | null @@ -326,14 +353,14 @@ export type DocumentResponse = { created_by?: string | null created_from?: string | null data_source_detail_dict?: unknown - data_source_info_dict?: unknown + data_source_info?: unknown data_source_type?: string | null dataset_process_rule_id?: string | null disabled_at?: number | null disabled_by?: string | null display_status?: string | null doc_form?: string | null - doc_metadata_details?: Array + doc_metadata?: Array enabled?: boolean | null error?: string | null hit_count?: number | null @@ -433,10 +460,6 @@ export type HitTestingResponse = { records: Array } -export type DocumentStatusListResponse = { - data: Array -} - export type DatasetMetadataListResponse = { built_in_field_enabled: boolean doc_metadata: Array @@ -671,6 +694,36 @@ export type DatasetMetadataBuiltInFieldResponse = { type: string } +export type DocumentWithSegmentsResponse = { + archived?: boolean | null + completed_segments?: number | null + created_at?: number | null + created_by?: string | null + created_from?: string | null + data_source_detail_dict?: unknown + data_source_info?: unknown + data_source_type?: string | null + dataset_process_rule_id?: string | null + disabled_at?: number | null + disabled_by?: string | null + display_status?: string | null + doc_form?: string | null + doc_metadata?: Array + enabled?: boolean | null + error?: string | null + hit_count?: number | null + id: string + indexing_status?: string | null + name: string + need_summary?: boolean | null + position?: number | null + process_rule_dict?: unknown + summary_index_status?: string | null + tokens?: number | null + total_segments?: number | null + word_count?: number | null +} + export type DocumentMetadataOperation = { document_id: string metadata_list: Array @@ -681,7 +734,7 @@ export type DocumentMetadataResponse = { id: string name: string type: string - value?: string | null + value?: unknown } export type SegmentResponse = { @@ -730,21 +783,6 @@ export type ChildChunkUpdateArgs = { id?: string | null } -export type DocumentStatusResponse = { - cleaning_completed_at: number | null - completed_at: number | null - completed_segments?: number | null - error: string | null - id: string - indexing_status: string - parsing_completed_at: number | null - paused_at: number | null - processing_started_at: number | null - splitting_completed_at: number | null - stopped_at: number | null - total_segments?: number | null -} - export type HitTestingQuery = { content: string } @@ -1573,9 +1611,7 @@ export type GetDatasetsByDatasetIdBatchByBatchIndexingStatusData = { } export type GetDatasetsByDatasetIdBatchByBatchIndexingStatusResponses = { - 200: { - [key: string]: unknown - } + 200: DocumentStatusListResponse } export type GetDatasetsByDatasetIdBatchByBatchIndexingStatusResponse @@ -1616,9 +1652,7 @@ export type GetDatasetsByDatasetIdDocumentsData = { } export type GetDatasetsByDatasetIdDocumentsResponses = { - 200: { - [key: string]: unknown - } + 200: DocumentWithSegmentsListResponse } export type GetDatasetsByDatasetIdDocumentsResponse @@ -1841,9 +1875,7 @@ export type GetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusError = GetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusErrors[keyof GetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusErrors] export type GetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusResponses = { - 200: { - [key: string]: unknown - } + 200: DocumentStatusResponse } export type GetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusResponse diff --git a/packages/contracts/generated/api/console/datasets/zod.gen.ts b/packages/contracts/generated/api/console/datasets/zod.gen.ts index 0387e897d2..6539cd79d8 100644 --- a/packages/contracts/generated/api/console/datasets/zod.gen.ts +++ b/packages/contracts/generated/api/console/datasets/zod.gen.ts @@ -143,6 +143,31 @@ export const zUrlResponse = z.object({ url: z.string(), }) +/** + * DocumentStatusResponse + */ +export const zDocumentStatusResponse = z.object({ + cleaning_completed_at: z.int().nullable(), + completed_at: z.int().nullable(), + completed_segments: z.int().nullish(), + error: z.string().nullable(), + id: z.string(), + indexing_status: z.string(), + parsing_completed_at: z.int().nullable(), + paused_at: z.int().nullable(), + processing_started_at: z.int().nullable(), + splitting_completed_at: z.int().nullable(), + stopped_at: z.int().nullable(), + total_segments: z.int().nullish(), +}) + +/** + * DocumentStatusListResponse + */ +export const zDocumentStatusListResponse = z.object({ + data: z.array(zDocumentStatusResponse), +}) + /** * DocumentMetadataUpdatePayload */ @@ -202,6 +227,14 @@ export const zChildChunkUpdatePayload = z.object({ content: z.string(), }) +/** + * ErrorDocsResponse + */ +export const zErrorDocsResponse = z.object({ + data: z.array(zDocumentStatusResponse), + total: z.int(), +}) + /** * ExternalHitTestingPayload */ @@ -455,7 +488,7 @@ export const zDocumentMetadataResponse = z.object({ id: z.string(), name: z.string(), type: z.string(), - value: z.string().nullish(), + value: z.unknown().optional(), }) /** @@ -467,14 +500,14 @@ export const zDocumentResponse = z.object({ created_by: z.string().nullish(), created_from: z.string().nullish(), data_source_detail_dict: z.unknown().optional(), - data_source_info_dict: z.unknown().optional(), + data_source_info: z.unknown().optional(), data_source_type: z.string().nullish(), dataset_process_rule_id: z.string().nullish(), disabled_at: z.int().nullish(), disabled_by: z.string().nullish(), display_status: z.string().nullish(), doc_form: z.string().nullish(), - doc_metadata_details: z.array(zDocumentMetadataResponse).optional(), + doc_metadata: z.array(zDocumentMetadataResponse).optional(), enabled: z.boolean().nullish(), error: z.string().nullish(), hit_count: z.int().nullish(), @@ -497,6 +530,50 @@ export const zDatasetAndDocumentResponse = z.object({ documents: z.array(zDocumentResponse), }) +/** + * DocumentWithSegmentsResponse + */ +export const zDocumentWithSegmentsResponse = z.object({ + archived: z.boolean().nullish(), + completed_segments: z.int().nullish(), + created_at: z.int().nullish(), + created_by: z.string().nullish(), + created_from: z.string().nullish(), + data_source_detail_dict: z.unknown().optional(), + data_source_info: z.unknown().optional(), + data_source_type: z.string().nullish(), + dataset_process_rule_id: z.string().nullish(), + disabled_at: z.int().nullish(), + disabled_by: z.string().nullish(), + display_status: z.string().nullish(), + doc_form: z.string().nullish(), + doc_metadata: z.array(zDocumentMetadataResponse).optional(), + enabled: z.boolean().nullish(), + error: z.string().nullish(), + hit_count: z.int().nullish(), + id: z.string(), + indexing_status: z.string().nullish(), + name: z.string(), + need_summary: z.boolean().nullish(), + position: z.int().nullish(), + process_rule_dict: z.unknown().optional(), + summary_index_status: z.string().nullish(), + tokens: z.int().nullish(), + total_segments: z.int().nullish(), + word_count: z.int().nullish(), +}) + +/** + * DocumentWithSegmentsListResponse + */ +export const zDocumentWithSegmentsListResponse = z.object({ + data: z.array(zDocumentWithSegmentsResponse), + has_more: z.boolean(), + limit: z.int(), + page: z.int(), + total: z.int(), +}) + /** * ChildChunkResponse */ @@ -551,39 +628,6 @@ export const zChildChunkBatchUpdatePayload = z.object({ chunks: z.array(zChildChunkUpdateArgs), }) -/** - * DocumentStatusResponse - */ -export const zDocumentStatusResponse = z.object({ - cleaning_completed_at: z.int().nullable(), - completed_at: z.int().nullable(), - completed_segments: z.int().nullish(), - error: z.string().nullable(), - id: z.string(), - indexing_status: z.string(), - parsing_completed_at: z.int().nullable(), - paused_at: z.int().nullable(), - processing_started_at: z.int().nullable(), - splitting_completed_at: z.int().nullable(), - stopped_at: z.int().nullable(), - total_segments: z.int().nullish(), -}) - -/** - * ErrorDocsResponse - */ -export const zErrorDocsResponse = z.object({ - data: z.array(zDocumentStatusResponse), - total: z.int(), -}) - -/** - * DocumentStatusListResponse - */ -export const zDocumentStatusListResponse = z.object({ - data: z.array(zDocumentStatusResponse), -}) - /** * HitTestingQuery */ @@ -1586,12 +1630,9 @@ export const zGetDatasetsByDatasetIdBatchByBatchIndexingStatusPath = z.object({ }) /** - * Success + * Indexing status retrieved successfully */ -export const zGetDatasetsByDatasetIdBatchByBatchIndexingStatusResponse = z.record( - z.string(), - z.unknown(), -) +export const zGetDatasetsByDatasetIdBatchByBatchIndexingStatusResponse = zDocumentStatusListResponse export const zDeleteDatasetsByDatasetIdDocumentsPath = z.object({ dataset_id: z.string(), @@ -1618,7 +1659,7 @@ export const zGetDatasetsByDatasetIdDocumentsQuery = z.object({ /** * Documents retrieved successfully */ -export const zGetDatasetsByDatasetIdDocumentsResponse = z.record(z.string(), z.unknown()) +export const zGetDatasetsByDatasetIdDocumentsResponse = zDocumentWithSegmentsListResponse export const zPostDatasetsByDatasetIdDocumentsBody = zKnowledgeConfig @@ -1738,10 +1779,8 @@ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusPath = z. /** * Indexing status retrieved successfully */ -export const zGetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusResponse = z.record( - z.string(), - z.unknown(), -) +export const zGetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusResponse + = zDocumentStatusResponse export const zPutDatasetsByDatasetIdDocumentsByDocumentIdMetadataBody = zDocumentMetadataUpdatePayload diff --git a/packages/contracts/generated/api/service/orpc.gen.ts b/packages/contracts/generated/api/service/orpc.gen.ts index a3a1e6c66f..940b844d25 100644 --- a/packages/contracts/generated/api/service/orpc.gen.ts +++ b/packages/contracts/generated/api/service/orpc.gen.ts @@ -46,6 +46,7 @@ import { zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsQuery, zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse, zGetDatasetsByDatasetIdDocumentsPath, + zGetDatasetsByDatasetIdDocumentsQuery, zGetDatasetsByDatasetIdDocumentsResponse, zGetDatasetsByDatasetIdMetadataBuiltInPath, zGetDatasetsByDatasetIdMetadataBuiltInResponse, @@ -87,6 +88,7 @@ import { zGetWorkspacesCurrentModelsModelTypesByModelTypePath, zGetWorkspacesCurrentModelsModelTypesByModelTypeResponse, zPatchDatasetsByDatasetIdBody, + zPatchDatasetsByDatasetIdDocumentsByDocumentIdBody, zPatchDatasetsByDatasetIdDocumentsByDocumentIdPath, zPatchDatasetsByDatasetIdDocumentsByDocumentIdResponse, zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdBody, @@ -119,8 +121,10 @@ import { zPostConversationsByCIdNamePath, zPostConversationsByCIdNameResponse, zPostDatasetsBody, + zPostDatasetsByDatasetIdDocumentCreateByFile2Body, zPostDatasetsByDatasetIdDocumentCreateByFile2Path, zPostDatasetsByDatasetIdDocumentCreateByFile2Response, + zPostDatasetsByDatasetIdDocumentCreateByFileBody, zPostDatasetsByDatasetIdDocumentCreateByFilePath, zPostDatasetsByDatasetIdDocumentCreateByFileResponse, zPostDatasetsByDatasetIdDocumentCreateByText2Body, @@ -138,8 +142,10 @@ import { zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponse, zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsPath, zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse, + zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Body, zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Path, zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Response, + zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileBody, zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFilePath, zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileResponse, zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Body, @@ -853,44 +859,42 @@ export const tags = { /** * Create a new document by uploading a file - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const post13 = oc .route({ - deprecated: true, - description: - 'Create a new document by uploading a file\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Create a new document by uploading a file', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasetsByDatasetIdDocumentCreateByFile', path: '/datasets/{dataset_id}/document/create-by-file', tags: ['service_api'], }) - .input(z.object({ params: zPostDatasetsByDatasetIdDocumentCreateByFilePath })) + .input( + z.object({ + body: zPostDatasetsByDatasetIdDocumentCreateByFileBody, + params: zPostDatasetsByDatasetIdDocumentCreateByFilePath, + }), + ) .output(zPostDatasetsByDatasetIdDocumentCreateByFileResponse) /** * Create a new document by uploading a file - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const post14 = oc .route({ - deprecated: true, - description: - 'Create a new document by uploading a file\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Create a new document by uploading a file', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasetsByDatasetIdDocumentCreateByFile', path: '/datasets/{dataset_id}/document/create_by_file', tags: ['service_api'], }) - .input(z.object({ params: zPostDatasetsByDatasetIdDocumentCreateByFile2Path })) + .input( + z.object({ + body: zPostDatasetsByDatasetIdDocumentCreateByFile2Body, + params: zPostDatasetsByDatasetIdDocumentCreateByFile2Path, + }), + ) .output(zPostDatasetsByDatasetIdDocumentCreateByFile2Response) export const createByFile = { @@ -899,16 +903,10 @@ export const createByFile = { /** * Create a new document by providing text content - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const post15 = oc .route({ - deprecated: true, - description: - 'Create a new document by providing text content\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Create a new document by providing text content', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasetsByDatasetIdDocumentCreateByText', @@ -926,15 +924,13 @@ export const post15 = oc /** * Deprecated legacy alias for creating a new document by providing text content. Use /datasets/{dataset_id}/document/create-by-text instead. * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * * @deprecated */ export const post16 = oc .route({ deprecated: true, description: - 'Deprecated legacy alias for creating a new document by providing text content. Use /datasets/{dataset_id}/document/create-by-text instead.\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + 'Deprecated legacy alias for creating a new document by providing text content. Use /datasets/{dataset_id}/document/create-by-text instead.', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasetsByDatasetIdDocumentCreateByText', @@ -1062,16 +1058,10 @@ export const status2 = { /** * Get indexing status for documents in a batch - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get8 = oc .route({ - deprecated: true, - description: - 'Get indexing status for documents in a batch\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Get indexing status for documents in a batch', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdDocumentsByBatchIndexingStatus', @@ -1317,43 +1307,49 @@ export const segments = { /** * Deprecated legacy alias for updating an existing document by uploading a file. Use PATCH /datasets/{dataset_id}/documents/{document_id} instead. * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * * @deprecated */ export const post22 = oc .route({ deprecated: true, description: - 'Deprecated legacy alias for updating an existing document by uploading a file. Use PATCH /datasets/{dataset_id}/documents/{document_id} instead.\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + 'Deprecated legacy alias for updating an existing document by uploading a file. Use PATCH /datasets/{dataset_id}/documents/{document_id} instead.', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile', path: '/datasets/{dataset_id}/documents/{document_id}/update-by-file', tags: ['service_api'], }) - .input(z.object({ params: zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFilePath })) + .input( + z.object({ + body: zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileBody.optional(), + params: zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFilePath, + }), + ) .output(zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileResponse) /** * Deprecated legacy alias for updating an existing document by uploading a file. Use PATCH /datasets/{dataset_id}/documents/{document_id} instead. * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * * @deprecated */ export const post23 = oc .route({ deprecated: true, description: - 'Deprecated legacy alias for updating an existing document by uploading a file. Use PATCH /datasets/{dataset_id}/documents/{document_id} instead.\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + 'Deprecated legacy alias for updating an existing document by uploading a file. Use PATCH /datasets/{dataset_id}/documents/{document_id} instead.', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile', path: '/datasets/{dataset_id}/documents/{document_id}/update_by_file', tags: ['service_api'], }) - .input(z.object({ params: zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Path })) + .input( + z.object({ + body: zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Body.optional(), + params: zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Path, + }), + ) .output(zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Response) export const updateByFile = { @@ -1362,16 +1358,10 @@ export const updateByFile = { /** * Update an existing document by providing text content - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const post24 = oc .route({ - deprecated: true, - description: - 'Update an existing document by providing text content\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Update an existing document by providing text content', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText', @@ -1389,15 +1379,13 @@ export const post24 = oc /** * Deprecated legacy alias for updating an existing document by providing text content. Use /datasets/{dataset_id}/documents/{document_id}/update-by-text instead. * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * * @deprecated */ export const post25 = oc .route({ deprecated: true, description: - 'Deprecated legacy alias for updating an existing document by providing text content. Use /datasets/{dataset_id}/documents/{document_id}/update-by-text instead.\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + 'Deprecated legacy alias for updating an existing document by providing text content. Use /datasets/{dataset_id}/documents/{document_id}/update-by-text instead.', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText', @@ -1458,23 +1446,22 @@ export const get13 = oc /** * Update an existing document by uploading a file - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const patch4 = oc .route({ - deprecated: true, - description: - 'Update an existing document by uploading a file\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'Update an existing document by uploading a file', inputStructure: 'detailed', method: 'PATCH', operationId: 'patchDatasetsByDatasetIdDocumentsByDocumentId', path: '/datasets/{dataset_id}/documents/{document_id}', tags: ['service_api'], }) - .input(z.object({ params: zPatchDatasetsByDatasetIdDocumentsByDocumentIdPath })) + .input( + z.object({ + body: zPatchDatasetsByDatasetIdDocumentsByDocumentIdBody.optional(), + params: zPatchDatasetsByDatasetIdDocumentsByDocumentIdPath, + }), + ) .output(zPatchDatasetsByDatasetIdDocumentsByDocumentIdResponse) export const byDocumentId = { @@ -1489,23 +1476,22 @@ export const byDocumentId = { /** * List all documents in a dataset - * - * Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate. - * - * @deprecated */ export const get14 = oc .route({ - deprecated: true, - description: - 'List all documents in a dataset\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.', + description: 'List all documents in a dataset', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdDocuments', path: '/datasets/{dataset_id}/documents', tags: ['service_api'], }) - .input(z.object({ params: zGetDatasetsByDatasetIdDocumentsPath })) + .input( + z.object({ + params: zGetDatasetsByDatasetIdDocumentsPath, + query: zGetDatasetsByDatasetIdDocumentsQuery.optional(), + }), + ) .output(zGetDatasetsByDatasetIdDocumentsResponse) export const documents = { diff --git a/packages/contracts/generated/api/service/types.gen.ts b/packages/contracts/generated/api/service/types.gen.ts index 45c4a09cdd..771eb34d3a 100644 --- a/packages/contracts/generated/api/service/types.gen.ts +++ b/packages/contracts/generated/api/service/types.gen.ts @@ -420,6 +420,11 @@ export type DatasourceNodeRunPayload = { is_published: boolean } +export type DocumentAndBatchResponse = { + batch: string + document: DocumentResponse +} + export type DocumentBatchDownloadZipPayload = { document_ids: Array } @@ -431,12 +436,73 @@ export type DocumentListQuery = { status?: string | null } +export type DocumentListResponse = { + data: Array + has_more: boolean + limit: number + page: number + total: number +} + export type DocumentMetadataOperation = { document_id: string metadata_list: Array partial_update?: boolean } +export type DocumentMetadataResponse = { + id: string + name: string + type: string + value?: unknown +} + +export type DocumentResponse = { + archived?: boolean | null + created_at?: number | null + created_by?: string | null + created_from?: string | null + data_source_detail_dict?: unknown + data_source_info?: unknown + data_source_type?: string | null + dataset_process_rule_id?: string | null + disabled_at?: number | null + disabled_by?: string | null + display_status?: string | null + doc_form?: string | null + doc_metadata?: Array + enabled?: boolean | null + error?: string | null + hit_count?: number | null + id: string + indexing_status?: string | null + name: string + need_summary?: boolean | null + position?: number | null + summary_index_status?: string | null + tokens?: number | null + word_count?: number | null +} + +export type DocumentStatusListResponse = { + data: Array +} + +export type DocumentStatusResponse = { + cleaning_completed_at: number | null + completed_at: number | null + completed_segments?: number | null + error: string | null + id: string + indexing_status: string + parsing_completed_at: number | null + paused_at: number | null + processing_started_at: number | null + splitting_completed_at: number | null + stopped_at: number | null + total_segments?: number | null +} + export type DocumentTextCreatePayload = { doc_form?: string doc_language?: string @@ -1819,7 +1885,10 @@ export type PatchDatasetsByDatasetIdResponse = PatchDatasetsByDatasetIdResponses[keyof PatchDatasetsByDatasetIdResponses] export type PostDatasetsByDatasetIdDocumentCreateByFileData = { - body?: never + body: { + data?: string + file: Blob | File + } path: { dataset_id: string } @@ -1840,9 +1909,7 @@ export type PostDatasetsByDatasetIdDocumentCreateByFileError = PostDatasetsByDatasetIdDocumentCreateByFileErrors[keyof PostDatasetsByDatasetIdDocumentCreateByFileErrors] export type PostDatasetsByDatasetIdDocumentCreateByFileResponses = { - 200: { - [key: string]: unknown - } + 200: DocumentAndBatchResponse } export type PostDatasetsByDatasetIdDocumentCreateByFileResponse @@ -1870,16 +1937,17 @@ export type PostDatasetsByDatasetIdDocumentCreateByTextError = PostDatasetsByDatasetIdDocumentCreateByTextErrors[keyof PostDatasetsByDatasetIdDocumentCreateByTextErrors] export type PostDatasetsByDatasetIdDocumentCreateByTextResponses = { - 200: { - [key: string]: unknown - } + 200: DocumentAndBatchResponse } export type PostDatasetsByDatasetIdDocumentCreateByTextResponse = PostDatasetsByDatasetIdDocumentCreateByTextResponses[keyof PostDatasetsByDatasetIdDocumentCreateByTextResponses] export type PostDatasetsByDatasetIdDocumentCreateByFile2Data = { - body?: never + body: { + data?: string + file: Blob | File + } path: { dataset_id: string } @@ -1900,9 +1968,7 @@ export type PostDatasetsByDatasetIdDocumentCreateByFile2Error = PostDatasetsByDatasetIdDocumentCreateByFile2Errors[keyof PostDatasetsByDatasetIdDocumentCreateByFile2Errors] export type PostDatasetsByDatasetIdDocumentCreateByFile2Responses = { - 200: { - [key: string]: unknown - } + 200: DocumentAndBatchResponse } export type PostDatasetsByDatasetIdDocumentCreateByFile2Response @@ -1930,9 +1996,7 @@ export type PostDatasetsByDatasetIdDocumentCreateByText2Error = PostDatasetsByDatasetIdDocumentCreateByText2Errors[keyof PostDatasetsByDatasetIdDocumentCreateByText2Errors] export type PostDatasetsByDatasetIdDocumentCreateByText2Responses = { - 200: { - [key: string]: unknown - } + 200: DocumentAndBatchResponse } export type PostDatasetsByDatasetIdDocumentCreateByText2Response @@ -1943,7 +2007,12 @@ export type GetDatasetsByDatasetIdDocumentsData = { path: { dataset_id: string } - query?: never + query?: { + keyword?: string + limit?: number + page?: number + status?: string + } url: '/datasets/{dataset_id}/documents' } @@ -1960,9 +2029,7 @@ export type GetDatasetsByDatasetIdDocumentsError = GetDatasetsByDatasetIdDocumentsErrors[keyof GetDatasetsByDatasetIdDocumentsErrors] export type GetDatasetsByDatasetIdDocumentsResponses = { - 200: { - [key: string]: unknown - } + 200: DocumentListResponse } export type GetDatasetsByDatasetIdDocumentsResponse @@ -2087,9 +2154,7 @@ export type GetDatasetsByDatasetIdDocumentsByBatchIndexingStatusError = GetDatasetsByDatasetIdDocumentsByBatchIndexingStatusErrors[keyof GetDatasetsByDatasetIdDocumentsByBatchIndexingStatusErrors] export type GetDatasetsByDatasetIdDocumentsByBatchIndexingStatusResponses = { - 200: { - [key: string]: unknown - } + 200: DocumentStatusListResponse } export type GetDatasetsByDatasetIdDocumentsByBatchIndexingStatusResponse @@ -2164,7 +2229,10 @@ export type GetDatasetsByDatasetIdDocumentsByDocumentIdResponse = GetDatasetsByDatasetIdDocumentsByDocumentIdResponses[keyof GetDatasetsByDatasetIdDocumentsByDocumentIdResponses] export type PatchDatasetsByDatasetIdDocumentsByDocumentIdData = { - body?: never + body?: { + data?: string + file?: Blob | File + } path: { dataset_id: string document_id: string @@ -2186,9 +2254,7 @@ export type PatchDatasetsByDatasetIdDocumentsByDocumentIdError = PatchDatasetsByDatasetIdDocumentsByDocumentIdErrors[keyof PatchDatasetsByDatasetIdDocumentsByDocumentIdErrors] export type PatchDatasetsByDatasetIdDocumentsByDocumentIdResponses = { - 200: { - [key: string]: unknown - } + 200: DocumentAndBatchResponse } export type PatchDatasetsByDatasetIdDocumentsByDocumentIdResponse @@ -2519,7 +2585,10 @@ export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChil = PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdResponses[keyof PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdResponses] export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileData = { - body?: never + body?: { + data?: string + file?: Blob | File + } path: { dataset_id: string document_id: string @@ -2541,9 +2610,7 @@ export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileError = PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileErrors[keyof PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileErrors] export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileResponses = { - 200: { - [key: string]: unknown - } + 200: DocumentAndBatchResponse } export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileResponse @@ -2572,16 +2639,17 @@ export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextError = PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextErrors[keyof PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextErrors] export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextResponses = { - 200: { - [key: string]: unknown - } + 200: DocumentAndBatchResponse } export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextResponse = PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextResponses[keyof PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextResponses] export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Data = { - body?: never + body?: { + data?: string + file?: Blob | File + } path: { dataset_id: string document_id: string @@ -2603,9 +2671,7 @@ export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Error = PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Errors[keyof PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Errors] export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Responses = { - 200: { - [key: string]: unknown - } + 200: DocumentAndBatchResponse } export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Response @@ -2634,9 +2700,7 @@ export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Error = PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Errors[keyof PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Errors] export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Responses = { - 200: { - [key: string]: unknown - } + 200: DocumentAndBatchResponse } export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Response diff --git a/packages/contracts/generated/api/service/zod.gen.ts b/packages/contracts/generated/api/service/zod.gen.ts index d27b64e54f..194ee70e60 100644 --- a/packages/contracts/generated/api/service/zod.gen.ts +++ b/packages/contracts/generated/api/service/zod.gen.ts @@ -537,6 +537,90 @@ export const zDocumentListQuery = z.object({ status: z.string().nullish(), }) +/** + * DocumentMetadataResponse + */ +export const zDocumentMetadataResponse = z.object({ + id: z.string(), + name: z.string(), + type: z.string(), + value: z.unknown().optional(), +}) + +/** + * DocumentResponse + */ +export const zDocumentResponse = z.object({ + archived: z.boolean().nullish(), + created_at: z.int().nullish(), + created_by: z.string().nullish(), + created_from: z.string().nullish(), + data_source_detail_dict: z.unknown().optional(), + data_source_info: z.unknown().optional(), + data_source_type: z.string().nullish(), + dataset_process_rule_id: z.string().nullish(), + disabled_at: z.int().nullish(), + disabled_by: z.string().nullish(), + display_status: z.string().nullish(), + doc_form: z.string().nullish(), + doc_metadata: z.array(zDocumentMetadataResponse).optional(), + enabled: z.boolean().nullish(), + error: z.string().nullish(), + hit_count: z.int().nullish(), + id: z.string(), + indexing_status: z.string().nullish(), + name: z.string(), + need_summary: z.boolean().nullish(), + position: z.int().nullish(), + summary_index_status: z.string().nullish(), + tokens: z.int().nullish(), + word_count: z.int().nullish(), +}) + +/** + * DocumentAndBatchResponse + */ +export const zDocumentAndBatchResponse = z.object({ + batch: z.string(), + document: zDocumentResponse, +}) + +/** + * DocumentListResponse + */ +export const zDocumentListResponse = z.object({ + data: z.array(zDocumentResponse), + has_more: z.boolean(), + limit: z.int(), + page: z.int(), + total: z.int(), +}) + +/** + * DocumentStatusResponse + */ +export const zDocumentStatusResponse = z.object({ + cleaning_completed_at: z.int().nullable(), + completed_at: z.int().nullable(), + completed_segments: z.int().nullish(), + error: z.string().nullable(), + id: z.string(), + indexing_status: z.string(), + parsing_completed_at: z.int().nullable(), + paused_at: z.int().nullable(), + processing_started_at: z.int().nullable(), + splitting_completed_at: z.int().nullable(), + stopped_at: z.int().nullable(), + total_segments: z.int().nullish(), +}) + +/** + * DocumentStatusListResponse + */ +export const zDocumentStatusListResponse = z.object({ + data: z.array(zDocumentStatusResponse), +}) + /** * EndUserDetail * @@ -1573,6 +1657,11 @@ export const zPatchDatasetsByDatasetIdPath = z.object({ */ export const zPatchDatasetsByDatasetIdResponse = zDatasetDetailWithPartialMembersResponse +export const zPostDatasetsByDatasetIdDocumentCreateByFileBody = z.object({ + data: z.string().optional(), + file: z.custom(), +}) + export const zPostDatasetsByDatasetIdDocumentCreateByFilePath = z.object({ dataset_id: z.string(), }) @@ -1580,10 +1669,7 @@ export const zPostDatasetsByDatasetIdDocumentCreateByFilePath = z.object({ /** * Document created successfully */ -export const zPostDatasetsByDatasetIdDocumentCreateByFileResponse = z.record( - z.string(), - z.unknown(), -) +export const zPostDatasetsByDatasetIdDocumentCreateByFileResponse = zDocumentAndBatchResponse export const zPostDatasetsByDatasetIdDocumentCreateByTextBody = zDocumentTextCreatePayload @@ -1594,10 +1680,12 @@ export const zPostDatasetsByDatasetIdDocumentCreateByTextPath = z.object({ /** * Document created successfully */ -export const zPostDatasetsByDatasetIdDocumentCreateByTextResponse = z.record( - z.string(), - z.unknown(), -) +export const zPostDatasetsByDatasetIdDocumentCreateByTextResponse = zDocumentAndBatchResponse + +export const zPostDatasetsByDatasetIdDocumentCreateByFile2Body = z.object({ + data: z.string().optional(), + file: z.custom(), +}) export const zPostDatasetsByDatasetIdDocumentCreateByFile2Path = z.object({ dataset_id: z.string(), @@ -1606,10 +1694,7 @@ export const zPostDatasetsByDatasetIdDocumentCreateByFile2Path = z.object({ /** * Document created successfully */ -export const zPostDatasetsByDatasetIdDocumentCreateByFile2Response = z.record( - z.string(), - z.unknown(), -) +export const zPostDatasetsByDatasetIdDocumentCreateByFile2Response = zDocumentAndBatchResponse export const zPostDatasetsByDatasetIdDocumentCreateByText2Body = zDocumentTextCreatePayload @@ -1620,19 +1705,23 @@ export const zPostDatasetsByDatasetIdDocumentCreateByText2Path = z.object({ /** * Document created successfully */ -export const zPostDatasetsByDatasetIdDocumentCreateByText2Response = z.record( - z.string(), - z.unknown(), -) +export const zPostDatasetsByDatasetIdDocumentCreateByText2Response = zDocumentAndBatchResponse export const zGetDatasetsByDatasetIdDocumentsPath = z.object({ dataset_id: z.string(), }) +export const zGetDatasetsByDatasetIdDocumentsQuery = z.object({ + keyword: z.string().optional(), + limit: z.int().optional().default(20), + page: z.int().optional().default(1), + status: z.string().optional(), +}) + /** * Documents retrieved successfully */ -export const zGetDatasetsByDatasetIdDocumentsResponse = z.record(z.string(), z.unknown()) +export const zGetDatasetsByDatasetIdDocumentsResponse = zDocumentListResponse export const zPostDatasetsByDatasetIdDocumentsDownloadZipBody = zDocumentBatchDownloadZipPayload @@ -1677,10 +1766,8 @@ export const zGetDatasetsByDatasetIdDocumentsByBatchIndexingStatusPath = z.objec /** * Indexing status retrieved successfully */ -export const zGetDatasetsByDatasetIdDocumentsByBatchIndexingStatusResponse = z.record( - z.string(), - z.unknown(), -) +export const zGetDatasetsByDatasetIdDocumentsByBatchIndexingStatusResponse + = zDocumentStatusListResponse export const zDeleteDatasetsByDatasetIdDocumentsByDocumentIdPath = z.object({ dataset_id: z.string(), @@ -1708,6 +1795,11 @@ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdResponse = z.record( z.unknown(), ) +export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdBody = z.object({ + data: z.string().optional(), + file: z.custom().optional(), +}) + export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdPath = z.object({ dataset_id: z.string(), document_id: z.string(), @@ -1716,10 +1808,7 @@ export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdPath = z.object({ /** * Document updated successfully */ -export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdResponse = z.record( - z.string(), - z.unknown(), -) +export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdResponse = zDocumentAndBatchResponse export const zGetDatasetsByDatasetIdDocumentsByDocumentIdDownloadPath = z.object({ dataset_id: z.string(), @@ -1869,6 +1958,11 @@ export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdCh export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdResponse = zChildChunkDetailResponse +export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileBody = z.object({ + data: z.string().optional(), + file: z.custom().optional(), +}) + export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFilePath = z.object({ dataset_id: z.string(), document_id: z.string(), @@ -1877,10 +1971,8 @@ export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFilePath = z.o /** * Document updated successfully */ -export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileResponse = z.record( - z.string(), - z.unknown(), -) +export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileResponse + = zDocumentAndBatchResponse export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextBody = zDocumentTextUpdate @@ -1892,10 +1984,13 @@ export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextPath = z.o /** * Document updated successfully */ -export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextResponse = z.record( - z.string(), - z.unknown(), -) +export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextResponse + = zDocumentAndBatchResponse + +export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Body = z.object({ + data: z.string().optional(), + file: z.custom().optional(), +}) export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Path = z.object({ dataset_id: z.string(), @@ -1905,10 +2000,8 @@ export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Path = z. /** * Document updated successfully */ -export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Response = z.record( - z.string(), - z.unknown(), -) +export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Response + = zDocumentAndBatchResponse export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Body = zDocumentTextUpdate @@ -1920,10 +2013,8 @@ export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Path = z. /** * Document updated successfully */ -export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Response = z.record( - z.string(), - z.unknown(), -) +export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Response + = zDocumentAndBatchResponse export const zPostDatasetsByDatasetIdHitTestingBody = zHitTestingPayload diff --git a/packages/contracts/openapi-ts.api.config.ts b/packages/contracts/openapi-ts.api.config.ts index 58ff98351e..122107eb78 100644 --- a/packages/contracts/openapi-ts.api.config.ts +++ b/packages/contracts/openapi-ts.api.config.ts @@ -2,7 +2,7 @@ import type { UserConfig } from '@hey-api/openapi-ts' import fs from 'node:fs' import path from 'node:path' import { fileURLToPath } from 'node:url' -import { defineConfig } from '@hey-api/openapi-ts' +import { $, defineConfig } from '@hey-api/openapi-ts' type JsonObject = Record @@ -976,6 +976,12 @@ const createApiConfig = (job: ApiJob): UserConfig => ({ 'name': 'zod', '~resolvers': { enum: markNullableEnumSchema, + string: (ctx) => { + if (ctx.schema.format !== 'binary') + return undefined + + return $(ctx.symbols.z).attr('custom').call().generic($.type.or($.type('Blob'), $.type('File'))) + }, }, }, {