refactor(api): migrate console/service_api.dataset.document to BaseModel (#36506)

Co-authored-by: WH-2099 <wh2099@pm.me>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
chariri 2026-05-30 23:38:27 +09:00 committed by GitHub
parent 6805d9bfc0
commit 599960024d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 1412 additions and 611 deletions

View File

@ -9,7 +9,7 @@ from uuid import UUID
import sqlalchemy as sa
from flask import request, send_file
from flask_restx import Resource, marshal
from flask_restx import Resource
from pydantic import BaseModel, Field, field_validator
from sqlalchemy import asc, desc, func, select
from werkzeug.exceptions import Forbidden, NotFound
@ -34,14 +34,16 @@ from core.rag.index_processor.constant.index_type import IndexTechniqueType
from extensions.ext_database import db
from fields.base import ResponseModel
from fields.document_fields import (
document_fields,
document_status_fields,
document_with_segments_fields,
DocumentMetadataResponse,
DocumentResponse,
DocumentStatusListResponse,
DocumentStatusResponse,
normalize_enum,
)
from graphon.model_runtime.entities.model_entities import ModelType
from graphon.model_runtime.errors.invoke import InvokeAuthorizationError
from libs.datetime_utils import naive_utc_now
from libs.helper import to_timestamp
from libs.helper import dump_response, to_timestamp
from libs.login import current_account_with_tenant, login_required
from models import DatasetProcessRule, Document, DocumentSegment, UploadFile
from models.dataset import DocumentPipelineExecutionLog
@ -74,12 +76,6 @@ from ..wraps import (
logger = logging.getLogger(__name__)
def _normalize_enum(value: Any) -> Any:
    """Return the plain value behind an enum-like object.

    ``None`` and ``str`` instances are handed back unchanged. Any other object
    is replaced by its ``value`` attribute when it has one; objects without
    that attribute are returned as-is.
    """
    if value is None or isinstance(value, str):
        return value
    try:
        return value.value
    except AttributeError:
        # Not enum-like: keep the original object.
        return value
class DatasetResponse(ResponseModel):
id: str
name: str
@ -93,7 +89,7 @@ class DatasetResponse(ResponseModel):
@field_validator("data_source_type", "indexing_technique", mode="before")
@classmethod
def _normalize_enum_fields(cls, value: Any) -> Any:
return _normalize_enum(value)
return normalize_enum(value)
@field_validator("created_at", mode="before")
@classmethod
@ -101,61 +97,10 @@ class DatasetResponse(ResponseModel):
return to_timestamp(value)
# One metadata entry attached to a document; only ``value`` is optional.
class DocumentMetadataResponse(ResponseModel):
    id: str
    name: str
    type: str
    value: str | None = None
# Serialized view of a dataset document.
# ``data_source_info`` and ``doc_metadata`` are filled from the
# ``data_source_info_dict`` / ``doc_metadata_details`` inputs through their
# validation aliases.
class DocumentResponse(ResponseModel):
    id: str
    position: int | None = None
    data_source_type: str | None = None
    data_source_info: Any = Field(default=None, validation_alias="data_source_info_dict")
    data_source_detail_dict: Any = None
    dataset_process_rule_id: str | None = None
    name: str
    created_from: str | None = None
    created_by: str | None = None
    created_at: int | None = None
    tokens: int | None = None
    indexing_status: str | None = None
    error: str | None = None
    enabled: bool | None = None
    disabled_at: int | None = None
    disabled_by: str | None = None
    archived: bool | None = None
    display_status: str | None = None
    word_count: int | None = None
    hit_count: int | None = None
    doc_form: str | None = None
    doc_metadata: list[DocumentMetadataResponse] = Field(default_factory=list, validation_alias="doc_metadata_details")
    summary_index_status: str | None = None
    need_summary: bool | None = None

    @field_validator("data_source_type", "indexing_status", "display_status", "doc_form", mode="before")
    @classmethod
    def _normalize_enum_fields(cls, value: Any) -> Any:
        # Enum-like inputs are unwrapped to their ``value`` before validation.
        return _normalize_enum(value)

    @field_validator("doc_metadata", mode="before")
    @classmethod
    def _normalize_doc_metadata(cls, value: Any) -> list[Any]:
        # A missing metadata collection is reported as an empty list.
        return [] if value is None else value

    @field_validator("created_at", "disabled_at", mode="before")
    @classmethod
    def _normalize_timestamp(cls, value: datetime | int | None) -> int | None:
        # Conversion is delegated to the shared ``to_timestamp`` helper.
        return to_timestamp(value)
class DocumentWithSegmentsResponse(DocumentResponse):
process_rule_dict: Any = None
completed_segments: int | None = None
total_segments: int | None = None
completed_segments: int | None = Field(default=None, exclude_if=lambda value: value is None)
total_segments: int | None = Field(default=None, exclude_if=lambda value: value is None)
class DatasetAndDocumentResponse(ResponseModel):
@ -190,6 +135,14 @@ class DocumentDatasetListParam(BaseModel):
fetch_val: str = Field("false", alias="fetch")
# Payload of the console document list endpoint: one page of documents plus
# the paging values the handler puts next to it.
class DocumentWithSegmentsListResponse(ResponseModel):
    data: list[DocumentWithSegmentsResponse]
    # The list handler sets this to ``len(documents) == limit``.
    has_more: bool
    limit: int
    total: int
    page: int
register_schema_models(
console_ns,
KnowledgeConfig,
@ -200,13 +153,19 @@ register_schema_models(
GenerateSummaryPayload,
DocumentMetadataUpdatePayload,
DocumentBatchDownloadZipPayload,
)
register_response_schema_models(
console_ns,
SimpleResultMessageResponse,
SimpleResultResponse,
UrlResponse,
DatasetResponse,
DocumentMetadataResponse,
DocumentResponse,
DocumentWithSegmentsResponse,
DatasetAndDocumentResponse,
DocumentWithSegmentsListResponse,
)
register_response_schema_models(console_ns, SimpleResultMessageResponse, SimpleResultResponse, UrlResponse)
class DocumentResource(Resource):
@ -312,7 +271,11 @@ class DatasetDocumentListApi(Resource):
"status": "Filter documents by display status",
}
)
@console_ns.response(200, "Documents retrieved successfully")
@console_ns.response(
200,
"Documents retrieved successfully",
console_ns.models[DocumentWithSegmentsListResponse.__name__],
)
@setup_required
@login_required
@account_initialization_required
@ -425,18 +388,15 @@ class DatasetDocumentListApi(Resource):
)
document.completed_segments = completed_segments
document.total_segments = total_segments
data = marshal(documents, document_with_segments_fields)
else:
data = marshal(documents, document_fields)
response = {
"data": data,
"data": documents,
"has_more": len(documents) == limit,
"limit": limit,
"total": paginated_documents.total,
"page": page,
}
return response
return dump_response(DocumentWithSegmentsListResponse, response)
@setup_required
@login_required
@ -482,9 +442,7 @@ class DatasetDocumentListApi(Resource):
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
return DatasetAndDocumentResponse.model_validate(
{"dataset": dataset, "documents": documents, "batch": batch}, from_attributes=True
).model_dump(mode="json")
return dump_response(DatasetAndDocumentResponse, {"dataset": dataset, "documents": documents, "batch": batch})
@setup_required
@login_required
@ -567,9 +525,7 @@ class DatasetInitApi(Resource):
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
return DatasetAndDocumentResponse.model_validate(
{"dataset": dataset, "documents": documents, "batch": batch}, from_attributes=True
).model_dump(mode="json")
return dump_response(DatasetAndDocumentResponse, {"dataset": dataset, "documents": documents, "batch": batch})
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/indexing-estimate")
@ -742,6 +698,9 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
@console_ns.route("/datasets/<uuid:dataset_id>/batch/<string:batch>/indexing-status")
class DocumentBatchIndexingStatusApi(DocumentResource):
@console_ns.response(
200, "Indexing status retrieved successfully", console_ns.models[DocumentStatusListResponse.__name__]
)
@setup_required
@login_required
@account_initialization_required
@ -784,9 +743,8 @@ class DocumentBatchIndexingStatusApi(DocumentResource):
"completed_segments": completed_segments,
"total_segments": total_segments,
}
documents_status.append(marshal(document_dict, document_status_fields))
data = {"data": documents_status}
return data
documents_status.append(document_dict)
return dump_response(DocumentStatusListResponse, {"data": documents_status})
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/indexing-status")
@ -794,7 +752,9 @@ class DocumentIndexingStatusApi(DocumentResource):
@console_ns.doc("get_document_indexing_status")
@console_ns.doc(description="Get document indexing status")
@console_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
@console_ns.response(200, "Indexing status retrieved successfully")
@console_ns.response(
200, "Indexing status retrieved successfully", console_ns.models[DocumentStatusResponse.__name__]
)
@console_ns.response(404, "Document not found")
@setup_required
@login_required
@ -839,7 +799,7 @@ class DocumentIndexingStatusApi(DocumentResource):
"completed_segments": completed_segments,
"total_segments": total_segments,
}
return marshal(document_dict, document_status_fields)
return dump_response(DocumentStatusResponse, document_dict)
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
@ -1304,7 +1264,7 @@ class DocumentRenameApi(DocumentResource):
except services.errors.document.DocumentIndexingError:
raise DocumentIndexingError("Cannot delete document during indexing.")
return DocumentResponse.model_validate(document, from_attributes=True).model_dump(mode="json")
return dump_response(DocumentResponse, document)
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/website-sync")

View File

@ -12,7 +12,6 @@ from typing import Self
from uuid import UUID
from flask import request, send_file
from flask_restx import marshal
from pydantic import BaseModel, Field, field_validator, model_validator
from sqlalchemy import desc, func, select
from werkzeug.exceptions import Forbidden, NotFound
@ -27,7 +26,12 @@ from controllers.common.errors import (
UnsupportedFileTypeError,
)
from controllers.common.fields import UrlResponse
from controllers.common.schema import register_enum_models, register_response_schema_models, register_schema_models
from controllers.common.schema import (
query_params_from_model,
register_enum_models,
register_response_schema_models,
register_schema_models,
)
from controllers.service_api import service_api_ns
from controllers.service_api.app.error import ProviderNotInitializeError
from controllers.service_api.dataset.error import (
@ -44,7 +48,13 @@ from core.errors.error import ProviderTokenNotInitError
from core.rag.entities import PreProcessingRule, Rule, Segmentation
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from extensions.ext_database import db
from fields.document_fields import document_fields, document_status_fields
from fields.base import ResponseModel
from fields.document_fields import (
DocumentListResponse,
DocumentResponse,
DocumentStatusListResponse,
)
from libs.helper import dump_response
from libs.login import current_user
from models.dataset import Dataset, Document, DocumentSegment
from models.enums import SegmentStatus
@ -107,6 +117,44 @@ class DocumentListQuery(BaseModel):
status: str | None = Field(default=None, description="Document status filter")
# Parameter documentation passed as ``params`` to ``service_api_ns.doc`` on the
# multipart "create document by file" endpoint.
DOCUMENT_CREATE_BY_FILE_PARAMS = {
    "dataset_id": "Dataset ID",
    # The uploaded file part is mandatory when creating a document.
    "file": {"in": "formData", "type": "file", "required": True, "description": "Document file to upload."},
    # The settings part is an optional string field.
    "data": {
        "in": "formData", "type": "string", "required": False,
        "description": "Optional JSON string with document creation settings.",
    },
}
# Parameter documentation passed as ``params`` to ``service_api_ns.doc`` on the
# multipart "update document by file" endpoints.
DOCUMENT_UPDATE_BY_FILE_PARAMS = {
    "dataset_id": "Dataset ID",
    "document_id": "Document ID",
    # Unlike the create variant, the file part is marked as not required here.
    "file": {"in": "formData", "type": "file", "required": False, "description": "Replacement document file."},
    # The settings part is an optional string field.
    "data": {
        "in": "formData", "type": "string", "required": False,
        "description": "Optional JSON string with document update settings.",
    },
}
# Body returned by the create/update document handlers: the serialized
# document together with its ``batch`` string.
class DocumentAndBatchResponse(ResponseModel):
    document: DocumentResponse
    batch: str
register_enum_models(service_api_ns, RetrievalMethod)
register_schema_models(
@ -121,7 +169,14 @@ register_schema_models(
PreProcessingRule,
Segmentation,
)
register_response_schema_models(service_api_ns, UrlResponse)
register_response_schema_models(
service_api_ns,
UrlResponse,
DocumentResponse,
DocumentAndBatchResponse,
DocumentListResponse,
DocumentStatusListResponse,
)
def _create_document_by_text(tenant_id: str, dataset_id: UUID) -> tuple[Mapping[str, object], int]:
@ -188,8 +243,7 @@ def _create_document_by_text(tenant_id: str, dataset_id: UUID) -> tuple[Mapping[
raise ProviderNotInitializeError(ex.description)
document = documents[0]
documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch}
return documents_and_batch_fields, 200
return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200
def _update_document_by_text(tenant_id: str, dataset_id: UUID, document_id: UUID) -> tuple[Mapping[str, object], int]:
@ -248,8 +302,7 @@ def _update_document_by_text(tenant_id: str, dataset_id: UUID, document_id: UUID
raise ProviderNotInitializeError(ex.description)
document = documents[0]
documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch}
return documents_and_batch_fields, 200
return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200
@service_api_ns.route("/datasets/<uuid:dataset_id>/document/create-by-text")
@ -267,6 +320,9 @@ class DocumentAddByTextApi(DatasetApiResource):
400: "Bad request - invalid parameters",
}
)
@service_api_ns.response(
200, "Document created successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_resource_check("documents", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
@ -296,6 +352,9 @@ class DeprecatedDocumentAddByTextApi(DatasetApiResource):
400: "Bad request - invalid parameters",
}
)
@service_api_ns.response(
200, "Document created successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_resource_check("documents", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
@ -319,6 +378,9 @@ class DocumentUpdateByTextApi(DatasetApiResource):
404: "Document not found",
}
)
@service_api_ns.response(
200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
@ -347,6 +409,9 @@ class DeprecatedDocumentUpdateByTextApi(DatasetApiResource):
404: "Document not found",
}
)
@service_api_ns.response(
200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
@ -363,7 +428,7 @@ class DocumentAddByFileApi(DatasetApiResource):
@service_api_ns.doc("create_document_by_file")
@service_api_ns.doc(description="Create a new document by uploading a file")
@service_api_ns.doc(params={"dataset_id": "Dataset ID"})
@service_api_ns.doc(consumes=["multipart/form-data"], params=DOCUMENT_CREATE_BY_FILE_PARAMS)
@service_api_ns.doc(
responses={
200: "Document created successfully",
@ -371,6 +436,9 @@ class DocumentAddByFileApi(DatasetApiResource):
400: "Bad request - invalid file or parameters",
}
)
@service_api_ns.response(
200, "Document created successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_resource_check("documents", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
@ -462,8 +530,7 @@ class DocumentAddByFileApi(DatasetApiResource):
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
document = documents[0]
documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch}
return documents_and_batch_fields, 200
return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200
def _update_document_by_file(tenant_id: str, dataset_id: UUID, document_id: UUID) -> tuple[Mapping[str, object], int]:
@ -539,8 +606,7 @@ def _update_document_by_file(tenant_id: str, dataset_id: UUID, document_id: UUID
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
document = documents[0]
documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": document.batch}
return documents_and_batch_fields, 200
return dump_response(DocumentAndBatchResponse, {"document": document, "batch": document.batch}), 200
@service_api_ns.route(
@ -558,7 +624,7 @@ class DeprecatedDocumentUpdateByFileApi(DatasetApiResource):
"Use PATCH /datasets/{dataset_id}/documents/{document_id} instead."
)
)
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
@service_api_ns.doc(consumes=["multipart/form-data"], params=DOCUMENT_UPDATE_BY_FILE_PARAMS)
@service_api_ns.doc(
responses={
200: "Document updated successfully",
@ -566,6 +632,9 @@ class DeprecatedDocumentUpdateByFileApi(DatasetApiResource):
404: "Document not found",
}
)
@service_api_ns.response(
200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
@ -577,7 +646,7 @@ class DeprecatedDocumentUpdateByFileApi(DatasetApiResource):
class DocumentListApi(DatasetApiResource):
@service_api_ns.doc("list_documents")
@service_api_ns.doc(description="List all documents in a dataset")
@service_api_ns.doc(params={"dataset_id": "Dataset ID"})
@service_api_ns.doc(params={"dataset_id": "Dataset ID", **query_params_from_model(DocumentListQuery)})
@service_api_ns.doc(
responses={
200: "Documents retrieved successfully",
@ -585,6 +654,9 @@ class DocumentListApi(DatasetApiResource):
404: "Dataset not found",
}
)
@service_api_ns.response(
200, "Documents retrieved successfully", service_api_ns.models[DocumentListResponse.__name__]
)
def get(self, tenant_id, dataset_id: UUID):
dataset_id_str = str(dataset_id)
tenant_id = str(tenant_id)
@ -618,14 +690,14 @@ class DocumentListApi(DatasetApiResource):
)
response = {
"data": marshal(documents, document_fields),
"data": documents,
"has_more": len(documents) == query_params.limit,
"limit": query_params.limit,
"total": paginated_documents.total,
"page": query_params.page,
}
return response
return dump_response(DocumentListResponse, response)
@service_api_ns.route("/datasets/<uuid:dataset_id>/documents/download-zip")
@ -680,6 +752,11 @@ class DocumentIndexingStatusApi(DatasetApiResource):
404: "Dataset or documents not found",
}
)
@service_api_ns.response(
200,
"Indexing status retrieved successfully",
service_api_ns.models[DocumentStatusListResponse.__name__],
)
def get(self, tenant_id, dataset_id: UUID, batch: str):
dataset_id_str = str(dataset_id)
tenant_id = str(tenant_id)
@ -729,9 +806,8 @@ class DocumentIndexingStatusApi(DatasetApiResource):
"completed_segments": completed_segments,
"total_segments": total_segments,
}
documents_status.append(marshal(document_dict, document_status_fields))
data = {"data": documents_status}
return data
documents_status.append(document_dict)
return dump_response(DocumentStatusListResponse, {"data": documents_status})
@service_api_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/download")
@ -890,7 +966,7 @@ class DocumentApi(DatasetApiResource):
@service_api_ns.doc("update_document_by_file")
@service_api_ns.doc(description="Update an existing document by uploading a file")
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
@service_api_ns.doc(consumes=["multipart/form-data"], params=DOCUMENT_UPDATE_BY_FILE_PARAMS)
@service_api_ns.doc(
responses={
200: "Document updated successfully",
@ -898,6 +974,9 @@ class DocumentApi(DatasetApiResource):
404: "Document not found",
}
)
@service_api_ns.response(
200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def patch(self, tenant_id: str, dataset_id: UUID, document_id: UUID):

View File

@ -103,7 +103,11 @@ def _replace_schema_table_type(markdown: str, definition_name: str, row_name: st
lines[index] = "|".join(cells)
break
return "\n".join(lines)
return "\n".join(lines) + ("\n" if markdown.endswith("\n") else "")
def _has_union_schema(schema: object) -> bool:
    """Tell whether ``schema`` is a dict holding a list under ``oneOf`` or ``anyOf``."""
    if not isinstance(schema, dict):
        return False
    return any(isinstance(schema.get(keyword), list) for keyword in ("oneOf", "anyOf"))
def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str:
@ -117,8 +121,20 @@ def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str:
for definition_name, schema in definitions.items():
if not isinstance(definition_name, str) or not isinstance(schema, dict):
continue
one_of = schema.get("oneOf")
if not isinstance(one_of, list):
properties = schema.get("properties")
if isinstance(properties, dict):
for property_name, property_schema in properties.items():
if isinstance(property_name, str) and _has_union_schema(property_schema):
markdown = _replace_schema_table_type(
markdown,
definition_name,
property_name,
_schema_markdown_type(property_schema),
)
union_variants = schema.get("oneOf") or schema.get("anyOf")
if not isinstance(union_variants, list):
continue
markdown = _replace_schema_table_type(
@ -128,7 +144,7 @@ def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str:
_schema_markdown_type(schema),
)
for variant in one_of:
for variant in union_variants:
variant_name = _definition_ref_name(variant)
variant_schema = definitions.get(variant_name) if variant_name is not None else None
if not isinstance(variant_name, str) or not isinstance(variant_schema, dict):
@ -150,7 +166,7 @@ def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str:
def _convert_spec_to_markdown(spec_path: Path, markdown_path: Path) -> None:
markdown_path.parent.mkdir(parents=True, exist_ok=True)
with tempfile.TemporaryDirectory(prefix=f"{markdown_path.stem}-", dir=markdown_path.parent) as temp_dir:
with tempfile.TemporaryDirectory(prefix=f"{markdown_path.stem}-") as temp_dir:
temp_markdown_path = Path(temp_dir) / markdown_path.name
result = subprocess.run(
[
@ -158,12 +174,13 @@ def _convert_spec_to_markdown(spec_path: Path, markdown_path: Path) -> None:
"--yes",
SWAGGER_MARKDOWN_PACKAGE,
"-i",
str(spec_path),
str(spec_path.resolve()),
"-o",
str(temp_markdown_path),
str(temp_markdown_path.resolve()),
],
check=False,
capture_output=True,
cwd=temp_dir,
text=True,
)
if result.returncode != 0:

View File

@ -1,95 +1,112 @@
from flask_restx import fields
"""Response schemas for dataset document endpoints."""
from fields.dataset_fields import dataset_fields
from libs.helper import TimestampField
from datetime import datetime
from typing import Any
document_metadata_fields = {
"id": fields.String,
"name": fields.String,
"type": fields.String,
"value": fields.String,
}
from pydantic import Field, field_validator
document_fields = {
"id": fields.String,
"position": fields.Integer,
"data_source_type": fields.String,
"data_source_info": fields.Raw(attribute="data_source_info_dict"),
"data_source_detail_dict": fields.Raw(attribute="data_source_detail_dict"),
"dataset_process_rule_id": fields.String,
"name": fields.String,
"created_from": fields.String,
"created_by": fields.String,
"created_at": TimestampField,
"tokens": fields.Integer,
"indexing_status": fields.String,
"error": fields.String,
"enabled": fields.Boolean,
"disabled_at": TimestampField,
"disabled_by": fields.String,
"archived": fields.Boolean,
"display_status": fields.String,
"word_count": fields.Integer,
"hit_count": fields.Integer,
"doc_form": fields.String,
"doc_metadata": fields.List(fields.Nested(document_metadata_fields), attribute="doc_metadata_details"),
# Summary index generation status:
# "SUMMARIZING" (when task is queued and generating)
"summary_index_status": fields.String,
# Whether this document needs summary index generation
"need_summary": fields.Boolean,
}
from fields.base import ResponseModel
from libs.helper import to_timestamp
document_with_segments_fields = {
"id": fields.String,
"position": fields.Integer,
"data_source_type": fields.String,
"data_source_info": fields.Raw(attribute="data_source_info_dict"),
"data_source_detail_dict": fields.Raw(attribute="data_source_detail_dict"),
"dataset_process_rule_id": fields.String,
"process_rule_dict": fields.Raw(attribute="process_rule_dict"),
"name": fields.String,
"created_from": fields.String,
"created_by": fields.String,
"created_at": TimestampField,
"tokens": fields.Integer,
"indexing_status": fields.String,
"error": fields.String,
"enabled": fields.Boolean,
"disabled_at": TimestampField,
"disabled_by": fields.String,
"archived": fields.Boolean,
"display_status": fields.String,
"word_count": fields.Integer,
"hit_count": fields.Integer,
"completed_segments": fields.Integer,
"total_segments": fields.Integer,
"doc_metadata": fields.List(fields.Nested(document_metadata_fields), attribute="doc_metadata_details"),
# Summary index generation status:
# "SUMMARIZING" (when task is queued and generating)
"summary_index_status": fields.String,
"need_summary": fields.Boolean, # Whether this document needs summary index generation
}
dataset_and_document_fields = {
"dataset": fields.Nested(dataset_fields),
"documents": fields.List(fields.Nested(document_fields)),
"batch": fields.String,
}
def normalize_enum(value: Any) -> Any:
    """Unwrap an enum-like object to its ``value``; pass everything else through.

    ``None`` and ``str`` instances are returned untouched. Any other object is
    replaced by its ``value`` attribute when present, otherwise returned as-is.
    """
    is_plain = value is None or isinstance(value, str)
    return value if is_plain else getattr(value, "value", value)
document_status_fields = {
"id": fields.String,
"indexing_status": fields.String,
"processing_started_at": TimestampField,
"parsing_completed_at": TimestampField,
"cleaning_completed_at": TimestampField,
"splitting_completed_at": TimestampField,
"completed_at": TimestampField,
"paused_at": TimestampField,
"error": fields.String,
"stopped_at": TimestampField,
"completed_segments": fields.Integer,
"total_segments": fields.Integer,
}
document_status_fields_list = {"data": fields.List(fields.Nested(document_status_fields))}
# One metadata entry attached to a document.
class DocumentMetadataResponse(ResponseModel):
    id: str
    name: str
    type: str
    # May be a string, integer, float or boolean; defaults to None.
    value: str | int | float | bool | None = None
# Serialized view of a dataset document.
# ``data_source_info`` and ``doc_metadata`` are filled from the
# ``data_source_info_dict`` / ``doc_metadata_details`` inputs through their
# validation aliases.
class DocumentResponse(ResponseModel):
    id: str
    position: int | None = None
    data_source_type: str | None = None
    data_source_info: Any = Field(default=None, validation_alias="data_source_info_dict")
    data_source_detail_dict: Any = None
    dataset_process_rule_id: str | None = None
    name: str
    created_from: str | None = None
    created_by: str | None = None
    created_at: int | None = None
    tokens: int | None = None
    indexing_status: str | None = None
    error: str | None = None
    enabled: bool | None = None
    disabled_at: int | None = None
    disabled_by: str | None = None
    archived: bool | None = None
    display_status: str | None = None
    word_count: int | None = None
    hit_count: int | None = None
    doc_form: str | None = None
    doc_metadata: list[DocumentMetadataResponse] = Field(default_factory=list, validation_alias="doc_metadata_details")
    # Summary index generation status, e.g. "SUMMARIZING" while the task is queued and generating.
    summary_index_status: str | None = None
    # Whether this document needs summary index generation.
    need_summary: bool | None = None

    @field_validator("data_source_type", "indexing_status", "display_status", "doc_form", mode="before")
    @classmethod
    def _normalize_enum_fields(cls, value: Any) -> Any:
        # Enum-like inputs are unwrapped to their ``value`` before validation.
        return normalize_enum(value)

    @field_validator("doc_metadata", mode="before")
    @classmethod
    def _normalize_doc_metadata(cls, value: Any) -> list[Any]:
        # A missing metadata collection is reported as an empty list.
        return [] if value is None else value

    @field_validator("created_at", "disabled_at", mode="before")
    @classmethod
    def _normalize_timestamp(cls, value: datetime | int | None) -> int | None:
        # Conversion is delegated to the shared ``to_timestamp`` helper.
        return to_timestamp(value)
# One page of documents plus the paging values returned alongside it.
class DocumentListResponse(ResponseModel):
    data: list[DocumentResponse]
    has_more: bool
    limit: int
    total: int
    page: int
# Indexing status of a single document.
class DocumentStatusResponse(ResponseModel):
    id: str
    indexing_status: str
    # The stage timestamps, ``error`` and ``stopped_at`` declare no default:
    # the key has to be supplied, although its value may be None.
    processing_started_at: int | None
    parsing_completed_at: int | None
    cleaning_completed_at: int | None
    splitting_completed_at: int | None
    completed_at: int | None
    paused_at: int | None
    error: str | None
    stopped_at: int | None
    # Segment counters fall back to None when omitted.
    completed_segments: int | None = None
    total_segments: int | None = None

    @field_validator(
        "processing_started_at",
        "parsing_completed_at",
        "cleaning_completed_at",
        "splitting_completed_at",
        "completed_at",
        "paused_at",
        "stopped_at",
        mode="before",
    )
    @classmethod
    def _normalize_timestamp(cls, value: datetime | int | None) -> int | None:
        # Conversion is delegated to the shared ``to_timestamp`` helper.
        return to_timestamp(value)

    @field_validator("indexing_status", mode="before")
    @classmethod
    def _normalize_indexing_status(cls, value: Any) -> Any:
        # Enum-like inputs are unwrapped to their ``value`` before validation.
        return normalize_enum(value)
# Wrapper that carries the per-document indexing status entries under ``data``.
class DocumentStatusListResponse(ResponseModel):
    data: list[DocumentStatusResponse]

View File

@ -4792,9 +4792,9 @@ Get dataset auto disable logs
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Success |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Indexing status retrieved successfully | [DocumentStatusListResponse](#documentstatuslistresponse) |
### /datasets/{dataset_id}/documents
@ -4830,9 +4830,9 @@ Get documents in a dataset
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Documents retrieved successfully |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Documents retrieved successfully | [DocumentWithSegmentsListResponse](#documentwithsegmentslistresponse) |
#### POST
##### Parameters
@ -5028,10 +5028,10 @@ Get document indexing status
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Indexing status retrieved successfully |
| 404 | Document not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Indexing status retrieved successfully | [DocumentStatusResponse](#documentstatusresponse) |
| 404 | Document not found | |
### /datasets/{dataset_id}/documents/{document_id}/metadata
@ -11432,7 +11432,7 @@ Enum class for api provider schema type.
| description | string | | Yes |
| id | string | | Yes |
| name | string | | Yes |
| parameters | | | Yes |
| parameters | object<br>[ object ]<br>string | | Yes |
| server_code | string | | Yes |
| status | [AppMCPServerStatus](#appmcpserverstatus) | | Yes |
| updated_at | integer | | No |
@ -11903,7 +11903,7 @@ Condition detail
| ---- | ---- | ----------- | -------- |
| comparison_operator | string | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes |
| name | string | | Yes |
| value | | | No |
| value | string<br>[ string ]<br>integer<br>number | | No |
#### ConsoleDatasetListQuery
@ -12820,7 +12820,7 @@ Request payload for bulk downloading documents as a zip archive.
| id | string | | Yes |
| name | string | | Yes |
| type | string | | Yes |
| value | string | | No |
| value | string<br>integer<br>number<br>boolean | | No |
#### DocumentMetadataUpdatePayload
@ -12844,14 +12844,14 @@ Request payload for bulk downloading documents as a zip archive.
| created_by | string | | No |
| created_from | string | | No |
| data_source_detail_dict | | | No |
| data_source_info_dict | | | No |
| data_source_info | | | No |
| data_source_type | string | | No |
| dataset_process_rule_id | string | | No |
| disabled_at | integer | | No |
| disabled_by | string | | No |
| display_status | string | | No |
| doc_form | string | | No |
| doc_metadata_details | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No |
| doc_metadata | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No |
| enabled | boolean | | No |
| error | string | | No |
| hit_count | integer | | No |
@ -12893,6 +12893,16 @@ Request payload for bulk downloading documents as a zip archive.
| stopped_at | integer | | Yes |
| total_segments | integer | | No |
#### DocumentWithSegmentsListResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| data | [ [DocumentWithSegmentsResponse](#documentwithsegmentsresponse) ] | | Yes |
| has_more | boolean | | Yes |
| limit | integer | | Yes |
| page | integer | | Yes |
| total | integer | | Yes |
#### DocumentWithSegmentsResponse
| Name | Type | Description | Required |
@ -12903,14 +12913,14 @@ Request payload for bulk downloading documents as a zip archive.
| created_by | string | | No |
| created_from | string | | No |
| data_source_detail_dict | | | No |
| data_source_info_dict | | | No |
| data_source_info | | | No |
| data_source_type | string | | No |
| dataset_process_rule_id | string | | No |
| disabled_at | integer | | No |
| disabled_by | string | | No |
| display_status | string | | No |
| doc_form | string | | No |
| doc_metadata_details | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No |
| doc_metadata | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No |
| enabled | boolean | | No |
| error | string | | No |
| hit_count | integer | | No |
@ -14000,7 +14010,7 @@ Enum class for large language model mode.
| ---- | ---- | ----------- | -------- |
| id | string | | Yes |
| name | string | | Yes |
| value | | | No |
| value | string<br>integer<br>number | | No |
#### MetadataFilteringCondition
@ -14595,7 +14605,7 @@ Form input definition.
| ---- | ---- | ----------- | -------- |
| current_identifier | string | | No |
| type | [Type](#type) | | Yes |
| value | | | Yes |
| value | [Github](#github)<br>[Marketplace](#marketplace)<br>[Package](#package) | | Yes |
#### PluginEndpointListResponse
@ -15130,7 +15140,7 @@ Form input definition.
| description | string | | No |
| icon | string | | No |
| icon_background | string | | No |
| icon_type | | | No |
| icon_type | string<br>[IconType](#icontype) | | No |
| privacy_policy | string | | No |
| prompt_public | boolean | | No |
| show_workflow_steps | boolean | | No |

View File

@ -753,15 +753,17 @@ Create a new document by uploading a file
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| data | formData | Optional JSON string with document creation settings. | No | string |
| file | formData | Document file to upload. | Yes | file |
| dataset_id | path | Dataset ID | Yes | string |
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document created successfully |
| 400 | Bad request - invalid file or parameters |
| 401 | Unauthorized - invalid API token |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 400 | Bad request - invalid file or parameters | |
| 401 | Unauthorized - invalid API token | |
### /datasets/{dataset_id}/document/create-by-text
@ -779,11 +781,11 @@ Create a new document by providing text content
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document created successfully |
| 400 | Bad request - invalid parameters |
| 401 | Unauthorized - invalid API token |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 400 | Bad request - invalid parameters | |
| 401 | Unauthorized - invalid API token | |
### /datasets/{dataset_id}/document/create_by_file
@ -796,15 +798,17 @@ Create a new document by uploading a file
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| data | formData | Optional JSON string with document creation settings. | No | string |
| file | formData | Document file to upload. | Yes | file |
| dataset_id | path | Dataset ID | Yes | string |
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document created successfully |
| 400 | Bad request - invalid file or parameters |
| 401 | Unauthorized - invalid API token |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 400 | Bad request - invalid file or parameters | |
| 401 | Unauthorized - invalid API token | |
### /datasets/{dataset_id}/document/create_by_text
@ -823,11 +827,11 @@ Deprecated legacy alias for creating a new document by providing text content. U
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document created successfully |
| 400 | Bad request - invalid parameters |
| 401 | Unauthorized - invalid API token |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 400 | Bad request - invalid parameters | |
| 401 | Unauthorized - invalid API token | |
### /datasets/{dataset_id}/documents
@ -841,14 +845,18 @@ List all documents in a dataset
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| dataset_id | path | Dataset ID | Yes | string |
| keyword | query | Search keyword | No | string |
| limit | query | Number of items per page | No | integer |
| page | query | Page number | No | integer |
| status | query | Document status filter | No | string |
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Documents retrieved successfully |
| 401 | Unauthorized - invalid API token |
| 404 | Dataset not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Documents retrieved successfully | [DocumentListResponse](#documentlistresponse) |
| 401 | Unauthorized - invalid API token | |
| 404 | Dataset not found | |
### /datasets/{dataset_id}/documents/download-zip
@ -956,11 +964,11 @@ Get indexing status for documents in a batch
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Indexing status retrieved successfully |
| 401 | Unauthorized - invalid API token |
| 404 | Dataset or documents not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Indexing status retrieved successfully | [DocumentStatusListResponse](#documentstatuslistresponse) |
| 401 | Unauthorized - invalid API token | |
| 404 | Dataset or documents not found | |
### /datasets/{dataset_id}/documents/{document_id}
@ -1019,16 +1027,18 @@ Update an existing document by uploading a file
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| data | formData | Optional JSON string with document update settings. | No | string |
| file | formData | Replacement document file. | No | file |
| dataset_id | path | Dataset ID | Yes | string |
| document_id | path | Document ID | Yes | string |
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document updated successfully |
| 401 | Unauthorized - invalid API token |
| 404 | Document not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 401 | Unauthorized - invalid API token | |
| 404 | Document not found | |
### /datasets/{dataset_id}/documents/{document_id}/download
@ -1274,16 +1284,18 @@ Deprecated legacy alias for updating an existing document by uploading a file. U
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| data | formData | Optional JSON string with document update settings. | No | string |
| file | formData | Replacement document file. | No | file |
| dataset_id | path | Dataset ID | Yes | string |
| document_id | path | Document ID | Yes | string |
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document updated successfully |
| 401 | Unauthorized - invalid API token |
| 404 | Document not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 401 | Unauthorized - invalid API token | |
| 404 | Document not found | |
### /datasets/{dataset_id}/documents/{document_id}/update-by-text
@ -1302,11 +1314,11 @@ Update an existing document by providing text content
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document updated successfully |
| 401 | Unauthorized - invalid API token |
| 404 | Document not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 401 | Unauthorized - invalid API token | |
| 404 | Document not found | |
### /datasets/{dataset_id}/documents/{document_id}/update_by_file
@ -1320,16 +1332,18 @@ Deprecated legacy alias for updating an existing document by uploading a file. U
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| data | formData | Optional JSON string with document update settings. | No | string |
| file | formData | Replacement document file. | No | file |
| dataset_id | path | Dataset ID | Yes | string |
| document_id | path | Document ID | Yes | string |
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document updated successfully |
| 401 | Unauthorized - invalid API token |
| 404 | Document not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 401 | Unauthorized - invalid API token | |
| 404 | Document not found | |
### /datasets/{dataset_id}/documents/{document_id}/update_by_text
@ -1349,11 +1363,11 @@ Deprecated legacy alias for updating an existing document by providing text cont
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document updated successfully |
| 401 | Unauthorized - invalid API token |
| 404 | Document not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 401 | Unauthorized - invalid API token | |
| 404 | Document not found | |
### /datasets/{dataset_id}/hit-testing
@ -2288,7 +2302,7 @@ Condition detail
| ---- | ---- | ----------- | -------- |
| comparison_operator | string | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes |
| name | string | | Yes |
| value | | | No |
| value | string<br>[ string ]<br>integer<br>number | | No |
#### ConversationListQuery
@ -2637,6 +2651,13 @@ Condition detail
| inputs | object | | Yes |
| is_published | boolean | | Yes |
#### DocumentAndBatchResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| batch | string | | Yes |
| document | [DocumentResponse](#documentresponse) | | Yes |
#### DocumentBatchDownloadZipPayload
Request payload for bulk downloading documents as a zip archive.
@ -2654,6 +2675,16 @@ Request payload for bulk downloading documents as a zip archive.
| page | integer | Page number | No |
| status | string | Document status filter | No |
#### DocumentListResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| data | [ [DocumentResponse](#documentresponse) ] | | Yes |
| has_more | boolean | | Yes |
| limit | integer | | Yes |
| page | integer | | Yes |
| total | integer | | Yes |
#### DocumentMetadataOperation
| Name | Type | Description | Required |
@ -2662,6 +2693,67 @@ Request payload for bulk downloading documents as a zip archive.
| metadata_list | [ [MetadataDetail](#metadatadetail) ] | | Yes |
| partial_update | boolean | | No |
#### DocumentMetadataResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| id | string | | Yes |
| name | string | | Yes |
| type | string | | Yes |
| value | string<br>integer<br>number<br>boolean | | No |
#### DocumentResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| archived | boolean | | No |
| created_at | integer | | No |
| created_by | string | | No |
| created_from | string | | No |
| data_source_detail_dict | | | No |
| data_source_info | | | No |
| data_source_type | string | | No |
| dataset_process_rule_id | string | | No |
| disabled_at | integer | | No |
| disabled_by | string | | No |
| display_status | string | | No |
| doc_form | string | | No |
| doc_metadata | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No |
| enabled | boolean | | No |
| error | string | | No |
| hit_count | integer | | No |
| id | string | | Yes |
| indexing_status | string | | No |
| name | string | | Yes |
| need_summary | boolean | | No |
| position | integer | | No |
| summary_index_status | string | | No |
| tokens | integer | | No |
| word_count | integer | | No |
#### DocumentStatusListResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| data | [ [DocumentStatusResponse](#documentstatusresponse) ] | | Yes |
#### DocumentStatusResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| cleaning_completed_at | integer | | Yes |
| completed_at | integer | | Yes |
| completed_segments | integer | | No |
| error | string | | Yes |
| id | string | | Yes |
| indexing_status | string | | Yes |
| parsing_completed_at | integer | | Yes |
| paused_at | integer | | Yes |
| processing_started_at | integer | | Yes |
| splitting_completed_at | integer | | Yes |
| stopped_at | integer | | Yes |
| total_segments | integer | | No |
#### DocumentTextCreatePayload
| Name | Type | Description | Required |
@ -2896,7 +2988,7 @@ Note: The SQLAlchemy model defines an `is_anonymous` property for Flask-Login se
| ---- | ---- | ----------- | -------- |
| id | string | | Yes |
| name | string | | Yes |
| value | | | No |
| value | string<br>integer<br>number | | No |
#### MetadataFilteringCondition
@ -3247,7 +3339,7 @@ Accept the legacy single-tag Service API payload while exposing a normalized tag
| created_by_end_user | [SimpleEndUser](#simpleenduser) | | No |
| created_by_role | string | | No |
| created_from | string | | No |
| details | | | No |
| details | object<br>[ object ]<br>string<br>integer<br>number<br>boolean | | No |
| id | string | | Yes |
| workflow_run | [WorkflowRunForLogResponse](#workflowrunforlogresponse) | | No |
@ -3269,7 +3361,7 @@ Accept the legacy single-tag Service API payload while exposing a normalized tag
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| created_at | integer | | No |
| elapsed_time | | | No |
| elapsed_time | number<br>integer | | No |
| error | string | | No |
| exceptions_count | integer | | No |
| finished_at | integer | | No |
@ -3293,11 +3385,11 @@ Accept the legacy single-tag Service API payload while exposing a normalized tag
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| created_at | integer | | No |
| elapsed_time | | | No |
| elapsed_time | number<br>integer | | No |
| error | string | | No |
| finished_at | integer | | No |
| id | string | | Yes |
| inputs | | | No |
| inputs | object<br>[ object ]<br>string<br>integer<br>number<br>boolean | | No |
| outputs | object | | No |
| status | string | | Yes |
| total_steps | integer | | No |

View File

@ -188,6 +188,45 @@ def test_patch_union_schema_markdown_fills_converter_blank_schema_types(tmp_path
assert "| allowed_file_types | [ [FileType](#filetype) ] | | No |" in patched
def test_patch_union_schema_markdown_fills_regular_definition_union_property(tmp_path):
    """Check that an ``anyOf`` property on a regular definition has its Type cell rewritten.

    The spec fixture declares ``DocumentMetadataResponse.value`` as a union of
    string, integer, number, boolean and null, while the markdown fixture shows
    that row as plain ``string``. After patching, the row must list the four
    non-null member types joined by ``<br>``.
    """
    module = _load_generate_swagger_markdown_docs_module()

    # Union members in the order the expected Type cell lists them, plus null.
    value_schema = {
        "anyOf": [{"type": member} for member in ("string", "integer", "number", "boolean", "null")],
    }
    swagger_spec = {
        "definitions": {
            "DocumentMetadataResponse": {
                "properties": {
                    "id": {"type": "string"},
                    "value": value_schema,
                },
            },
        }
    }
    spec_path = tmp_path / "service-swagger.json"
    spec_path.write_text(json.dumps(swagger_spec), encoding="utf-8")

    markdown = """#### DocumentMetadataResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| id | string | | Yes |
| value | string | | No |
"""

    patched = module._patch_union_schema_markdown(markdown, spec_path)

    expected_row = "| value | string<br>integer<br>number<br>boolean | | No |"
    assert expected_row in patched
def test_patch_union_schema_markdown_ignores_specs_without_definitions(tmp_path):
module = _load_generate_swagger_markdown_docs_module()
spec_path = tmp_path / "console-swagger.json"
@ -236,7 +275,7 @@ def test_patch_union_schema_markdown_ignores_unrenderable_shapes(tmp_path):
== "#### Definition\n| field |"
)
assert module._patch_union_schema_markdown("#### BrokenUnion\n", spec_path) == "#### BrokenUnion"
assert module._patch_union_schema_markdown("#### BrokenUnion\n", spec_path) == "#### BrokenUnion\n"
def test_convert_spec_to_markdown_patches_generated_union_tables(tmp_path, monkeypatch):

View File

@ -1,4 +1,3 @@
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
@ -9,6 +8,7 @@ import services
from controllers.console import console_ns
from controllers.console.datasets.datasets_document import (
DatasetDocumentListApi,
DatasetInitApi,
DocumentApi,
DocumentBatchDownloadZipApi,
DocumentBatchIndexingEstimateApi,
@ -20,6 +20,7 @@ from controllers.console.datasets.datasets_document import (
DocumentMetadataApi,
DocumentPipelineExecutionLogApi,
DocumentProcessingApi,
DocumentRenameApi,
DocumentRetryApi,
DocumentStatusApi,
DocumentSummaryStatusApi,
@ -33,7 +34,9 @@ from controllers.console.datasets.error import (
InvalidMetadataError,
)
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.enums import DataSourceType, IndexingStatus
from models.dataset import Dataset
from models.dataset import Document as DatasetDocument
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus
def unwrap(func):
@ -42,6 +45,79 @@ def unwrap(func):
return func
def make_serializable_document(**overrides):
    """Build a strict ``MagicMock`` document exposing exactly the given attributes.

    The defaults below are merged with ``overrides``; an override replaces the
    default of the same name and may also introduce new attribute names.
    The mock is created with ``spec_set`` over the merged names, so touching
    any attribute outside that set raises ``AttributeError``.
    """
    defaults = {
        # Identity and data source
        "id": "doc-1",
        "position": 1,
        "data_source_type": "upload_file",
        "data_source_info_dict": {"upload_file_id": "file-1"},
        "data_source_detail_dict": {},
        "dataset_process_rule_id": None,
        "name": "Document",
        # Provenance
        "created_from": "web",
        "created_by": "u1",
        "created_at": None,
        # Indexing state
        "tokens": None,
        "indexing_status": "completed",
        "error": None,
        "enabled": True,
        "disabled_at": None,
        "disabled_by": None,
        "archived": False,
        "display_status": "available",
        # Statistics and metadata
        "word_count": None,
        "hit_count": 0,
        "doc_form": "text_model",
        "doc_metadata_details": None,
        "summary_index_status": None,
        "need_summary": False,
        "process_rule_dict": None,
        "completed_segments": None,
        "total_segments": None,
    }
    fields = {**defaults, **overrides}
    document = MagicMock(spec_set=list(fields))
    # configure_mock assigns each keyword as an attribute, including ``name``.
    document.configure_mock(**fields)
    return document
def make_dataset(**overrides):
    """Construct a ``Dataset`` from default column values, with ``overrides`` taking precedence."""
    defaults = {
        "id": "ds-1",
        "tenant_id": "tenant-1",
        "name": "Dataset",
        "indexing_technique": "economy",
        "created_by": "u1",
        "summary_index_setting": {"enable": True},
    }
    return Dataset(**{**defaults, **overrides})
def make_document(**overrides):
    """Construct a ``DatasetDocument`` from default column values, with ``overrides`` taking precedence."""
    defaults = {
        # Identity and ownership
        "id": "doc-1",
        "tenant_id": "tenant-1",
        "dataset_id": "ds-1",
        "position": 1,
        # Source and provenance
        "data_source_type": DataSourceType.UPLOAD_FILE,
        "data_source_info": None,
        "batch": "batch-1",
        "name": "Document",
        "created_from": DocumentCreatedFrom.WEB,
        "created_by": "u1",
        # State
        "indexing_status": IndexingStatus.COMPLETED,
        "enabled": True,
        "archived": False,
        "doc_metadata": None,
        "doc_form": IndexStructureType.PARAGRAPH_INDEX,
        "need_summary": False,
    }
    return DatasetDocument(**{**defaults, **overrides})
@pytest.fixture
def tenant_ctx():
    """Return a ``(mock, "tenant-1")`` pair; the mock has ``is_dataset_editor=True`` and ``id="u1"``."""
    user_mock = MagicMock(is_dataset_editor=True, id="u1")
    return (user_mock, "tenant-1")
@ -58,7 +134,7 @@ def patch_tenant(tenant_ctx):
@pytest.fixture
def dataset():
    """Provide the default dataset produced by ``make_dataset()``."""
    # A single return: a preceding ``return MagicMock(...)`` would make this
    # line unreachable and hand tests a loose mock instead of a ``Dataset``.
    return make_dataset()
@pytest.fixture
@ -130,11 +206,9 @@ class TestDatasetDocumentListApi:
api = DatasetDocumentListApi()
method = unwrap(api.get)
doc = MagicMock(id="doc-1")
doc = make_serializable_document()
pagination = MagicMock(items=[doc], total=1)
count_mock = MagicMock(return_value=2)
with (
app.test_request_context("/?fetch=true"),
patch(
@ -149,14 +223,12 @@ class TestDatasetDocumentListApi:
"controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status",
return_value=None,
),
patch(
"controllers.console.datasets.datasets_document.marshal",
return_value=[{"id": "doc-1"}],
),
):
resp = method(api, "ds-1")
assert resp["data"]
assert resp["data"][0]["id"] == "doc-1"
assert resp["data"][0]["completed_segments"] == 2
assert resp["data"][0]["total_segments"] == 2
def test_get_with_search_status_and_created_at_sort(
self, app: Flask, patch_tenant, patch_dataset, patch_permission
@ -164,7 +236,7 @@ class TestDatasetDocumentListApi:
api = DatasetDocumentListApi()
method = unwrap(api.get)
pagination = MagicMock(items=[MagicMock()], total=1)
pagination = MagicMock(items=[make_serializable_document()], total=1)
with (
app.test_request_context("/?keyword=test&status=enabled&sort=created_at"),
@ -180,10 +252,6 @@ class TestDatasetDocumentListApi:
"controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status",
return_value=None,
),
patch(
"controllers.console.datasets.datasets_document.marshal",
return_value=[{"id": "doc-1"}],
),
):
resp = method(api, "ds-1")
@ -193,7 +261,7 @@ class TestDatasetDocumentListApi:
api = DatasetDocumentListApi()
method = unwrap(api.get)
pagination = MagicMock(items=[MagicMock()], total=1)
pagination = MagicMock(items=[make_serializable_document()], total=1)
with (
app.test_request_context("/"),
@ -205,22 +273,21 @@ class TestDatasetDocumentListApi:
"controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status",
return_value=None,
),
patch(
"controllers.console.datasets.datasets_document.marshal",
return_value=[{"id": "doc-1"}],
),
):
response = method(api, "ds-1")
assert response["total"] == 1
assert response["data"][0]["id"] == "doc-1"
assert "completed_segments" not in response["data"][0]
assert "total_segments" not in response["data"][0]
def test_post_success(self, app: Flask, patch_tenant, patch_dataset, patch_permission):
api = DatasetDocumentListApi()
method = unwrap(api.post)
payload = {"indexing_technique": "economy"}
created_dataset = SimpleNamespace(id="ds-1", name="Dataset", indexing_technique="economy")
created_document = SimpleNamespace(id="doc-1", name="Document", doc_metadata_details=None)
created_dataset = make_dataset()
created_document = make_document()
with (
app.test_request_context("/", json=payload),
@ -237,10 +304,17 @@ class TestDatasetDocumentListApi:
"controllers.console.datasets.datasets_document.DocumentService.save_document_with_dataset_id",
return_value=([created_document], "batch-1"),
),
patch("models.dataset.db.session.scalar", return_value=0),
):
response = method(api, "ds-1")
assert "documents" in response
assert response["dataset"]["id"] == "ds-1"
assert response["documents"][0]["id"] == "doc-1"
assert response["documents"][0]["data_source_info"] == {}
assert response["documents"][0]["doc_metadata"] == []
assert "data_source_info_dict" not in response["documents"][0]
assert "doc_metadata_details" not in response["documents"][0]
def test_post_forbidden(self, app: Flask):
api = DatasetDocumentListApi()
@ -267,7 +341,7 @@ class TestDatasetDocumentListApi:
api = DatasetDocumentListApi()
method = unwrap(api.get)
pagination = MagicMock(items=[MagicMock()], total=1)
pagination = MagicMock(items=[make_serializable_document()], total=1)
with (
app.test_request_context("/?fetch=maybe"),
@ -279,10 +353,6 @@ class TestDatasetDocumentListApi:
"controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status",
return_value=None,
),
patch(
"controllers.console.datasets.datasets_document.marshal",
return_value=[{"id": "doc-1"}],
),
):
response = method(api, "ds-1")
@ -310,6 +380,37 @@ class TestDatasetDocumentListApi:
assert response["total"] == 0
class TestDatasetInitApi:
    """Serialization test for ``DatasetInitApi.post``."""

    def test_post_success_serializes_created_dataset_and_documents(self, app: Flask, patch_tenant):
        """POST returns the created dataset, its documents and the batch id in the new response shape.

        ``DocumentService`` is patched to hand back a dataset/document pair built
        by the local factories; the response must expose ``data_source_info`` as
        ``{}`` and ``doc_metadata`` as ``[]`` for the created document.
        """
        api = DatasetInitApi()
        method = unwrap(api.post)

        payload = {"indexing_technique": "economy"}
        created_dataset = make_dataset()
        created_document = make_document(id="doc-init")
        service_result = (created_dataset, [created_document], "batch-init")

        with (
            app.test_request_context("/", json=payload),
            patch.object(type(console_ns), "payload", payload),
            patch(
                "controllers.console.datasets.datasets_document.DocumentService.document_create_args_validate",
                return_value=None,
            ),
            patch(
                "controllers.console.datasets.datasets_document.DocumentService.save_document_without_dataset_id",
                return_value=service_result,
            ),
            patch("models.dataset.db.session.scalar", return_value=0),
        ):
            response = method(api)

        assert response["dataset"]["id"] == "ds-1"

        first_document = response["documents"][0]
        assert first_document["id"] == "doc-init"
        assert first_document["data_source_info"] == {}
        assert first_document["doc_metadata"] == []

        assert response["batch"] == "batch-init"
class TestDocumentApi:
def test_get_success(self, app: Flask, patch_tenant):
api = DocumentApi()
@ -899,7 +1000,7 @@ class TestDocumentBatchDownloadZipApi:
api = DocumentBatchDownloadZipApi()
method = unwrap(api.post)
payload = {"document_ids": []}
payload: dict[str, list[str]] = {"document_ids": []}
with app.test_request_context("/", json=payload), patch.object(type(console_ns), "payload", payload):
with pytest.raises(ValueError):
@ -1046,6 +1147,53 @@ class TestDocumentBatchIndexingEstimateApi:
class TestDocumentBatchIndexingStatusApi:
def test_get_batch_status_success_serializes_status_shape(self, app: Flask, patch_tenant):
    """Batch status GET returns one fully-shaped status entry per document.

    ``db.session.scalar`` is patched to yield 2 and then 3, which the response
    must report as ``completed_segments`` and ``total_segments``; the enum
    ``IndexingStatus.COMPLETED`` must come back as the string ``"completed"``.
    """
    api = DocumentBatchIndexingStatusApi()
    method = unwrap(api.get)

    # Fields that are None on the mock and must be echoed as None in the response.
    unset_fields = dict.fromkeys(
        (
            "processing_started_at",
            "parsing_completed_at",
            "cleaning_completed_at",
            "splitting_completed_at",
            "completed_at",
            "paused_at",
            "error",
            "stopped_at",
        )
    )
    document = MagicMock(
        id="doc-1",
        indexing_status=IndexingStatus.COMPLETED,
        is_paused=False,
        **unset_fields,
    )

    with (
        app.test_request_context("/"),
        patch.object(api, "get_batch_documents", return_value=[document]),
        patch(
            "controllers.console.datasets.datasets_document.db.session.scalar",
            side_effect=[2, 3],
        ),
    ):
        response = method(api, "ds-1", "batch-1")

    expected_entry = {
        "id": "doc-1",
        "indexing_status": "completed",
        **unset_fields,
        "completed_segments": 2,
        "total_segments": 3,
    }
    assert response == {"data": [expected_entry]}
def test_get_batch_status_invalid_batch(self, app: Flask, patch_tenant):
"""Test batch status with invalid batch"""
api = DocumentBatchIndexingStatusApi()
@ -1057,6 +1205,39 @@ class TestDocumentBatchIndexingStatusApi:
class TestDocumentIndexingStatusApi:
def test_get_status_success_serializes_status_shape(self, app: Flask, patch_tenant):
    """Single-document status GET serializes id, status string and both segment counts.

    ``db.session.scalar`` is patched to yield 1 and then 4, which the response
    must report as ``completed_segments`` and ``total_segments``.
    """
    api = DocumentIndexingStatusApi()
    method = unwrap(api.get)

    # Every timestamp-like field, plus ``error``, is left unset on the mock.
    unset_fields = dict.fromkeys(
        (
            "processing_started_at",
            "parsing_completed_at",
            "cleaning_completed_at",
            "splitting_completed_at",
            "completed_at",
            "paused_at",
            "error",
            "stopped_at",
        )
    )
    document = MagicMock(
        id="doc-1",
        indexing_status=IndexingStatus.INDEXING,
        is_paused=False,
        **unset_fields,
    )

    with (
        app.test_request_context("/"),
        patch.object(api, "get_document", return_value=document),
        patch(
            "controllers.console.datasets.datasets_document.db.session.scalar",
            side_effect=[1, 4],
        ),
    ):
        response = method(api, "ds-1", "doc-1")

    expected_subset = {
        "id": "doc-1",
        "indexing_status": "indexing",
        "completed_segments": 1,
        "total_segments": 4,
    }
    for key, expected_value in expected_subset.items():
        assert response[key] == expected_value
def test_get_status_document_not_found(self, app: Flask, patch_tenant):
"""Test getting status for non-existent document"""
api = DocumentIndexingStatusApi()
@ -1067,6 +1248,40 @@ class TestDocumentIndexingStatusApi:
method(api, "ds-1", "invalid-doc")
class TestDocumentRenameApi:
    """Serialization test for ``DocumentRenameApi.post``."""

    def test_post_success_serializes_document_shape(self, app: Flask, patch_tenant):
        """Rename returns the document in the new response shape.

        The patched ``rename_document`` hands back a document built by
        ``make_document``; the response must carry ``data_source_info`` and
        ``doc_metadata`` and must not expose ``data_source_info_dict``.
        """
        api = DocumentRenameApi()
        method = unwrap(api.post)

        payload = {"name": "Renamed Document"}
        renamed_document = make_document(id="doc-renamed", name="Renamed Document")
        existing_dataset = make_dataset()

        with (
            app.test_request_context("/", json=payload),
            patch.object(type(console_ns), "payload", payload),
            patch(
                "controllers.console.datasets.datasets_document.DatasetService.get_dataset",
                return_value=existing_dataset,
            ),
            patch(
                "controllers.console.datasets.datasets_document.DatasetService.check_dataset_operator_permission",
                return_value=None,
            ),
            patch(
                "controllers.console.datasets.datasets_document.DocumentService.rename_document",
                return_value=renamed_document,
            ),
            patch("models.dataset.db.session.scalar", return_value=0),
        ):
            response = method(api, "ds-1", "doc-1")

        expected_fields = {
            "id": "doc-renamed",
            "name": "Renamed Document",
            "data_source_info": {},
            "doc_metadata": [],
        }
        for key, expected_value in expected_fields.items():
            assert response[key] == expected_value
        assert "data_source_info_dict" not in response
class TestDocumentApiMetadata:
def test_get_with_only_option(self, app: Flask, patch_tenant):
"""Test get with 'only' metadata option"""
@ -1291,7 +1506,7 @@ class TestDocumentListAdvancedCases:
api = DatasetDocumentListApi()
method = unwrap(api.get)
pagination = MagicMock(items=[MagicMock()], total=1)
pagination = MagicMock(items=[make_serializable_document()], total=1)
with (
app.test_request_context("/?sort=updated_at"),
@ -1303,10 +1518,6 @@ class TestDocumentListAdvancedCases:
"controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status",
return_value=None,
),
patch(
"controllers.console.datasets.datasets_document.marshal",
return_value=[{"id": "doc-1"}],
),
):
response = method(api, "ds-1")

View File

@ -44,6 +44,41 @@ from services.dataset_service import DocumentService
from services.entities.knowledge_entities.knowledge_entities import ProcessRule, RetrievalModel
def make_serializable_document(**overrides: object) -> Mock:
    """Build a strict ``Mock`` document exposing exactly the given attributes.

    Defaults (including a freshly generated UUID string for ``id``) are merged
    with ``overrides``; an override replaces the default of the same name and
    may add new attribute names. The mock is created with ``spec_set`` over
    the merged names, so any other attribute access raises ``AttributeError``.
    """
    defaults: dict[str, object] = {
        # Identity and data source
        "id": str(uuid.uuid4()),
        "position": 1,
        "data_source_type": "upload_file",
        "data_source_info_dict": {"upload_file_id": "file-1"},
        "data_source_detail_dict": {},
        "dataset_process_rule_id": None,
        "batch": "batch-1",
        "name": "Test Document",
        # Provenance
        "created_from": "api",
        "created_by": "user-1",
        "created_at": None,
        # Indexing state
        "tokens": None,
        "indexing_status": "completed",
        "error": None,
        "enabled": True,
        "disabled_at": None,
        "disabled_by": None,
        "archived": False,
        "display_status": "available",
        # Statistics and metadata
        "word_count": None,
        "hit_count": 0,
        "doc_form": "text_model",
        "doc_metadata_details": None,
        "summary_index_status": None,
        "need_summary": False,
    }
    fields: dict[str, object] = {**defaults, **overrides}
    document = Mock(spec_set=list(fields))
    # configure_mock assigns each keyword as an attribute, including ``name``.
    document.configure_mock(**fields)
    return document
class TestDocumentTextCreatePayload:
"""Test suite for DocumentTextCreatePayload Pydantic model."""
@ -226,7 +261,7 @@ class TestDocumentService:
assert hasattr(DocumentService, "batch_update_document_status")
@patch.object(DocumentService, "get_document")
def test_get_document_returns_document(self, mock_get):
def test_get_document_returns_document(self, mock_get: Mock) -> None:
"""Test get_document returns document object."""
mock_doc = Mock()
mock_doc.id = str(uuid.uuid4())
@ -235,6 +270,7 @@ class TestDocumentService:
mock_get.return_value = mock_doc
result = DocumentService.get_document(dataset_id="dataset_id", document_id="doc_id")
assert result is not None
assert result.name == "Test Document"
assert result.indexing_status == "completed"
@ -510,7 +546,7 @@ class TestDocumentApiGet:
"""
@pytest.fixture
def mock_doc_detail(self, mock_tenant):
def mock_doc_detail(self, mock_tenant: Mock) -> Mock:
"""A document mock with every attribute ``DocumentApi.get`` reads."""
doc = Mock()
doc.id = str(uuid.uuid4())
@ -551,8 +587,8 @@ class TestDocumentApiGet:
@patch("controllers.service_api.dataset.document.DatasetService")
@patch("controllers.service_api.dataset.document.DocumentService")
def test_get_document_success_with_all_metadata(
self, mock_doc_svc, mock_dataset_svc, app: Flask, mock_tenant, mock_doc_detail
):
self, mock_doc_svc: Mock, mock_dataset_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock
) -> None:
"""Test successful document retrieval with metadata='all'."""
# Arrange
dataset_id = str(uuid.uuid4())
@ -569,8 +605,8 @@ class TestDocumentApiGet:
method="GET",
):
api = DocumentApi()
api.get_dataset = Mock(return_value=mock_dataset)
response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
with patch.object(api, "get_dataset", return_value=mock_dataset):
response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
# Assert
assert response["id"] == mock_doc_detail.id
@ -580,7 +616,7 @@ class TestDocumentApiGet:
assert "doc_metadata" in response
@patch("controllers.service_api.dataset.document.DocumentService")
def test_get_document_not_found(self, mock_doc_svc, app: Flask, mock_tenant):
def test_get_document_not_found(self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock) -> None:
"""Test 404 when document is not found."""
# Arrange
dataset_id = str(uuid.uuid4())
@ -595,12 +631,14 @@ class TestDocumentApiGet:
method="GET",
):
api = DocumentApi()
api.get_dataset = Mock(return_value=mock_dataset)
with pytest.raises(NotFound):
api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id="nonexistent")
with patch.object(api, "get_dataset", return_value=mock_dataset):
with pytest.raises(NotFound):
api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id="nonexistent")
@patch("controllers.service_api.dataset.document.DocumentService")
def test_get_document_forbidden_wrong_tenant(self, mock_doc_svc, app: Flask, mock_tenant, mock_doc_detail):
def test_get_document_forbidden_wrong_tenant(
self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock
) -> None:
"""Test 403 when document tenant doesn't match request tenant."""
# Arrange
dataset_id = str(uuid.uuid4())
@ -616,12 +654,14 @@ class TestDocumentApiGet:
method="GET",
):
api = DocumentApi()
api.get_dataset = Mock(return_value=mock_dataset)
with pytest.raises(Forbidden):
api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
with patch.object(api, "get_dataset", return_value=mock_dataset):
with pytest.raises(Forbidden):
api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
@patch("controllers.service_api.dataset.document.DocumentService")
def test_get_document_metadata_only(self, mock_doc_svc, app: Flask, mock_tenant, mock_doc_detail):
def test_get_document_metadata_only(
self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock
) -> None:
"""Test document retrieval with metadata='only'."""
# Arrange
dataset_id = str(uuid.uuid4())
@ -637,8 +677,8 @@ class TestDocumentApiGet:
method="GET",
):
api = DocumentApi()
api.get_dataset = Mock(return_value=mock_dataset)
response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
with patch.object(api, "get_dataset", return_value=mock_dataset):
response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
# Assert — metadata='only' returns only id, doc_type, doc_metadata
assert response["id"] == mock_doc_detail.id
@ -649,8 +689,8 @@ class TestDocumentApiGet:
@patch("controllers.service_api.dataset.document.DatasetService")
@patch("controllers.service_api.dataset.document.DocumentService")
def test_get_document_metadata_without(
self, mock_doc_svc, mock_dataset_svc, app: Flask, mock_tenant, mock_doc_detail
):
self, mock_doc_svc: Mock, mock_dataset_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock
) -> None:
"""Test document retrieval with metadata='without'."""
# Arrange
dataset_id = str(uuid.uuid4())
@ -667,8 +707,8 @@ class TestDocumentApiGet:
method="GET",
):
api = DocumentApi()
api.get_dataset = Mock(return_value=mock_dataset)
response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
with patch.object(api, "get_dataset", return_value=mock_dataset):
response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
# Assert — metadata='without' omits doc_type / doc_metadata
assert response["id"] == mock_doc_detail.id
@ -677,7 +717,9 @@ class TestDocumentApiGet:
assert "name" in response
@patch("controllers.service_api.dataset.document.DocumentService")
def test_get_document_invalid_metadata_value(self, mock_doc_svc, app: Flask, mock_tenant, mock_doc_detail):
def test_get_document_invalid_metadata_value(
self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock
) -> None:
"""Test error when metadata parameter has invalid value."""
# Arrange
dataset_id = str(uuid.uuid4())
@ -693,9 +735,9 @@ class TestDocumentApiGet:
method="GET",
):
api = DocumentApi()
api.get_dataset = Mock(return_value=mock_dataset)
with pytest.raises(InvalidMetadataError):
api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
with patch.object(api, "get_dataset", return_value=mock_dataset):
with pytest.raises(InvalidMetadataError):
api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
class TestDocumentApiDelete:
@ -808,21 +850,26 @@ class TestDocumentApiDelete:
class TestDocumentListApi:
"""Test suite for DocumentListApi endpoint."""
@patch("controllers.service_api.dataset.document.marshal")
@patch("controllers.service_api.dataset.document.DocumentService")
@patch("controllers.service_api.dataset.document.db")
def test_list_documents_success(self, mock_db, mock_doc_svc, mock_marshal, app: Flask, mock_tenant, mock_dataset):
def test_list_documents_success(self, mock_db, mock_doc_svc, app: Flask, mock_tenant, mock_dataset):
"""Test successful document list retrieval."""
# Arrange
mock_db.session.scalar.return_value = mock_dataset
mock_pagination = Mock()
mock_pagination.items = [Mock(), Mock()]
mock_pagination.items = [
make_serializable_document(
id="doc-1",
name="Document 1",
doc_metadata_details=[{"id": "meta-1", "name": "amount", "type": "number", "value": 42}],
),
make_serializable_document(id="doc-2", name="Document 2"),
]
mock_pagination.total = 2
mock_db.paginate.return_value = mock_pagination
mock_doc_svc.enrich_documents_with_summary_index_status.return_value = None
mock_marshal.return_value = [{"id": "doc1"}, {"id": "doc2"}]
# Act
with app.test_request_context(
@ -838,6 +885,11 @@ class TestDocumentListApi:
assert response["page"] == 1
assert response["limit"] == 20
assert response["total"] == 2
assert response["data"][0]["id"] == "doc-1"
assert response["data"][0]["data_source_info"] == {"upload_file_id": "file-1"}
assert response["data"][0]["doc_metadata"][0]["value"] == 42
assert "data_source_info_dict" not in response["data"][0]
assert "doc_metadata_details" not in response["data"][0]
@patch("controllers.service_api.dataset.document.db")
def test_list_documents_dataset_not_found(self, mock_db, app: Flask, mock_tenant, mock_dataset):
@ -858,12 +910,9 @@ class TestDocumentListApi:
class TestDocumentIndexingStatusApi:
"""Test suite for DocumentIndexingStatusApi endpoint."""
@patch("controllers.service_api.dataset.document.marshal")
@patch("controllers.service_api.dataset.document.DocumentService")
@patch("controllers.service_api.dataset.document.db")
def test_get_indexing_status_success(
self, mock_db, mock_doc_svc, mock_marshal, app: Flask, mock_tenant, mock_dataset
):
def test_get_indexing_status_success(self, mock_db, mock_doc_svc, app: Flask, mock_tenant, mock_dataset):
"""Test successful indexing status retrieval."""
# Arrange
batch_id = "batch_123"
@ -884,7 +933,6 @@ class TestDocumentIndexingStatusApi:
# scalar() called 3 times: dataset lookup, completed_segments count, total_segments count
mock_db.session.scalar.side_effect = [mock_dataset, 5, 5]
mock_marshal.return_value = {"id": mock_doc.id, "indexing_status": "completed"}
# Act
with app.test_request_context(
@ -897,6 +945,12 @@ class TestDocumentIndexingStatusApi:
# Assert
assert "data" in response
assert len(response["data"]) == 1
item = response["data"][0]
assert item["id"] == mock_doc.id
assert item["indexing_status"] == "completed"
assert item["completed_segments"] == 5
assert item["total_segments"] == 5
assert item["processing_started_at"] is None
@patch("controllers.service_api.dataset.document.db")
def test_get_indexing_status_dataset_not_found(self, mock_db, app: Flask, mock_tenant, mock_dataset):
@ -973,7 +1027,6 @@ class TestDocumentAddByTextApi:
mock_rate_limit.enabled = False
mock_feature_svc.get_knowledge_rate_limit.return_value = mock_rate_limit
@patch("controllers.service_api.dataset.document.marshal")
@patch("controllers.service_api.dataset.document.DocumentService")
@patch("controllers.service_api.dataset.document.KnowledgeConfig")
@patch("controllers.service_api.dataset.document.FileService")
@ -990,7 +1043,6 @@ class TestDocumentAddByTextApi:
mock_file_svc_cls,
mock_knowledge_config,
mock_doc_svc,
mock_marshal,
app: Flask,
mock_tenant,
mock_dataset,
@ -1012,11 +1064,9 @@ class TestDocumentAddByTextApi:
mock_config = Mock()
mock_knowledge_config.model_validate.return_value = mock_config
mock_doc = Mock()
mock_doc.id = str(uuid.uuid4())
mock_doc = make_serializable_document(id="doc-create-text", name="Test Document")
mock_doc_svc.save_document_with_dataset_id.return_value = ([mock_doc], "batch_123")
mock_doc_svc.document_create_args_validate.return_value = None
mock_marshal.return_value = {"id": mock_doc.id, "name": "Test Document"}
# Act
with app.test_request_context(
@ -1037,6 +1087,10 @@ class TestDocumentAddByTextApi:
assert "document" in response
assert "batch" in response
assert response["batch"] == "batch_123"
assert response["document"]["id"] == "doc-create-text"
assert response["document"]["data_source_info"] == {"upload_file_id": "file-1"}
assert response["document"]["doc_metadata"] == []
assert "data_source_info_dict" not in response["document"]
@patch("controllers.service_api.wraps.FeatureService")
@patch("controllers.service_api.wraps.validate_and_get_api_token")
@ -1162,7 +1216,6 @@ class TestDocumentUpdateByTextApiPost:
``@cloud_edition_billing_rate_limit_check``.
"""
@patch("controllers.service_api.dataset.document.marshal")
@patch("controllers.service_api.dataset.document.DocumentService")
@patch("controllers.service_api.dataset.document.FileService")
@patch("controllers.service_api.dataset.document.current_user")
@ -1177,7 +1230,6 @@ class TestDocumentUpdateByTextApiPost:
mock_current_user,
mock_file_svc_cls,
mock_doc_svc,
mock_marshal,
app: Flask,
mock_tenant,
mock_dataset,
@ -1193,10 +1245,9 @@ class TestDocumentUpdateByTextApiPost:
mock_upload.id = str(uuid.uuid4())
mock_file_svc_cls.return_value.upload_text.return_value = mock_upload
mock_document = Mock()
mock_document = make_serializable_document(id="doc-update-text", name="Updated Doc")
mock_doc_svc.document_create_args_validate.return_value = None
mock_doc_svc.save_document_with_dataset_id.return_value = ([mock_document], "batch-1")
mock_marshal.return_value = {"id": "doc-1"}
doc_id = str(uuid.uuid4())
with app.test_request_context(
@ -1214,6 +1265,9 @@ class TestDocumentUpdateByTextApiPost:
assert status == 200
assert "document" in response
assert response["batch"] == "batch-1"
assert response["document"]["id"] == "doc-update-text"
assert response["document"]["doc_metadata"] == []
@patch("controllers.service_api.dataset.document.db")
@patch("controllers.service_api.wraps.FeatureService")
@ -1254,6 +1308,61 @@ class TestDocumentAddByFileApiPost:
decorators and ``@cloud_edition_billing_rate_limit_check``.
"""
@patch("controllers.service_api.dataset.document.DocumentService")
@patch("controllers.service_api.dataset.document.FileService")
@patch("controllers.service_api.dataset.document.current_user")
@patch("controllers.service_api.dataset.document.db")
@patch("controllers.service_api.wraps.FeatureService")
@patch("controllers.service_api.wraps.validate_and_get_api_token")
def test_add_by_file_success_serializes_document_and_batch_shape(
    self,
    mock_validate_token,
    mock_feature_svc,
    mock_db,
    mock_current_user,
    mock_file_svc_cls,
    mock_doc_svc,
    app: Flask,
    mock_tenant,
    mock_dataset,
):
    """Creating a document from an uploaded file returns the serialized document and batch.

    The saved document is a strict mock built by ``make_serializable_document``,
    and the assertions check the ``document`` payload returned by the endpoint
    rather than a patched serializer result.
    """
    from io import BytesIO

    # Arrange
    _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id)

    mock_dataset.provider = "vendor"
    mock_dataset.indexing_technique = "economy"
    mock_dataset.chunk_structure = None
    mock_dataset.latest_process_rule = Mock()
    mock_dataset.created_by_account = Mock()
    mock_db.session.scalar.return_value = mock_dataset
    mock_current_user.id = "user-1"

    uploaded_file = Mock(id=str(uuid.uuid4()))
    mock_file_svc_cls.return_value.upload_file.return_value = uploaded_file

    created_document = make_serializable_document(id="doc-create-file", name="File Document")
    mock_doc_svc.document_create_args_validate.return_value = None
    mock_doc_svc.save_document_with_dataset_id.return_value = ([created_document], "batch-file")

    form_data = {"file": (BytesIO(b"content"), "test.pdf", "application/pdf")}

    # Act
    with app.test_request_context(
        f"/datasets/{mock_dataset.id}/document/create-by-file",
        method="POST",
        content_type="multipart/form-data",
        data=form_data,
        headers={"Authorization": "Bearer test_token"},
    ):
        api = DocumentAddByFileApi()
        response, status = api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id)

    # Assert
    assert status == 200
    assert response["batch"] == "batch-file"
    serialized = response["document"]
    assert serialized["id"] == "doc-create-file"
    assert serialized["data_source_info"] == {"upload_file_id": "file-1"}
    assert serialized["doc_metadata"] == []
@patch("controllers.service_api.dataset.document.db")
@patch("controllers.service_api.wraps.FeatureService")
@patch("controllers.service_api.wraps.validate_and_get_api_token")
@ -1498,7 +1607,6 @@ class TestDocumentUpdateByFileApiPatch:
document_id=doc_id,
)
@patch("controllers.service_api.dataset.document.marshal")
@patch("controllers.service_api.dataset.document.DocumentService")
@patch("controllers.service_api.dataset.document.FileService")
@patch("controllers.service_api.dataset.document.current_user")
@ -1513,7 +1621,6 @@ class TestDocumentUpdateByFileApiPatch:
mock_current_user,
mock_file_svc_cls,
mock_doc_svc,
mock_marshal,
app: Flask,
mock_tenant,
mock_dataset,
@ -1532,11 +1639,9 @@ class TestDocumentUpdateByFileApiPatch:
mock_upload.id = str(uuid.uuid4())
mock_file_svc_cls.return_value.upload_file.return_value = mock_upload
mock_document = Mock()
mock_document.batch = "batch-1"
mock_document = make_serializable_document(id="doc-update-file", name="File Document", batch="batch-1")
mock_doc_svc.document_create_args_validate.return_value = None
mock_doc_svc.save_document_with_dataset_id.return_value = ([mock_document], None)
mock_marshal.return_value = {"id": "doc-1"}
from io import BytesIO
@ -1558,3 +1663,6 @@ class TestDocumentUpdateByFileApiPatch:
assert status == 200
assert "document" in response
assert response["batch"] == "batch-1"
assert response["document"]["id"] == "doc-update-file"
assert response["document"]["data_source_info"] == {"upload_file_id": "file-1"}

View File

@ -18,6 +18,19 @@ def _definition_refs(value: object) -> set[str]:
return refs
def _parameters_by_name(operation: dict[str, object]) -> dict[str, dict[str, object]]:
parameters = operation.get("parameters", [])
assert isinstance(parameters, list)
result: dict[str, dict[str, object]] = {}
for parameter in parameters:
if not isinstance(parameter, dict):
continue
name = parameter.get("name")
if isinstance(name, str):
result[name] = parameter
return result
@pytest.mark.parametrize(
("first_kwargs", "second_kwargs"),
[
@ -70,3 +83,60 @@ def test_swagger_json_endpoints_render(monkeypatch: pytest.MonkeyPatch):
assert not sorted(ref for ref in missing_refs if ref.startswith("_AnonymousInlineModel"))
assert app.config["RESTX_INCLUDE_ALL_MODELS"] is True
def test_service_document_file_routes_document_multipart_form_data(monkeypatch: pytest.MonkeyPatch):
    """Check that file-upload document routes are rendered as multipart form data.

    Renders the service API Swagger document and verifies that the
    create-by-file route declares a required ``file`` form field, that the
    three update routes declare it as not required, and that every one of
    them exposes a string ``data`` form field.
    """
    from configs import dify_config
    from controllers.service_api import bp as service_api_bp

    def assert_multipart_upload(operation: dict[str, object], *, file_required: bool) -> None:
        # Shared expectations for an operation that accepts a file upload.
        form_fields = _parameters_by_name(operation)
        assert operation["consumes"] == ["multipart/form-data"]
        assert form_fields["file"]["in"] == "formData"
        assert form_fields["file"]["type"] == "file"
        assert form_fields["file"]["required"] is file_required
        assert form_fields["data"]["in"] == "formData"
        assert form_fields["data"]["type"] == "string"

    monkeypatch.setattr(dify_config, "SWAGGER_UI_ENABLED", True)

    app = Flask(__name__)
    app.config.update(TESTING=True, RESTX_INCLUDE_ALL_MODELS=True)
    app.register_blueprint(service_api_bp)

    swagger = app.test_client().get("/v1/swagger.json").get_json()
    paths = swagger["paths"]

    assert_multipart_upload(
        paths["/datasets/{dataset_id}/document/create-by-file"]["post"],
        file_required=True,
    )

    # The bare document route is updated via PATCH; the two aliases use POST.
    update_routes = (
        ("/datasets/{dataset_id}/documents/{document_id}", "patch"),
        ("/datasets/{dataset_id}/documents/{document_id}/update-by-file", "post"),
        ("/datasets/{dataset_id}/documents/{document_id}/update_by_file", "post"),
    )
    for route, http_method in update_routes:
        assert_multipart_upload(paths[route][http_method], file_required=False)
def test_service_document_list_documents_query_params_render(monkeypatch: pytest.MonkeyPatch):
    """Check that the document list route renders its filters as query parameters.

    Renders the service API Swagger document and asserts that ``page``,
    ``limit``, ``keyword`` and ``status`` are each declared with
    ``in == "query"`` on ``GET /datasets/{dataset_id}/documents``.
    """
    from configs import dify_config
    from controllers.service_api import bp as service_api_bp

    monkeypatch.setattr(dify_config, "SWAGGER_UI_ENABLED", True)

    app = Flask(__name__)
    app.config.update(TESTING=True, RESTX_INCLUDE_ALL_MODELS=True)
    app.register_blueprint(service_api_bp)

    swagger = app.test_client().get("/v1/swagger.json").get_json()
    list_operation = swagger["paths"]["/datasets/{dataset_id}/documents"]["get"]
    query_params = _parameters_by_name(list_operation)

    for expected_name in ("page", "limit", "keyword", "status"):
        assert query_params[expected_name]["in"] == "query"

View File

@ -676,16 +676,8 @@ export const indexingEstimate2 = {
get: get13,
}
/**
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get14 = oc
.route({
deprecated: true,
description:
'Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsByDatasetIdBatchByBatchIndexingStatus',
@ -862,16 +854,10 @@ export const indexingEstimate3 = {
/**
* Get document indexing status
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get17 = oc
.route({
deprecated: true,
description:
'Get document indexing status\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get document indexing status',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatus',
@ -1410,16 +1396,10 @@ export const delete7 = oc
/**
* Get documents in a dataset
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get26 = oc
.route({
deprecated: true,
description:
'Get documents in a dataset\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get documents in a dataset',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsByDatasetIdDocuments',

View File

@ -290,6 +290,18 @@ export type AutoDisableLogsResponse = {
document_ids: Array<string>
}
/**
 * Envelope whose `data` array holds `DocumentStatusResponse` entries.
 * Used in this file as the 200 response of the batch indexing-status route.
 */
export type DocumentStatusListResponse = {
data: Array<DocumentStatusResponse>
}
/**
 * List of `DocumentWithSegmentsResponse` entries together with the paging
 * fields `page`, `limit`, `total` and `has_more`; all five keys are required.
 * Used in this file as the 200 response of the dataset documents GET route.
 */
export type DocumentWithSegmentsListResponse = {
data: Array<DocumentWithSegmentsResponse>
has_more: boolean
limit: number
page: number
total: number
}
export type DocumentBatchDownloadZipPayload = {
document_ids: Array<string>
}
@ -306,6 +318,21 @@ export type UrlResponse = {
url: string
}
/**
 * Indexing status of one document.
 *
 * Only `completed_segments` and `total_segments` may be omitted. Every other
 * key is always present; `error` and the `*_at` keys may be `null`.
 */
// NOTE(review): the `*_at` numbers are presumably epoch timestamps; confirm against the backend serializer.
export type DocumentStatusResponse = {
cleaning_completed_at: number | null
completed_at: number | null
completed_segments?: number | null
error: string | null
id: string
indexing_status: string
parsing_completed_at: number | null
paused_at: number | null
processing_started_at: number | null
splitting_completed_at: number | null
stopped_at: number | null
total_segments?: number | null
}
export type DocumentMetadataUpdatePayload = {
doc_metadata?: unknown
doc_type?: string | null
@ -326,14 +353,14 @@ export type DocumentResponse = {
created_by?: string | null
created_from?: string | null
data_source_detail_dict?: unknown
data_source_info_dict?: unknown
data_source_info?: unknown
data_source_type?: string | null
dataset_process_rule_id?: string | null
disabled_at?: number | null
disabled_by?: string | null
display_status?: string | null
doc_form?: string | null
doc_metadata_details?: Array<DocumentMetadataResponse>
doc_metadata?: Array<DocumentMetadataResponse>
enabled?: boolean | null
error?: string | null
hit_count?: number | null
@ -433,10 +460,6 @@ export type HitTestingResponse = {
records: Array<HitTestingRecord>
}
export type DocumentStatusListResponse = {
data: Array<DocumentStatusResponse>
}
export type DatasetMetadataListResponse = {
built_in_field_enabled: boolean
doc_metadata: Array<DatasetMetadataListItemResponse>
@ -671,6 +694,36 @@ export type DatasetMetadataBuiltInFieldResponse = {
type: string
}
/**
 * Document entry carrying segment counters (`completed_segments`,
 * `total_segments`) alongside the document fields.
 *
 * Only `id` and `name` are required; every other key may be omitted.
 * `data_source_detail_dict`, `data_source_info` and `process_rule_dict` are
 * typed as `unknown`, so callers must narrow them before use.
 */
export type DocumentWithSegmentsResponse = {
archived?: boolean | null
completed_segments?: number | null
created_at?: number | null
created_by?: string | null
created_from?: string | null
data_source_detail_dict?: unknown
data_source_info?: unknown
data_source_type?: string | null
dataset_process_rule_id?: string | null
disabled_at?: number | null
disabled_by?: string | null
display_status?: string | null
doc_form?: string | null
doc_metadata?: Array<DocumentMetadataResponse>
enabled?: boolean | null
error?: string | null
hit_count?: number | null
id: string
indexing_status?: string | null
name: string
need_summary?: boolean | null
position?: number | null
process_rule_dict?: unknown
summary_index_status?: string | null
tokens?: number | null
total_segments?: number | null
word_count?: number | null
}
export type DocumentMetadataOperation = {
document_id: string
metadata_list: Array<MetadataDetail>
@ -681,7 +734,7 @@ export type DocumentMetadataResponse = {
id: string
name: string
type: string
value?: string | null
value?: unknown
}
export type SegmentResponse = {
@ -730,21 +783,6 @@ export type ChildChunkUpdateArgs = {
id?: string | null
}
export type DocumentStatusResponse = {
cleaning_completed_at: number | null
completed_at: number | null
completed_segments?: number | null
error: string | null
id: string
indexing_status: string
parsing_completed_at: number | null
paused_at: number | null
processing_started_at: number | null
splitting_completed_at: number | null
stopped_at: number | null
total_segments?: number | null
}
export type HitTestingQuery = {
content: string
}
@ -1573,9 +1611,7 @@ export type GetDatasetsByDatasetIdBatchByBatchIndexingStatusData = {
}
export type GetDatasetsByDatasetIdBatchByBatchIndexingStatusResponses = {
200: {
[key: string]: unknown
}
200: DocumentStatusListResponse
}
export type GetDatasetsByDatasetIdBatchByBatchIndexingStatusResponse
@ -1616,9 +1652,7 @@ export type GetDatasetsByDatasetIdDocumentsData = {
}
export type GetDatasetsByDatasetIdDocumentsResponses = {
200: {
[key: string]: unknown
}
200: DocumentWithSegmentsListResponse
}
export type GetDatasetsByDatasetIdDocumentsResponse
@ -1841,9 +1875,7 @@ export type GetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusError
= GetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusErrors[keyof GetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusErrors]
export type GetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusResponses = {
200: {
[key: string]: unknown
}
200: DocumentStatusResponse
}
export type GetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusResponse

View File

@ -143,6 +143,31 @@ export const zUrlResponse = z.object({
url: z.string(),
})
/**
 * DocumentStatusResponse
 *
 * Zod schema for the indexing status of one document. Keys declared with
 * `.nullable()` must be present but may be `null`; the two segment counters
 * use `.nullish()`, so they may be `null` or omitted entirely.
 */
export const zDocumentStatusResponse = z.object({
cleaning_completed_at: z.int().nullable(),
completed_at: z.int().nullable(),
completed_segments: z.int().nullish(),
error: z.string().nullable(),
id: z.string(),
indexing_status: z.string(),
parsing_completed_at: z.int().nullable(),
paused_at: z.int().nullable(),
processing_started_at: z.int().nullable(),
splitting_completed_at: z.int().nullable(),
stopped_at: z.int().nullable(),
total_segments: z.int().nullish(),
})
/**
 * DocumentStatusListResponse
 *
 * Zod schema for an object whose `data` key is an array of items validated
 * by `zDocumentStatusResponse`.
 */
export const zDocumentStatusListResponse = z.object({
data: z.array(zDocumentStatusResponse),
})
/**
* DocumentMetadataUpdatePayload
*/
@ -202,6 +227,14 @@ export const zChildChunkUpdatePayload = z.object({
content: z.string(),
})
/**
 * ErrorDocsResponse
 *
 * Zod schema for an object with a `data` array of items validated by
 * `zDocumentStatusResponse` and an integer `total`.
 */
export const zErrorDocsResponse = z.object({
data: z.array(zDocumentStatusResponse),
total: z.int(),
})
/**
* ExternalHitTestingPayload
*/
@ -455,7 +488,7 @@ export const zDocumentMetadataResponse = z.object({
id: z.string(),
name: z.string(),
type: z.string(),
value: z.string().nullish(),
value: z.unknown().optional(),
})
/**
@ -467,14 +500,14 @@ export const zDocumentResponse = z.object({
created_by: z.string().nullish(),
created_from: z.string().nullish(),
data_source_detail_dict: z.unknown().optional(),
data_source_info_dict: z.unknown().optional(),
data_source_info: z.unknown().optional(),
data_source_type: z.string().nullish(),
dataset_process_rule_id: z.string().nullish(),
disabled_at: z.int().nullish(),
disabled_by: z.string().nullish(),
display_status: z.string().nullish(),
doc_form: z.string().nullish(),
doc_metadata_details: z.array(zDocumentMetadataResponse).optional(),
doc_metadata: z.array(zDocumentMetadataResponse).optional(),
enabled: z.boolean().nullish(),
error: z.string().nullish(),
hit_count: z.int().nullish(),
@ -497,6 +530,50 @@ export const zDatasetAndDocumentResponse = z.object({
documents: z.array(zDocumentResponse),
})
/**
 * DocumentWithSegmentsResponse
 *
 * Zod schema for a document entry that includes segment counters. Only `id`
 * and `name` are required. Keys declared with `.nullish()` may be `null` or
 * omitted; keys declared with `.optional()` may be omitted.
 */
export const zDocumentWithSegmentsResponse = z.object({
archived: z.boolean().nullish(),
completed_segments: z.int().nullish(),
created_at: z.int().nullish(),
created_by: z.string().nullish(),
created_from: z.string().nullish(),
data_source_detail_dict: z.unknown().optional(),
data_source_info: z.unknown().optional(),
data_source_type: z.string().nullish(),
dataset_process_rule_id: z.string().nullish(),
disabled_at: z.int().nullish(),
disabled_by: z.string().nullish(),
display_status: z.string().nullish(),
doc_form: z.string().nullish(),
doc_metadata: z.array(zDocumentMetadataResponse).optional(),
enabled: z.boolean().nullish(),
error: z.string().nullish(),
hit_count: z.int().nullish(),
id: z.string(),
indexing_status: z.string().nullish(),
name: z.string(),
need_summary: z.boolean().nullish(),
position: z.int().nullish(),
process_rule_dict: z.unknown().optional(),
summary_index_status: z.string().nullish(),
tokens: z.int().nullish(),
total_segments: z.int().nullish(),
word_count: z.int().nullish(),
})
/**
 * DocumentWithSegmentsListResponse
 *
 * Zod schema for a list of items validated by `zDocumentWithSegmentsResponse`
 * plus the paging fields; all five keys are required.
 */
export const zDocumentWithSegmentsListResponse = z.object({
data: z.array(zDocumentWithSegmentsResponse),
has_more: z.boolean(),
limit: z.int(),
page: z.int(),
total: z.int(),
})
/**
* ChildChunkResponse
*/
@ -551,39 +628,6 @@ export const zChildChunkBatchUpdatePayload = z.object({
chunks: z.array(zChildChunkUpdateArgs),
})
/**
* DocumentStatusResponse
*/
export const zDocumentStatusResponse = z.object({
cleaning_completed_at: z.int().nullable(),
completed_at: z.int().nullable(),
completed_segments: z.int().nullish(),
error: z.string().nullable(),
id: z.string(),
indexing_status: z.string(),
parsing_completed_at: z.int().nullable(),
paused_at: z.int().nullable(),
processing_started_at: z.int().nullable(),
splitting_completed_at: z.int().nullable(),
stopped_at: z.int().nullable(),
total_segments: z.int().nullish(),
})
/**
* ErrorDocsResponse
*/
export const zErrorDocsResponse = z.object({
data: z.array(zDocumentStatusResponse),
total: z.int(),
})
/**
* DocumentStatusListResponse
*/
export const zDocumentStatusListResponse = z.object({
data: z.array(zDocumentStatusResponse),
})
/**
* HitTestingQuery
*/
@ -1586,12 +1630,9 @@ export const zGetDatasetsByDatasetIdBatchByBatchIndexingStatusPath = z.object({
})
/**
* Success
* Indexing status retrieved successfully
*/
export const zGetDatasetsByDatasetIdBatchByBatchIndexingStatusResponse = z.record(
z.string(),
z.unknown(),
)
export const zGetDatasetsByDatasetIdBatchByBatchIndexingStatusResponse = zDocumentStatusListResponse
export const zDeleteDatasetsByDatasetIdDocumentsPath = z.object({
dataset_id: z.string(),
@ -1618,7 +1659,7 @@ export const zGetDatasetsByDatasetIdDocumentsQuery = z.object({
/**
* Documents retrieved successfully
*/
export const zGetDatasetsByDatasetIdDocumentsResponse = z.record(z.string(), z.unknown())
export const zGetDatasetsByDatasetIdDocumentsResponse = zDocumentWithSegmentsListResponse
export const zPostDatasetsByDatasetIdDocumentsBody = zKnowledgeConfig
@ -1738,10 +1779,8 @@ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusPath = z.
/**
* Indexing status retrieved successfully
*/
export const zGetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusResponse = z.record(
z.string(),
z.unknown(),
)
export const zGetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusResponse
= zDocumentStatusResponse
export const zPutDatasetsByDatasetIdDocumentsByDocumentIdMetadataBody
= zDocumentMetadataUpdatePayload

View File

@ -46,6 +46,7 @@ import {
zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsQuery,
zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse,
zGetDatasetsByDatasetIdDocumentsPath,
zGetDatasetsByDatasetIdDocumentsQuery,
zGetDatasetsByDatasetIdDocumentsResponse,
zGetDatasetsByDatasetIdMetadataBuiltInPath,
zGetDatasetsByDatasetIdMetadataBuiltInResponse,
@ -87,6 +88,7 @@ import {
zGetWorkspacesCurrentModelsModelTypesByModelTypePath,
zGetWorkspacesCurrentModelsModelTypesByModelTypeResponse,
zPatchDatasetsByDatasetIdBody,
zPatchDatasetsByDatasetIdDocumentsByDocumentIdBody,
zPatchDatasetsByDatasetIdDocumentsByDocumentIdPath,
zPatchDatasetsByDatasetIdDocumentsByDocumentIdResponse,
zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdBody,
@ -119,8 +121,10 @@ import {
zPostConversationsByCIdNamePath,
zPostConversationsByCIdNameResponse,
zPostDatasetsBody,
zPostDatasetsByDatasetIdDocumentCreateByFile2Body,
zPostDatasetsByDatasetIdDocumentCreateByFile2Path,
zPostDatasetsByDatasetIdDocumentCreateByFile2Response,
zPostDatasetsByDatasetIdDocumentCreateByFileBody,
zPostDatasetsByDatasetIdDocumentCreateByFilePath,
zPostDatasetsByDatasetIdDocumentCreateByFileResponse,
zPostDatasetsByDatasetIdDocumentCreateByText2Body,
@ -138,8 +142,10 @@ import {
zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdResponse,
zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsPath,
zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsResponse,
zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Body,
zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Path,
zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Response,
zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileBody,
zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFilePath,
zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileResponse,
zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Body,
@ -853,44 +859,42 @@ export const tags = {
/**
* Create a new document by uploading a file
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const post13 = oc
.route({
deprecated: true,
description:
'Create a new document by uploading a file\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Create a new document by uploading a file',
inputStructure: 'detailed',
method: 'POST',
operationId: 'postDatasetsByDatasetIdDocumentCreateByFile',
path: '/datasets/{dataset_id}/document/create-by-file',
tags: ['service_api'],
})
.input(z.object({ params: zPostDatasetsByDatasetIdDocumentCreateByFilePath }))
.input(
z.object({
body: zPostDatasetsByDatasetIdDocumentCreateByFileBody,
params: zPostDatasetsByDatasetIdDocumentCreateByFilePath,
}),
)
.output(zPostDatasetsByDatasetIdDocumentCreateByFileResponse)
/**
* Create a new document by uploading a file
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const post14 = oc
.route({
deprecated: true,
description:
'Create a new document by uploading a file\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Create a new document by uploading a file',
inputStructure: 'detailed',
method: 'POST',
operationId: 'postDatasetsByDatasetIdDocumentCreateByFile',
path: '/datasets/{dataset_id}/document/create_by_file',
tags: ['service_api'],
})
.input(z.object({ params: zPostDatasetsByDatasetIdDocumentCreateByFile2Path }))
.input(
z.object({
body: zPostDatasetsByDatasetIdDocumentCreateByFile2Body,
params: zPostDatasetsByDatasetIdDocumentCreateByFile2Path,
}),
)
.output(zPostDatasetsByDatasetIdDocumentCreateByFile2Response)
export const createByFile = {
@ -899,16 +903,10 @@ export const createByFile = {
/**
* Create a new document by providing text content
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const post15 = oc
.route({
deprecated: true,
description:
'Create a new document by providing text content\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Create a new document by providing text content',
inputStructure: 'detailed',
method: 'POST',
operationId: 'postDatasetsByDatasetIdDocumentCreateByText',
@ -926,15 +924,13 @@ export const post15 = oc
/**
* Deprecated legacy alias for creating a new document by providing text content. Use /datasets/{dataset_id}/document/create-by-text instead.
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const post16 = oc
.route({
deprecated: true,
description:
'Deprecated legacy alias for creating a new document by providing text content. Use /datasets/{dataset_id}/document/create-by-text instead.\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
'Deprecated legacy alias for creating a new document by providing text content. Use /datasets/{dataset_id}/document/create-by-text instead.',
inputStructure: 'detailed',
method: 'POST',
operationId: 'postDatasetsByDatasetIdDocumentCreateByText',
@ -1062,16 +1058,10 @@ export const status2 = {
/**
* Get indexing status for documents in a batch
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get8 = oc
.route({
deprecated: true,
description:
'Get indexing status for documents in a batch\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Get indexing status for documents in a batch',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsByDatasetIdDocumentsByBatchIndexingStatus',
@ -1317,43 +1307,49 @@ export const segments = {
/**
* Deprecated legacy alias for updating an existing document by uploading a file. Use PATCH /datasets/{dataset_id}/documents/{document_id} instead.
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const post22 = oc
.route({
deprecated: true,
description:
'Deprecated legacy alias for updating an existing document by uploading a file. Use PATCH /datasets/{dataset_id}/documents/{document_id} instead.\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
'Deprecated legacy alias for updating an existing document by uploading a file. Use PATCH /datasets/{dataset_id}/documents/{document_id} instead.',
inputStructure: 'detailed',
method: 'POST',
operationId: 'postDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile',
path: '/datasets/{dataset_id}/documents/{document_id}/update-by-file',
tags: ['service_api'],
})
.input(z.object({ params: zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFilePath }))
.input(
z.object({
body: zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileBody.optional(),
params: zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFilePath,
}),
)
.output(zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileResponse)
/**
* Deprecated legacy alias for updating an existing document by uploading a file. Use PATCH /datasets/{dataset_id}/documents/{document_id} instead.
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const post23 = oc
.route({
deprecated: true,
description:
'Deprecated legacy alias for updating an existing document by uploading a file. Use PATCH /datasets/{dataset_id}/documents/{document_id} instead.\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
'Deprecated legacy alias for updating an existing document by uploading a file. Use PATCH /datasets/{dataset_id}/documents/{document_id} instead.',
inputStructure: 'detailed',
method: 'POST',
operationId: 'postDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile',
path: '/datasets/{dataset_id}/documents/{document_id}/update_by_file',
tags: ['service_api'],
})
.input(z.object({ params: zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Path }))
.input(
z.object({
body: zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Body.optional(),
params: zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Path,
}),
)
.output(zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Response)
export const updateByFile = {
@ -1362,16 +1358,10 @@ export const updateByFile = {
/**
* Update an existing document by providing text content
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const post24 = oc
.route({
deprecated: true,
description:
'Update an existing document by providing text content\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Update an existing document by providing text content',
inputStructure: 'detailed',
method: 'POST',
operationId: 'postDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText',
@ -1389,15 +1379,13 @@ export const post24 = oc
/**
* Deprecated legacy alias for updating an existing document by providing text content. Use /datasets/{dataset_id}/documents/{document_id}/update-by-text instead.
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const post25 = oc
.route({
deprecated: true,
description:
'Deprecated legacy alias for updating an existing document by providing text content. Use /datasets/{dataset_id}/documents/{document_id}/update-by-text instead.\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
'Deprecated legacy alias for updating an existing document by providing text content. Use /datasets/{dataset_id}/documents/{document_id}/update-by-text instead.',
inputStructure: 'detailed',
method: 'POST',
operationId: 'postDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText',
@ -1458,23 +1446,22 @@ export const get13 = oc
/**
* Update an existing document by uploading a file
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const patch4 = oc
.route({
deprecated: true,
description:
'Update an existing document by uploading a file\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'Update an existing document by uploading a file',
inputStructure: 'detailed',
method: 'PATCH',
operationId: 'patchDatasetsByDatasetIdDocumentsByDocumentId',
path: '/datasets/{dataset_id}/documents/{document_id}',
tags: ['service_api'],
})
.input(z.object({ params: zPatchDatasetsByDatasetIdDocumentsByDocumentIdPath }))
.input(
z.object({
body: zPatchDatasetsByDatasetIdDocumentsByDocumentIdBody.optional(),
params: zPatchDatasetsByDatasetIdDocumentsByDocumentIdPath,
}),
)
.output(zPatchDatasetsByDatasetIdDocumentsByDocumentIdResponse)
export const byDocumentId = {
@ -1489,23 +1476,22 @@ export const byDocumentId = {
/**
* List all documents in a dataset
*
* Generated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.
*
* @deprecated
*/
export const get14 = oc
.route({
deprecated: true,
description:
'List all documents in a dataset\n\nGenerated contract types may be inaccurate because backend OpenAPI annotations are incomplete. Do not migrate callers until the generated contract is accurate.',
description: 'List all documents in a dataset',
inputStructure: 'detailed',
method: 'GET',
operationId: 'getDatasetsByDatasetIdDocuments',
path: '/datasets/{dataset_id}/documents',
tags: ['service_api'],
})
.input(z.object({ params: zGetDatasetsByDatasetIdDocumentsPath }))
.input(
z.object({
params: zGetDatasetsByDatasetIdDocumentsPath,
query: zGetDatasetsByDatasetIdDocumentsQuery.optional(),
}),
)
.output(zGetDatasetsByDatasetIdDocumentsResponse)
export const documents = {

View File

@ -420,6 +420,11 @@ export type DatasourceNodeRunPayload = {
is_published: boolean
}
/**
* Response body that pairs a single document with a `batch` string.
*
* In this file it is the 200 payload of the document create-by-file,
* create-by-text, update-by-file, update-by-text and PATCH-document operations.
*/
export type DocumentAndBatchResponse = {
// NOTE(review): presumably the batch identifier accepted by the
// `/documents/{batch}/indexing-status` operation — confirm against the backend.
batch: string
document: DocumentResponse
}
/**
* Request payload carrying the list of document ids for the
* documents download-zip operation.
*/
export type DocumentBatchDownloadZipPayload = {
document_ids: Array<string>
}
@ -431,12 +436,73 @@ export type DocumentListQuery = {
status?: string | null
}
/**
* Paged list of documents; the 200 payload of
* `GET /datasets/{dataset_id}/documents`.
*
* All five keys are required.
*/
export type DocumentListResponse = {
data: Array<DocumentResponse>
has_more: boolean
// NOTE(review): `limit` / `page` presumably echo the request's paging query
// parameters and `total` the overall match count — confirm against the backend.
limit: number
page: number
total: number
}
/**
* One metadata operation: a target document id plus the list of metadata
* entries (`MetadataDetail`) to apply to it.
*/
export type DocumentMetadataOperation = {
document_id: string
metadata_list: Array<MetadataDetail>
// Optional flag; its default and exact merge semantics are defined by the
// backend and are not visible here.
partial_update?: boolean
}
/**
* A single metadata entry attached to a document (element type of
* `DocumentResponse.doc_metadata`).
*
* `id`, `name` and `type` are required strings; `value` is optional and
* untyped (`unknown`), so callers must narrow it before use.
*/
export type DocumentMetadataResponse = {
id: string
name: string
type: string
value?: unknown
}
/**
* Serialized document as returned by the dataset document endpoints.
*
* Only `id` and `name` are required. Every other key may be omitted, and all
* of them except `data_source_detail_dict`, `data_source_info` and
* `doc_metadata` may additionally be `null`.
*/
export type DocumentResponse = {
archived?: boolean | null
// Numeric timestamp. NOTE(review): unit (seconds vs. milliseconds) is not
// visible here — confirm against the backend serializer.
created_at?: number | null
created_by?: string | null
created_from?: string | null
// Untyped payloads: narrow before use.
data_source_detail_dict?: unknown
data_source_info?: unknown
data_source_type?: string | null
dataset_process_rule_id?: string | null
// Numeric timestamp; same unit caveat as `created_at`.
disabled_at?: number | null
disabled_by?: string | null
display_status?: string | null
doc_form?: string | null
doc_metadata?: Array<DocumentMetadataResponse>
enabled?: boolean | null
error?: string | null
hit_count?: number | null
id: string
indexing_status?: string | null
name: string
need_summary?: boolean | null
position?: number | null
summary_index_status?: string | null
tokens?: number | null
word_count?: number | null
}
/**
* Wrapper around a list of per-document indexing statuses; the 200 payload of
* the by-batch indexing-status operation.
*/
export type DocumentStatusListResponse = {
data: Array<DocumentStatusResponse>
}
/**
* Indexing status of a single document.
*
* `id` and `indexing_status` are required, non-null strings. The `*_at`
* fields and `error` are required keys whose value may be `null`. Only
* `completed_segments` and `total_segments` may be omitted entirely.
*/
export type DocumentStatusResponse = {
// The `*_at` fields are numeric timestamps. NOTE(review): unit is not visible
// here — confirm against the backend serializer.
cleaning_completed_at: number | null
completed_at: number | null
completed_segments?: number | null
error: string | null
id: string
indexing_status: string
parsing_completed_at: number | null
paused_at: number | null
processing_started_at: number | null
splitting_completed_at: number | null
stopped_at: number | null
total_segments?: number | null
}
export type DocumentTextCreatePayload = {
doc_form?: string
doc_language?: string
@ -1819,7 +1885,10 @@ export type PatchDatasetsByDatasetIdResponse
= PatchDatasetsByDatasetIdResponses[keyof PatchDatasetsByDatasetIdResponses]
export type PostDatasetsByDatasetIdDocumentCreateByFileData = {
body?: never
body: {
data?: string
file: Blob | File
}
path: {
dataset_id: string
}
@ -1840,9 +1909,7 @@ export type PostDatasetsByDatasetIdDocumentCreateByFileError
= PostDatasetsByDatasetIdDocumentCreateByFileErrors[keyof PostDatasetsByDatasetIdDocumentCreateByFileErrors]
export type PostDatasetsByDatasetIdDocumentCreateByFileResponses = {
200: {
[key: string]: unknown
}
200: DocumentAndBatchResponse
}
export type PostDatasetsByDatasetIdDocumentCreateByFileResponse
@ -1870,16 +1937,17 @@ export type PostDatasetsByDatasetIdDocumentCreateByTextError
= PostDatasetsByDatasetIdDocumentCreateByTextErrors[keyof PostDatasetsByDatasetIdDocumentCreateByTextErrors]
export type PostDatasetsByDatasetIdDocumentCreateByTextResponses = {
200: {
[key: string]: unknown
}
200: DocumentAndBatchResponse
}
export type PostDatasetsByDatasetIdDocumentCreateByTextResponse
= PostDatasetsByDatasetIdDocumentCreateByTextResponses[keyof PostDatasetsByDatasetIdDocumentCreateByTextResponses]
export type PostDatasetsByDatasetIdDocumentCreateByFile2Data = {
body?: never
body: {
data?: string
file: Blob | File
}
path: {
dataset_id: string
}
@ -1900,9 +1968,7 @@ export type PostDatasetsByDatasetIdDocumentCreateByFile2Error
= PostDatasetsByDatasetIdDocumentCreateByFile2Errors[keyof PostDatasetsByDatasetIdDocumentCreateByFile2Errors]
export type PostDatasetsByDatasetIdDocumentCreateByFile2Responses = {
200: {
[key: string]: unknown
}
200: DocumentAndBatchResponse
}
export type PostDatasetsByDatasetIdDocumentCreateByFile2Response
@ -1930,9 +1996,7 @@ export type PostDatasetsByDatasetIdDocumentCreateByText2Error
= PostDatasetsByDatasetIdDocumentCreateByText2Errors[keyof PostDatasetsByDatasetIdDocumentCreateByText2Errors]
export type PostDatasetsByDatasetIdDocumentCreateByText2Responses = {
200: {
[key: string]: unknown
}
200: DocumentAndBatchResponse
}
export type PostDatasetsByDatasetIdDocumentCreateByText2Response
@ -1943,7 +2007,12 @@ export type GetDatasetsByDatasetIdDocumentsData = {
path: {
dataset_id: string
}
query?: never
query?: {
keyword?: string
limit?: number
page?: number
status?: string
}
url: '/datasets/{dataset_id}/documents'
}
@ -1960,9 +2029,7 @@ export type GetDatasetsByDatasetIdDocumentsError
= GetDatasetsByDatasetIdDocumentsErrors[keyof GetDatasetsByDatasetIdDocumentsErrors]
export type GetDatasetsByDatasetIdDocumentsResponses = {
200: {
[key: string]: unknown
}
200: DocumentListResponse
}
export type GetDatasetsByDatasetIdDocumentsResponse
@ -2087,9 +2154,7 @@ export type GetDatasetsByDatasetIdDocumentsByBatchIndexingStatusError
= GetDatasetsByDatasetIdDocumentsByBatchIndexingStatusErrors[keyof GetDatasetsByDatasetIdDocumentsByBatchIndexingStatusErrors]
export type GetDatasetsByDatasetIdDocumentsByBatchIndexingStatusResponses = {
200: {
[key: string]: unknown
}
200: DocumentStatusListResponse
}
export type GetDatasetsByDatasetIdDocumentsByBatchIndexingStatusResponse
@ -2164,7 +2229,10 @@ export type GetDatasetsByDatasetIdDocumentsByDocumentIdResponse
= GetDatasetsByDatasetIdDocumentsByDocumentIdResponses[keyof GetDatasetsByDatasetIdDocumentsByDocumentIdResponses]
export type PatchDatasetsByDatasetIdDocumentsByDocumentIdData = {
body?: never
body?: {
data?: string
file?: Blob | File
}
path: {
dataset_id: string
document_id: string
@ -2186,9 +2254,7 @@ export type PatchDatasetsByDatasetIdDocumentsByDocumentIdError
= PatchDatasetsByDatasetIdDocumentsByDocumentIdErrors[keyof PatchDatasetsByDatasetIdDocumentsByDocumentIdErrors]
export type PatchDatasetsByDatasetIdDocumentsByDocumentIdResponses = {
200: {
[key: string]: unknown
}
200: DocumentAndBatchResponse
}
export type PatchDatasetsByDatasetIdDocumentsByDocumentIdResponse
@ -2519,7 +2585,10 @@ export type PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChil
= PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdResponses[keyof PatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdResponses]
export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileData = {
body?: never
body?: {
data?: string
file?: Blob | File
}
path: {
dataset_id: string
document_id: string
@ -2541,9 +2610,7 @@ export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileError
= PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileErrors[keyof PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileErrors]
export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileResponses = {
200: {
[key: string]: unknown
}
200: DocumentAndBatchResponse
}
export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileResponse
@ -2572,16 +2639,17 @@ export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextError
= PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextErrors[keyof PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextErrors]
export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextResponses = {
200: {
[key: string]: unknown
}
200: DocumentAndBatchResponse
}
export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextResponse
= PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextResponses[keyof PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextResponses]
export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Data = {
body?: never
body?: {
data?: string
file?: Blob | File
}
path: {
dataset_id: string
document_id: string
@ -2603,9 +2671,7 @@ export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Error
= PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Errors[keyof PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Errors]
export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Responses = {
200: {
[key: string]: unknown
}
200: DocumentAndBatchResponse
}
export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Response
@ -2634,9 +2700,7 @@ export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Error
= PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Errors[keyof PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Errors]
export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Responses = {
200: {
[key: string]: unknown
}
200: DocumentAndBatchResponse
}
export type PostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Response

View File

@ -537,6 +537,90 @@ export const zDocumentListQuery = z.object({
status: z.string().nullish(),
})
/**
* DocumentMetadataResponse
*
* Zod schema for one metadata entry of a document: required string `id`,
* `name` and `type`, plus an optional, unconstrained `value`.
*/
export const zDocumentMetadataResponse = z.object({
id: z.string(),
name: z.string(),
type: z.string(),
value: z.unknown().optional(),
})
/**
* DocumentResponse
*
* Zod schema for a serialized document. Only `id` and `name` are required.
* Fields declared with `.nullish()` accept a value, `null`, or absence.
* `data_source_detail_dict`, `data_source_info` and `doc_metadata` are
* `.optional()` only (may be absent, but `null` is not added by the schema;
* the two `z.unknown()` fields accept any value regardless).
*/
export const zDocumentResponse = z.object({
archived: z.boolean().nullish(),
// Integer timestamp. NOTE(review): unit is not visible here — confirm.
created_at: z.int().nullish(),
created_by: z.string().nullish(),
created_from: z.string().nullish(),
data_source_detail_dict: z.unknown().optional(),
data_source_info: z.unknown().optional(),
data_source_type: z.string().nullish(),
dataset_process_rule_id: z.string().nullish(),
disabled_at: z.int().nullish(),
disabled_by: z.string().nullish(),
display_status: z.string().nullish(),
doc_form: z.string().nullish(),
doc_metadata: z.array(zDocumentMetadataResponse).optional(),
enabled: z.boolean().nullish(),
error: z.string().nullish(),
hit_count: z.int().nullish(),
id: z.string(),
indexing_status: z.string().nullish(),
name: z.string(),
need_summary: z.boolean().nullish(),
position: z.int().nullish(),
summary_index_status: z.string().nullish(),
tokens: z.int().nullish(),
word_count: z.int().nullish(),
})
/**
* DocumentAndBatchResponse
*
* Zod schema pairing a required `batch` string with a full document object.
* In this file it backs the response schemas of the document create-by-file,
* create-by-text, update-by-file, update-by-text and PATCH-document operations.
*/
export const zDocumentAndBatchResponse = z.object({
batch: z.string(),
document: zDocumentResponse,
})
/**
* DocumentListResponse
*
* Zod schema for a paged document list: an array of documents plus
* `has_more` and integer `limit`, `page` and `total`. All keys are required.
* Backs `zGetDatasetsByDatasetIdDocumentsResponse`.
*/
export const zDocumentListResponse = z.object({
data: z.array(zDocumentResponse),
has_more: z.boolean(),
limit: z.int(),
page: z.int(),
total: z.int(),
})
/**
* DocumentStatusResponse
*
* Zod schema for one document's indexing status. `id` and `indexing_status`
* are required strings. Fields declared `.nullable()` must be present but may
* be `null`. `completed_segments` and `total_segments` are `.nullish()` and
* may also be omitted.
*/
export const zDocumentStatusResponse = z.object({
// The `*_at` fields are integer timestamps. NOTE(review): unit is not visible
// here — confirm against the backend serializer.
cleaning_completed_at: z.int().nullable(),
completed_at: z.int().nullable(),
completed_segments: z.int().nullish(),
error: z.string().nullable(),
id: z.string(),
indexing_status: z.string(),
parsing_completed_at: z.int().nullable(),
paused_at: z.int().nullable(),
processing_started_at: z.int().nullable(),
splitting_completed_at: z.int().nullable(),
stopped_at: z.int().nullable(),
total_segments: z.int().nullish(),
})
/**
* DocumentStatusListResponse
*
* Zod schema wrapping an array of per-document indexing statuses under
* `data`. Backs the by-batch indexing-status response schema.
*/
export const zDocumentStatusListResponse = z.object({
data: z.array(zDocumentStatusResponse),
})
/**
* EndUserDetail
*
@ -1573,6 +1657,11 @@ export const zPatchDatasetsByDatasetIdPath = z.object({
*/
export const zPatchDatasetsByDatasetIdResponse = zDatasetDetailWithPartialMembersResponse
/**
* Request body for creating a document by file upload (`create-by-file`):
* an optional `data` string plus the uploaded `file`.
*/
export const zPostDatasetsByDatasetIdDocumentCreateByFileBody = z.object({
// NOTE(review): presumably a JSON-encoded settings string sent alongside the
// file — confirm against the backend handler.
data: z.string().optional(),
// NOTE(review): `z.custom` is called without a check function. Per the Zod
// docs that accepts any value (including `undefined`), so `Blob | File` is
// enforced at the type level only. Confirm this is intended; a fix belongs in
// the generator's `string` resolver, not in this generated file.
file: z.custom<Blob | File>(),
})
export const zPostDatasetsByDatasetIdDocumentCreateByFilePath = z.object({
dataset_id: z.string(),
})
@ -1580,10 +1669,7 @@ export const zPostDatasetsByDatasetIdDocumentCreateByFilePath = z.object({
/**
* Document created successfully
*/
export const zPostDatasetsByDatasetIdDocumentCreateByFileResponse = z.record(
z.string(),
z.unknown(),
)
export const zPostDatasetsByDatasetIdDocumentCreateByFileResponse = zDocumentAndBatchResponse
export const zPostDatasetsByDatasetIdDocumentCreateByTextBody = zDocumentTextCreatePayload
@ -1594,10 +1680,12 @@ export const zPostDatasetsByDatasetIdDocumentCreateByTextPath = z.object({
/**
* Document created successfully
*/
export const zPostDatasetsByDatasetIdDocumentCreateByTextResponse = z.record(
z.string(),
z.unknown(),
)
export const zPostDatasetsByDatasetIdDocumentCreateByTextResponse = zDocumentAndBatchResponse
/**
* Request body for the second create-by-file route variant; same shape as
* `zPostDatasetsByDatasetIdDocumentCreateByFileBody`.
*/
export const zPostDatasetsByDatasetIdDocumentCreateByFile2Body = z.object({
data: z.string().optional(),
// NOTE(review): `z.custom` without a check function accepts any value per the
// Zod docs; `Blob | File` is a compile-time constraint only — confirm intended.
file: z.custom<Blob | File>(),
})
export const zPostDatasetsByDatasetIdDocumentCreateByFile2Path = z.object({
dataset_id: z.string(),
@ -1606,10 +1694,7 @@ export const zPostDatasetsByDatasetIdDocumentCreateByFile2Path = z.object({
/**
* Document created successfully
*/
export const zPostDatasetsByDatasetIdDocumentCreateByFile2Response = z.record(
z.string(),
z.unknown(),
)
export const zPostDatasetsByDatasetIdDocumentCreateByFile2Response = zDocumentAndBatchResponse
export const zPostDatasetsByDatasetIdDocumentCreateByText2Body = zDocumentTextCreatePayload
@ -1620,19 +1705,23 @@ export const zPostDatasetsByDatasetIdDocumentCreateByText2Path = z.object({
/**
* Document created successfully
*/
export const zPostDatasetsByDatasetIdDocumentCreateByText2Response = z.record(
z.string(),
z.unknown(),
)
export const zPostDatasetsByDatasetIdDocumentCreateByText2Response = zDocumentAndBatchResponse
export const zGetDatasetsByDatasetIdDocumentsPath = z.object({
dataset_id: z.string(),
})
/**
* Query parameters for listing a dataset's documents. Every key is optional;
* `limit` falls back to 20 and `page` to 1 when not supplied.
*/
export const zGetDatasetsByDatasetIdDocumentsQuery = z.object({
keyword: z.string().optional(),
limit: z.int().optional().default(20),
page: z.int().optional().default(1),
status: z.string().optional(),
})
/**
* Documents retrieved successfully
*/
export const zGetDatasetsByDatasetIdDocumentsResponse = z.record(z.string(), z.unknown())
export const zGetDatasetsByDatasetIdDocumentsResponse = zDocumentListResponse
export const zPostDatasetsByDatasetIdDocumentsDownloadZipBody = zDocumentBatchDownloadZipPayload
@ -1677,10 +1766,8 @@ export const zGetDatasetsByDatasetIdDocumentsByBatchIndexingStatusPath = z.objec
/**
* Indexing status retrieved successfully
*/
export const zGetDatasetsByDatasetIdDocumentsByBatchIndexingStatusResponse = z.record(
z.string(),
z.unknown(),
)
export const zGetDatasetsByDatasetIdDocumentsByBatchIndexingStatusResponse
= zDocumentStatusListResponse
export const zDeleteDatasetsByDatasetIdDocumentsByDocumentIdPath = z.object({
dataset_id: z.string(),
@ -1708,6 +1795,11 @@ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdResponse = z.record(
z.unknown(),
)
/**
* Request body for updating a document via PATCH: both the `data` string and
* the `file` are optional.
*/
export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdBody = z.object({
data: z.string().optional(),
// NOTE(review): `z.custom` without a check function accepts any value per the
// Zod docs; `Blob | File` is a compile-time constraint only — confirm intended.
file: z.custom<Blob | File>().optional(),
})
export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdPath = z.object({
dataset_id: z.string(),
document_id: z.string(),
@ -1716,10 +1808,7 @@ export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdPath = z.object({
/**
* Document updated successfully
*/
export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdResponse = z.record(
z.string(),
z.unknown(),
)
export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdResponse = zDocumentAndBatchResponse
export const zGetDatasetsByDatasetIdDocumentsByDocumentIdDownloadPath = z.object({
dataset_id: z.string(),
@ -1869,6 +1958,11 @@ export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdCh
export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksByChildChunkIdResponse
= zChildChunkDetailResponse
/**
* Request body for the `update-by-file` route: both the `data` string and the
* `file` are optional.
*/
export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileBody = z.object({
data: z.string().optional(),
// NOTE(review): `z.custom` without a check function accepts any value per the
// Zod docs; `Blob | File` is a compile-time constraint only — confirm intended.
file: z.custom<Blob | File>().optional(),
})
export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFilePath = z.object({
dataset_id: z.string(),
document_id: z.string(),
@ -1877,10 +1971,8 @@ export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFilePath = z.o
/**
* Document updated successfully
*/
export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileResponse = z.record(
z.string(),
z.unknown(),
)
export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileResponse
= zDocumentAndBatchResponse
export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextBody = zDocumentTextUpdate
@ -1892,10 +1984,13 @@ export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextPath = z.o
/**
* Document updated successfully
*/
export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextResponse = z.record(
z.string(),
z.unknown(),
)
export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByTextResponse
= zDocumentAndBatchResponse
/**
* Request body for the second update-by-file route variant; same shape as
* `zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFileBody`.
*/
export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Body = z.object({
data: z.string().optional(),
// NOTE(review): `z.custom` without a check function accepts any value per the
// Zod docs; `Blob | File` is a compile-time constraint only — confirm intended.
file: z.custom<Blob | File>().optional(),
})
export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Path = z.object({
dataset_id: z.string(),
@ -1905,10 +2000,8 @@ export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Path = z.
/**
* Document updated successfully
*/
export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Response = z.record(
z.string(),
z.unknown(),
)
export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByFile2Response
= zDocumentAndBatchResponse
export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Body = zDocumentTextUpdate
@ -1920,10 +2013,8 @@ export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Path = z.
/**
* Document updated successfully
*/
export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Response = z.record(
z.string(),
z.unknown(),
)
export const zPostDatasetsByDatasetIdDocumentsByDocumentIdUpdateByText2Response
= zDocumentAndBatchResponse
export const zPostDatasetsByDatasetIdHitTestingBody = zHitTestingPayload

View File

@ -2,7 +2,7 @@ import type { UserConfig } from '@hey-api/openapi-ts'
import fs from 'node:fs'
import path from 'node:path'
import { fileURLToPath } from 'node:url'
import { defineConfig } from '@hey-api/openapi-ts'
import { $, defineConfig } from '@hey-api/openapi-ts'
type JsonObject = Record<string, unknown>
@ -976,6 +976,12 @@ const createApiConfig = (job: ApiJob): UserConfig => ({
'name': 'zod',
'~resolvers': {
enum: markNullableEnumSchema,
// Resolver for string schemas in the zod plugin. Only `format: 'binary'` is
// handled here; any other string schema returns `undefined`.
// NOTE(review): returning `undefined` presumably hands the schema back to the
// plugin's default string handling — confirm against the hey-api resolver docs.
string: (ctx) => {
if (ctx.schema.format !== 'binary')
return undefined
// Builds the expression `z.custom<Blob | File>()`.
// NOTE(review): no check function is passed to `custom`, and per the Zod docs
// that accepts any value, so every generated `file` field validates nothing.
// Consider emitting a predicate argument (e.g. an `instanceof Blob` check).
return $(ctx.symbols.z).attr('custom').call().generic($.type.or($.type('Blob'), $.type('File')))
},
},
},
{