From 54509cb65843acbb738b25b1417220c8ab650a83 Mon Sep 17 00:00:00 2001 From: chariri Date: Fri, 26 Jun 2026 03:16:22 +0900 Subject: [PATCH] refactor(api): migrate dataset endpoints to BaseModel --- api/controllers/console/datasets/datasets.py | 46 +- .../console/datasets/datasets_document.py | 275 ++++--- .../console/datasets/datasets_segments.py | 2 +- api/controllers/console/datasets/external.py | 191 +++-- api/controllers/console/explore/trial.py | 154 ++-- .../service_api/dataset/dataset.py | 23 +- .../service_api/dataset/document.py | 147 ++-- api/core/rag/entities/processing_entities.py | 11 +- api/core/rag/extractor/extract_processor.py | 26 +- api/fields/dataset_fields.py | 113 +-- api/openapi/markdown/console-openapi.md | 613 ++++++---------- api/openapi/markdown/service-openapi.md | 81 +-- .../console/datasets/test_datasets.py | 12 +- .../datasets/test_datasets_document.py | 104 ++- .../console/datasets/test_external.py | 407 +++++++++-- .../controllers/console/explore/test_trial.py | 228 +++--- .../service_api/dataset/test_document.py | 632 ++++++++++------ ..._rag_pipeline_file_upload_serialization.py | 25 +- .../generated/api/console/agent/types.gen.ts | 32 +- .../generated/api/console/agent/zod.gen.ts | 121 +-- .../generated/api/console/apps/types.gen.ts | 32 +- .../generated/api/console/apps/zod.gen.ts | 233 +++--- .../api/console/datasets/orpc.gen.ts | 99 +-- .../api/console/datasets/types.gen.ts | 321 ++++---- .../generated/api/console/datasets/zod.gen.ts | 441 +++++------ .../console/instruction-generate/types.gen.ts | 41 +- .../console/instruction-generate/zod.gen.ts | 44 +- .../console/rule-code-generate/types.gen.ts | 41 +- .../api/console/rule-code-generate/zod.gen.ts | 44 +- .../api/console/rule-generate/types.gen.ts | 41 +- .../api/console/rule-generate/zod.gen.ts | 44 +- .../types.gen.ts | 41 +- .../zod.gen.ts | 44 +- .../generated/api/console/test/types.gen.ts | 21 +- .../generated/api/console/test/zod.gen.ts | 27 +- 
.../api/console/trial-apps/orpc.gen.ts | 84 +-- .../api/console/trial-apps/types.gen.ts | 544 +++++++------- .../api/console/trial-apps/zod.gen.ts | 686 +++++++++--------- .../console/workflow-generate/types.gen.ts | 41 +- .../api/console/workflow-generate/zod.gen.ts | 44 +- .../generated/api/service/orpc.gen.ts | 10 +- .../generated/api/service/types.gen.ts | 42 +- .../generated/api/service/zod.gen.ts | 40 +- packages/contracts/openapi-ts.api.config.ts | 78 +- 44 files changed, 3446 insertions(+), 2880 deletions(-) diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index 55bc85483d5..6303ede2f93 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -28,6 +28,7 @@ from controllers.console.wraps import ( with_current_tenant_id, with_current_user, ) +from core.entities.knowledge_entities import IndexingEstimate from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.indexing_runner import IndexingRunner from core.plugin.impl.model_runtime_factory import create_plugin_provider_manager @@ -266,21 +267,10 @@ class ErrorDocsResponse(DocumentStatusListResponse): total: int -class IndexingEstimatePreviewItemResponse(ResponseModel): - content: str - child_chunks: list[str] | None = None - summary: str | None = None - - -class IndexingEstimateQaPreviewItemResponse(ResponseModel): - question: str - answer: str - - -class IndexingEstimateResponse(ResponseModel): - total_segments: int - preview: list[IndexingEstimatePreviewItemResponse] - qa_preview: list[IndexingEstimateQaPreviewItemResponse] | None = None +class IndexingEstimateResponse(IndexingEstimate): + tokens: int + total_price: float | int + currency: str class RetrievalSettingResponse(ResponseModel): @@ -640,7 +630,7 @@ class DatasetApi(Resource): else: data["embedding_available"] = True - return data, 200 + return dump_response(DatasetDetailWithPartialMembersResponse, data), 200 
@console_ns.doc("update_dataset") @console_ns.doc(description="Update dataset details") @@ -706,7 +696,7 @@ class DatasetApi(Resource): partial_member_list = DatasetPermissionService.get_dataset_partial_member_list(dataset_id_str) result_data.update({"partial_member_list": partial_member_list}) - return result_data, 200 + return dump_response(DatasetDetailWithPartialMembersResponse, result_data), 200 @setup_required @login_required @@ -749,7 +739,7 @@ class DatasetUseCheckApi(Resource): dataset_id_str = str(dataset_id) dataset_is_using = DatasetService.dataset_use_check(dataset_id_str) - return {"is_using": dataset_is_using}, 200 + return UsageCheckResponse(is_using=dataset_is_using).model_dump(mode="json"), 200 @console_ns.route("/datasets//queries") @@ -890,7 +880,17 @@ class DatasetIndexingEstimateApi(Resource): except Exception as e: raise IndexingEstimateError(str(e)) - return response.model_dump(), 200 + return ( + IndexingEstimateResponse( + tokens=0, + total_price=0, + currency="USD", + total_segments=response.total_segments, + preview=response.preview, + qa_preview=response.qa_preview, + ).model_dump(mode="json", exclude_none=True), + 200, + ) @console_ns.route("/datasets//related-apps") @@ -1007,7 +1007,7 @@ class DatasetApiKeyApi(Resource): keys = db.session.scalars( select(ApiToken).where(ApiToken.type == self.resource_type, ApiToken.tenant_id == current_tenant_id) ).all() - return ApiKeyList.model_validate({"data": keys}, from_attributes=True).model_dump(mode="json") + return dump_response(ApiKeyList, {"data": keys}) @console_ns.response(200, "API key created successfully", console_ns.models[ApiKeyItem.__name__]) @console_ns.response(400, "Maximum keys exceeded") @@ -1041,7 +1041,7 @@ class DatasetApiKeyApi(Resource): api_token.type = self.resource_type db.session.add(api_token) db.session.commit() - return ApiKeyItem.model_validate(api_token, from_attributes=True).model_dump(mode="json"), 200 + return dump_response(ApiKeyItem, api_token), 200 
@console_ns.route("/datasets/api-keys/") @@ -1096,7 +1096,7 @@ class DatasetEnableApiApi(Resource): DatasetService.update_dataset_api_status(dataset_id_str, status == "enable") - return {"result": "success"}, 200 + return SimpleResultResponse(result="success").model_dump(mode="json"), 200 @console_ns.route("/datasets/api-base-info") @@ -1109,7 +1109,7 @@ class DatasetApiBaseUrlApi(Resource): @account_initialization_required def get(self): base = dify_config.SERVICE_API_URL or request.host_url.rstrip("/") - return {"api_base_url": normalize_api_base_url(base)} + return ApiBaseUrlResponse(api_base_url=normalize_api_base_url(base)).model_dump(mode="json") @console_ns.route("/datasets/retrieval-setting") diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index 07e150617bf..ccaaba4cf72 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -10,16 +10,17 @@ from uuid import UUID import sqlalchemy as sa from flask import request, send_file from flask_restx import Resource -from pydantic import BaseModel, Field, RootModel, field_validator +from pydantic import BaseModel, Field, JsonValue, field_validator from sqlalchemy import asc, desc, func, select from werkzeug.exceptions import Forbidden, NotFound import services from controllers.common.controller_schemas import DocumentBatchDownloadZipPayload -from controllers.common.fields import BinaryFileResponse, SimpleResultMessageResponse, SimpleResultResponse, UrlResponse +from controllers.common.fields import SimpleResultMessageResponse, SimpleResultResponse, UrlResponse from controllers.common.schema import register_response_schema_models, register_schema_models from controllers.console import console_ns from controllers.console.wraps import RBACPermission, RBACResourceScope, rbac_permission_required +from core.entities.knowledge_entities import IndexingEstimate from 
core.errors.error import ( LLMBadRequestError, ModelCurrentlyNotSupportError, @@ -29,6 +30,7 @@ from core.errors.error import ( from core.indexing_runner import IndexingRunner from core.model_manager import ModelManager from core.plugin.impl.exc import PluginDaemonClientSideError +from core.rag.entities import Rule from core.rag.extractor.entity.datasource_type import DatasourceType from core.rag.extractor.entity.extract_setting import ExtractSetting, NotionInfo, WebsiteInfo from core.rag.index_processor.constant.index_type import IndexTechniqueType @@ -48,7 +50,7 @@ from libs.helper import dump_response, to_timestamp from libs.login import login_required from models import Account, DatasetProcessRule, Document, DocumentSegment, UploadFile from models.dataset import DocumentPipelineExecutionLog -from models.enums import IndexingStatus, SegmentStatus +from models.enums import IndexingStatus, ProcessRuleMode, SegmentStatus from services.dataset_service import DatasetService, DocumentService from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig, ProcessRule, RetrievalModel from services.file_service import FileService @@ -146,8 +148,91 @@ class DocumentWithSegmentsListResponse(ResponseModel): page: int -class OpaqueObjectResponse(RootModel[dict[str, Any]]): - root: dict[str, Any] +class IndexingEstimateResponse(IndexingEstimate): + tokens: int + total_price: float | int + currency: str + + +class DocumentDetailResponse(ResponseModel): + id: str + position: int | None = None + data_source_type: str | None = None + data_source_info: Any = None + data_source_detail_dict: Any = None + dataset_process_rule_id: str | None = None + dataset_process_rule: Any = None + document_process_rule: Any = None + name: str | None = None + created_from: str | None = None + created_by: str | None = None + created_at: int | None = None + tokens: int | None = None + indexing_status: str | None = None + completed_at: int | None = None + updated_at: int | None = 
None + indexing_latency: float | None = None + error: str | None = None + enabled: bool | None = None + disabled_at: int | None = None + disabled_by: str | None = None + archived: bool | None = None + doc_type: str | None = None + doc_metadata: list[DocumentMetadataResponse] | None = None + segment_count: int | None = None + average_segment_length: float | None = None + hit_count: int | None = None + display_status: str | None = None + doc_form: str | None = None + doc_language: str | None = None + need_summary: bool | None = None + + @field_validator("data_source_type", "indexing_status", "display_status", "doc_form", mode="before") + @classmethod + def _normalize_enum_fields(cls, value: Any) -> Any: + return normalize_enum(value) + + +class SummaryStatusResponse(ResponseModel): + completed: int = 0 + generating: int = 0 + error: int = 0 + not_started: int = 0 + timeout: int = 0 + + +class SummaryEntryResponse(ResponseModel): + segment_id: str + segment_position: int + status: str + summary_preview: str | None = None + error: str | None = None + created_at: int | None = None + updated_at: int | None = None + + @field_validator("status", mode="before") + @classmethod + def _normalize_status(cls, value: Any) -> Any: + return normalize_enum(value) + + +class DocumentSummaryStatusResponse(ResponseModel): + total_segments: int + summary_status: SummaryStatusResponse + summaries: list[SummaryEntryResponse] + + +class ProcessRuleResponse(ResponseModel): + mode: ProcessRuleMode + rules: Rule | None = None + limits: dict[str, Any] + + +class DocumentPipelineExecutionLogResponse(ResponseModel): + datasource_info: JsonValue | None = None + datasource_type: str | None = None + input_data: JsonValue | None = None + datasource_node_id: str | None = None register_schema_models( @@ -163,7 +248,6 @@ register_schema_models( ) register_response_schema_models( console_ns, - BinaryFileResponse, SimpleResultMessageResponse, SimpleResultResponse, UrlResponse, @@ -173,7 +257,11 @@ 
register_response_schema_models( DocumentWithSegmentsResponse, DatasetAndDocumentResponse, DocumentWithSegmentsListResponse, - OpaqueObjectResponse, + IndexingEstimateResponse, + DocumentDetailResponse, + DocumentSummaryStatusResponse, + ProcessRuleResponse, + DocumentPipelineExecutionLogResponse, ) @@ -223,7 +311,7 @@ class GetProcessRuleApi(Resource): @console_ns.doc("get_process_rule") @console_ns.doc(description="Get dataset document processing rules") @console_ns.doc(params={"document_id": "Document ID (optional)"}) - @console_ns.response(200, "Process rules retrieved successfully", console_ns.models[OpaqueObjectResponse.__name__]) + @console_ns.response(200, "Process rules retrieved successfully", console_ns.models[ProcessRuleResponse.__name__]) @setup_required @login_required @account_initialization_required @@ -262,7 +350,7 @@ class GetProcessRuleApi(Resource): mode = dataset_process_rule.mode rules = dataset_process_rule.rules_dict - return {"mode": mode, "rules": rules, "limits": limits} + return dump_response(ProcessRuleResponse, {"mode": mode, "rules": rules, "limits": limits}) @console_ns.route("/datasets//documents") @@ -485,7 +573,7 @@ class DatasetInitApi(Resource): @console_ns.doc(description="Initialize dataset with documents") @console_ns.expect(console_ns.models[KnowledgeConfig.__name__]) @console_ns.response( - 201, "Dataset initialized successfully", console_ns.models[DatasetAndDocumentResponse.__name__] + 200, "Dataset initialized successfully", console_ns.models[DatasetAndDocumentResponse.__name__] ) @console_ns.response(400, "Invalid request parameters") @setup_required @@ -550,7 +638,7 @@ class DocumentIndexingEstimateApi(DocumentResource): @console_ns.response( 200, "Indexing estimate calculated successfully", - console_ns.models[OpaqueObjectResponse.__name__], + console_ns.models[IndexingEstimateResponse.__name__], ) @console_ns.response(404, "Document not found") @console_ns.response(400, "Document already finished") @@ -571,8 +659,6 @@ 
class DocumentIndexingEstimateApi(DocumentResource): data_process_rule = document.dataset_process_rule data_process_rule_dict = data_process_rule.to_dict() if data_process_rule else {} - response = {"tokens": 0, "total_price": 0, "currency": "USD", "total_segments": 0, "preview": []} - if document.data_source_type == "upload_file": data_source_info = document.data_source_info_dict if data_source_info and "upload_file_id" in data_source_info: @@ -603,7 +689,18 @@ class DocumentIndexingEstimateApi(DocumentResource): "English", dataset_id_str, ) - return estimate_response.model_dump(), 200 + return ( + # TODO: why using zero here? the same for the below endpoint + IndexingEstimateResponse( + tokens=0, + total_price=0, + currency="USD", + total_segments=estimate_response.total_segments, + preview=estimate_response.preview, + qa_preview=estimate_response.qa_preview, + ).model_dump(mode="json", exclude_none=True), + 200, + ) except LLMBadRequestError: raise ProviderNotInitializeError( "No Embedding Model available. 
Please configure a valid provider " @@ -616,15 +713,24 @@ class DocumentIndexingEstimateApi(DocumentResource): except Exception as e: raise IndexingEstimateError(str(e)) - return response, 200 + return ( + IndexingEstimateResponse( + tokens=0, + total_price=0, + currency="USD", + total_segments=0, + preview=[], + ).model_dump(mode="json", exclude_none=True), + 200, + ) @console_ns.route("/datasets//batch//indexing-estimate") class DocumentBatchIndexingEstimateApi(DocumentResource): @console_ns.response( 200, - "Batch indexing estimate calculated successfully", - console_ns.models[OpaqueObjectResponse.__name__], + "Indexing estimate calculated successfully", + console_ns.models[IndexingEstimateResponse.__name__], ) @setup_required @login_required @@ -636,7 +742,16 @@ class DocumentBatchIndexingEstimateApi(DocumentResource): dataset_id_str = str(dataset_id) documents = self.get_batch_documents(dataset_id_str, batch, current_user) if not documents: - return {"tokens": 0, "total_price": 0, "currency": "USD", "total_segments": 0, "preview": []}, 200 + return ( + IndexingEstimateResponse( + tokens=0, + total_price=0, + currency="USD", + total_segments=0, + preview=[], + ).model_dump(mode="json", exclude_none=True), + 200, + ) data_process_rule = documents[0].dataset_process_rule data_process_rule_dict = data_process_rule.to_dict() if data_process_rule else {} extract_settings = [] @@ -710,7 +825,17 @@ class DocumentBatchIndexingEstimateApi(DocumentResource): "English", dataset_id_str, ) - return response.model_dump(), 200 + return ( + IndexingEstimateResponse( + tokens=0, + total_price=0, + currency="USD", + total_segments=response.total_segments, + preview=response.preview, + qa_preview=response.qa_preview, + ).model_dump(mode="json", exclude_none=True), + 200, + ) except LLMBadRequestError: raise ProviderNotInitializeError( "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider." 
@@ -847,7 +972,7 @@ class DocumentApi(DocumentResource): "metadata": "Metadata inclusion (all/only/without)", } ) - @console_ns.response(200, "Document retrieved successfully", console_ns.models[OpaqueObjectResponse.__name__]) + @console_ns.response(200, "Document retrieved successfully", console_ns.models[DocumentDetailResponse.__name__]) @console_ns.response(404, "Document not found") @setup_required @login_required @@ -864,46 +989,21 @@ class DocumentApi(DocumentResource): if metadata not in self.METADATA_CHOICES: raise InvalidMetadataError(f"Invalid metadata value: {metadata}") + metadata_fields = {"doc_type", "doc_metadata"} if metadata == "only": - response = {"id": document.id, "doc_type": document.doc_type, "doc_metadata": document.doc_metadata_details} - elif metadata == "without": - dataset_process_rules = DatasetService.get_process_rules(dataset_id_str) - document_process_rules = document.dataset_process_rule.to_dict() if document.dataset_process_rule else {} - response = { - "id": document.id, - "position": document.position, - "data_source_type": document.data_source_type, - "data_source_info": document.data_source_info_dict, - "data_source_detail_dict": document.data_source_detail_dict, - "dataset_process_rule_id": document.dataset_process_rule_id, - "dataset_process_rule": dataset_process_rules, - "document_process_rule": document_process_rules, - "name": document.name, - "created_from": document.created_from, - "created_by": document.created_by, - "created_at": int(document.created_at.timestamp()), - "tokens": document.tokens, - "indexing_status": document.indexing_status, - "completed_at": int(document.completed_at.timestamp()) if document.completed_at else None, - "updated_at": int(document.updated_at.timestamp()) if document.updated_at else None, - "indexing_latency": document.indexing_latency, - "error": document.error, - "enabled": document.enabled, - "disabled_at": int(document.disabled_at.timestamp()) if document.disabled_at else None, - 
"disabled_by": document.disabled_by, - "archived": document.archived, - "segment_count": document.segment_count, - "average_segment_length": document.average_segment_length, - "hit_count": document.hit_count, - "display_status": document.display_status, - "doc_form": document.doc_form, - "doc_language": document.doc_language, - "need_summary": document.need_summary if document.need_summary is not None else False, - } - else: - dataset_process_rules = DatasetService.get_process_rules(dataset_id_str) - document_process_rules = document.dataset_process_rule.to_dict() if document.dataset_process_rule else {} - response = { + response = DocumentDetailResponse.model_validate( + { + "id": document.id, + "doc_type": document.doc_type, + "doc_metadata": document.doc_metadata_details, + } + ) + return response.model_dump(mode="json", include={"id", *metadata_fields}, exclude_unset=True), 200 + + dataset_process_rules = DatasetService.get_process_rules(dataset_id_str) + document_process_rules = document.dataset_process_rule.to_dict() if document.dataset_process_rule else {} + response = DocumentDetailResponse.model_validate( + { "id": document.id, "position": document.position, "data_source_type": document.data_source_type, @@ -936,8 +1036,9 @@ class DocumentApi(DocumentResource): "doc_language": document.doc_language, "need_summary": document.need_summary if document.need_summary is not None else False, } - - return response, 200 + ) + exclude = metadata_fields if metadata == "without" else None + return response.model_dump(mode="json", exclude=exclude, exclude_unset=True), 200 @setup_required @login_required @@ -983,7 +1084,7 @@ class DocumentDownloadApi(DocumentResource): def get(self, current_tenant_id: str, current_user: Account, dataset_id: UUID, document_id: UUID) -> dict[str, Any]: # Reuse the shared permission/tenant checks implemented in DocumentResource. 
document = self.get_document(str(dataset_id), str(document_id), current_user, current_tenant_id) - return {"url": DocumentService.get_document_download_url(document)} + return UrlResponse(url=DocumentService.get_document_download_url(document)).model_dump(mode="json") @console_ns.route("/datasets//documents/download-zip") @@ -992,7 +1093,7 @@ class DocumentBatchDownloadZipApi(DocumentResource): @console_ns.doc("download_dataset_documents_as_zip") @console_ns.doc(description="Download selected dataset documents as a single ZIP archive (upload-file only)") - @console_ns.response(200, "ZIP archive generated successfully", console_ns.models[BinaryFileResponse.__name__]) + @console_ns.response(200, "ZIP archive downloaded successfully") @setup_required @login_required @account_initialization_required @@ -1026,6 +1127,7 @@ class DocumentBatchDownloadZipApi(DocumentResource): ) cleanup = stack.pop_all() response.call_on_close(cleanup.close) + # response-contract:ignore binary ZIP download response return response @@ -1085,7 +1187,7 @@ class DocumentProcessingApi(DocumentResource): document.is_paused = False db.session.commit() - return {"result": "success"}, 200 + return SimpleResultResponse(result="success").model_dump(mode="json"), 200 @console_ns.route("/datasets//documents//metadata") @@ -1144,7 +1246,9 @@ class DocumentMetadataApi(DocumentResource): document.updated_at = naive_utc_now() db.session.commit() - return {"result": "success", "message": "Document metadata updated."}, 200 + return SimpleResultMessageResponse(result="success", message="Document metadata updated.").model_dump( + mode="json" + ), 200 @console_ns.route("/datasets//documents/status//batch") @@ -1186,7 +1290,7 @@ class DocumentStatusApi(DocumentResource): except NotFound as e: raise NotFound(str(e)) - return {"result": "success"}, 200 + return SimpleResultResponse(result="success").model_dump(mode="json"), 200 @console_ns.route("/datasets//documents//processing/pause") @@ -1355,15 +1459,15 @@ 
class WebsiteDocumentSyncApi(DocumentResource): # sync document DocumentService.sync_website_document(dataset_id_str, document) - return {"result": "success"}, 200 + return SimpleResultResponse(result="success").model_dump(mode="json"), 200 @console_ns.route("/datasets//documents//pipeline-execution-log") class DocumentPipelineExecutionLogApi(DocumentResource): @console_ns.response( 200, - "Document pipeline execution log retrieved successfully", - console_ns.models[OpaqueObjectResponse.__name__], + "Pipeline execution log retrieved successfully", + console_ns.models[DocumentPipelineExecutionLogResponse.__name__], ) @setup_required @login_required @@ -1386,18 +1490,16 @@ class DocumentPipelineExecutionLogApi(DocumentResource): .limit(1) ) if not log: - return { - "datasource_info": None, - "datasource_type": None, - "input_data": None, - "datasource_node_id": None, - }, 200 - return { - "datasource_info": json.loads(log.datasource_info), - "datasource_type": log.datasource_type, - "input_data": log.input_data, - "datasource_node_id": log.datasource_node_id, - }, 200 + return DocumentPipelineExecutionLogResponse().model_dump(mode="json"), 200 + return dump_response( + DocumentPipelineExecutionLogResponse, + { + "datasource_info": json.loads(log.datasource_info), + "datasource_type": log.datasource_type, + "input_data": log.input_data, + "datasource_node_id": log.datasource_node_id, + }, + ), 200 @console_ns.route("/datasets//documents/generate-summary") @@ -1499,7 +1601,7 @@ class DocumentGenerateSummaryApi(Resource): dataset_id_str, ) - return {"result": "success"}, 200 + return SimpleResultResponse(result="success").model_dump(mode="json"), 200 @console_ns.route("/datasets//documents//summary-status") @@ -1507,7 +1609,11 @@ class DocumentSummaryStatusApi(DocumentResource): @console_ns.doc("get_document_summary_status") @console_ns.doc(description="Get summary index generation status for a document") @console_ns.doc(params={"dataset_id": "Dataset ID", 
"document_id": "Document ID"}) - @console_ns.response(200, "Summary status retrieved successfully", console_ns.models[OpaqueObjectResponse.__name__]) + @console_ns.response( + 200, + "Summary status retrieved successfully", + console_ns.models[DocumentSummaryStatusResponse.__name__], + ) @console_ns.response(404, "Document not found") @setup_required @login_required @@ -1525,6 +1631,7 @@ class DocumentSummaryStatusApi(DocumentResource): - generating: Number of summaries being generated - error: Number of summaries with errors - not_started: Number of segments without summary records + - timeout: Number of summaries that timed out - summaries: List of summary records with status and content preview """ dataset_id_str = str(dataset_id) @@ -1549,4 +1656,4 @@ class DocumentSummaryStatusApi(DocumentResource): dataset_id=dataset_id_str, ) - return result, 200 + return dump_response(DocumentSummaryStatusResponse, result), 200 diff --git a/api/controllers/console/datasets/datasets_segments.py b/api/controllers/console/datasets/datasets_segments.py index 4858b5ff6b0..cc94c3206a0 100644 --- a/api/controllers/console/datasets/datasets_segments.py +++ b/api/controllers/console/datasets/datasets_segments.py @@ -374,7 +374,7 @@ class DatasetDocumentSegmentApi(Resource): SegmentService.update_segments_status(segment_ids, action, dataset, document) except Exception as e: raise InvalidActionError(str(e)) - return dump_response(SimpleResultResponse, {"result": "success"}), 200 + return SimpleResultResponse(result="success").model_dump(mode="json"), 200 @console_ns.route("/datasets//documents//segment") diff --git a/api/controllers/console/datasets/external.py b/api/controllers/console/datasets/external.py index eb7b9aa84f8..ba9c66302f7 100644 --- a/api/controllers/console/datasets/external.py +++ b/api/controllers/console/datasets/external.py @@ -1,19 +1,15 @@ +from datetime import datetime from typing import Any from uuid import UUID from flask import request -from flask_restx 
import Resource, fields, marshal -from pydantic import BaseModel, Field, RootModel +from flask_restx import Resource +from pydantic import AliasChoices, BaseModel, Field, field_validator from werkzeug.exceptions import Forbidden, InternalServerError, NotFound import services from controllers.common.fields import UsageCountResponse -from controllers.common.schema import ( - get_or_create_model, - query_params_from_model, - register_response_schema_models, - register_schema_models, -) +from controllers.common.schema import query_params_from_model, register_response_schema_models, register_schema_models from controllers.console import console_ns from controllers.console.datasets.error import DatasetNameDuplicateError from controllers.console.wraps import ( @@ -28,19 +24,8 @@ from controllers.console.wraps import ( ) from extensions.ext_database import db from fields.base import ResponseModel -from fields.dataset_fields import ( - dataset_detail_fields, - dataset_retrieval_model_fields, - doc_metadata_fields, - external_knowledge_info_fields, - external_retrieval_model_fields, - icon_info_fields, - keyword_setting_fields, - reranking_model_fields, - tag_fields, - vector_setting_fields, - weighted_score_fields, -) +from fields.dataset_fields import DatasetDetailResponse +from libs.helper import dump_response from libs.login import login_required from models import Account from services.dataset_service import DatasetService @@ -49,50 +34,10 @@ from services.external_knowledge_service import ExternalDatasetService from services.hit_testing_service import HitTestingService from services.knowledge_service import BedrockRetrievalSetting, ExternalDatasetTestService -register_response_schema_models(console_ns, UsageCountResponse) - - -def _build_dataset_detail_model(): - keyword_setting_model = get_or_create_model("DatasetKeywordSetting", keyword_setting_fields) - vector_setting_model = get_or_create_model("DatasetVectorSetting", vector_setting_fields) - - 
weighted_score_fields_copy = weighted_score_fields.copy() - weighted_score_fields_copy["keyword_setting"] = fields.Nested(keyword_setting_model) - weighted_score_fields_copy["vector_setting"] = fields.Nested(vector_setting_model) - weighted_score_model = get_or_create_model("DatasetWeightedScore", weighted_score_fields_copy) - - reranking_model = get_or_create_model("DatasetRerankingModel", reranking_model_fields) - - dataset_retrieval_model_fields_copy = dataset_retrieval_model_fields.copy() - dataset_retrieval_model_fields_copy["reranking_model"] = fields.Nested(reranking_model) - dataset_retrieval_model_fields_copy["weights"] = fields.Nested(weighted_score_model, allow_null=True) - dataset_retrieval_model = get_or_create_model("DatasetRetrievalModel", dataset_retrieval_model_fields_copy) - - tag_model = get_or_create_model("Tag", tag_fields) - doc_metadata_model = get_or_create_model("DatasetDocMetadata", doc_metadata_fields) - external_knowledge_info_model = get_or_create_model("ExternalKnowledgeInfo", external_knowledge_info_fields) - external_retrieval_model = get_or_create_model("ExternalRetrievalModel", external_retrieval_model_fields) - icon_info_model = get_or_create_model("DatasetIconInfo", icon_info_fields) - - dataset_detail_fields_copy = dataset_detail_fields.copy() - dataset_detail_fields_copy["retrieval_model_dict"] = fields.Nested(dataset_retrieval_model) - dataset_detail_fields_copy["tags"] = fields.List(fields.Nested(tag_model)) - dataset_detail_fields_copy["external_knowledge_info"] = fields.Nested(external_knowledge_info_model) - dataset_detail_fields_copy["external_retrieval_model"] = fields.Nested(external_retrieval_model, allow_null=True) - dataset_detail_fields_copy["doc_metadata"] = fields.List(fields.Nested(doc_metadata_model)) - dataset_detail_fields_copy["icon_info"] = fields.Nested(icon_info_model) - return get_or_create_model("DatasetDetail", dataset_detail_fields_copy) - - -try: - dataset_detail_model = 
console_ns.models["DatasetDetail"] -except KeyError: - dataset_detail_model = _build_dataset_detail_model() - class ExternalKnowledgeApiPayload(BaseModel): name: str = Field(..., min_length=1, max_length=40) - settings: dict[str, object] + settings: dict[str, Any] class ExternalDatasetCreatePayload(BaseModel): @@ -100,15 +45,13 @@ class ExternalDatasetCreatePayload(BaseModel): external_knowledge_id: str name: str = Field(..., min_length=1, max_length=100) description: str | None = Field(None, max_length=400) - external_retrieval_model: dict[str, object] | None = Field(default=None) + external_retrieval_model: dict[str, Any] | None = None class ExternalHitTestingPayload(BaseModel): query: str - external_retrieval_model: dict[str, object] | None = Field(default=None) - metadata_filtering_conditions: dict[str, object] | None = Field( - default=None, - ) + external_retrieval_model: dict[str, Any] | None = None + metadata_filtering_conditions: dict[str, Any] | None = None class BedrockRetrievalPayload(BaseModel): @@ -123,7 +66,7 @@ class ExternalApiTemplateListQuery(BaseModel): keyword: str | None = Field(default=None, description="Search keyword") -class ExternalKnowledgeDatasetBindingResponse(ResponseModel): +class ExternalKnowledgeApiBindingResponse(ResponseModel): id: str name: str @@ -133,22 +76,52 @@ class ExternalKnowledgeApiResponse(ResponseModel): tenant_id: str name: str description: str - settings: dict[str, Any] | None = Field(default=None) - dataset_bindings: list[ExternalKnowledgeDatasetBindingResponse] = Field(default_factory=list) + settings: dict[str, Any] | None = Field(validation_alias=AliasChoices("settings_dict", "settings")) + dataset_bindings: list[ExternalKnowledgeApiBindingResponse] created_by: str created_at: str + @field_validator("created_at", mode="before") + @classmethod + def _normalize_created_at(cls, value: datetime | str) -> str: + if isinstance(value, datetime): + return value.isoformat() + return value + class 
ExternalKnowledgeApiListResponse(ResponseModel): data: list[ExternalKnowledgeApiResponse] has_more: bool limit: int - total: int + total: int | None page: int -class ExternalRetrievalTestResponse(RootModel[dict[str, Any] | list[dict[str, Any]]]): - root: dict[str, Any] | list[dict[str, Any]] +class ExternalHitTestingQueryResponse(ResponseModel): + content: str + + +class ExternalHitTestingRecordResponse(ResponseModel): + content: str | None = None + title: str | None = None + score: float | None = None + metadata: dict[str, Any] | None = None + + +class ExternalHitTestingResponse(ResponseModel): + query: ExternalHitTestingQueryResponse + records: list[ExternalHitTestingRecordResponse] + + +class BedrockRetrievalRecordResponse(ResponseModel): + metadata: dict[str, Any] | None = None + score: float + title: str | None = None + content: str | None = None + + +class BedrockRetrievalResponse(ResponseModel): + records: list[BedrockRetrievalRecordResponse] register_schema_models( @@ -161,9 +134,16 @@ register_schema_models( ) register_response_schema_models( console_ns, + UsageCountResponse, + DatasetDetailResponse, + ExternalKnowledgeApiBindingResponse, ExternalKnowledgeApiResponse, ExternalKnowledgeApiListResponse, - ExternalRetrievalTestResponse, + ExternalHitTestingQueryResponse, + ExternalHitTestingRecordResponse, + ExternalHitTestingResponse, + BedrockRetrievalRecordResponse, + BedrockRetrievalResponse, ) @@ -187,24 +167,26 @@ class ExternalApiTemplateListApi(Resource): external_knowledge_apis, total = ExternalDatasetService.get_external_knowledge_apis( query.page, query.limit, current_tenant_id, query.keyword ) - response = { - "data": [item.to_dict() for item in external_knowledge_apis], - "has_more": len(external_knowledge_apis) == query.limit, - "limit": query.limit, - "total": total, - "page": query.page, - } - return response, 200 + return ExternalKnowledgeApiListResponse( + data=[ExternalKnowledgeApiResponse.model_validate(item) for item in 
external_knowledge_apis], + has_more=len(external_knowledge_apis) == query.limit, + limit=query.limit, + total=total, + page=query.page, + ).model_dump(mode="json"), 200 - @setup_required - @login_required - @account_initialization_required + @console_ns.doc("create_external_api_template") + @console_ns.doc(description="Create external knowledge API template") @console_ns.expect(console_ns.models[ExternalKnowledgeApiPayload.__name__]) @console_ns.response( 201, "External API template created successfully", console_ns.models[ExternalKnowledgeApiResponse.__name__], ) + @console_ns.response(403, "Permission denied") + @setup_required + @login_required + @account_initialization_required @with_current_user @with_current_tenant_id def post(self, current_tenant_id: str, current_user: Account): @@ -223,7 +205,7 @@ class ExternalApiTemplateListApi(Resource): except services.errors.dataset.DatasetNameDuplicateError: raise DatasetNameDuplicateError() - return external_knowledge_api.to_dict(), 201 + return dump_response(ExternalKnowledgeApiResponse, external_knowledge_api), 201 @console_ns.route("/datasets/external-knowledge-api/") @@ -249,17 +231,21 @@ class ExternalApiTemplateApi(Resource): if external_knowledge_api is None: raise NotFound("API template not found.") - return external_knowledge_api.to_dict(), 200 + return dump_response(ExternalKnowledgeApiResponse, external_knowledge_api), 200 + @console_ns.doc("update_external_api_template") + @console_ns.doc(description="Update external knowledge API template") + @console_ns.doc(params={"external_knowledge_api_id": "External knowledge API ID"}) + @console_ns.expect(console_ns.models[ExternalKnowledgeApiPayload.__name__]) @console_ns.response( 200, "External API template updated successfully", console_ns.models[ExternalKnowledgeApiResponse.__name__], ) + @console_ns.response(404, "Template not found") @setup_required @login_required @account_initialization_required - 
@console_ns.expect(console_ns.models[ExternalKnowledgeApiPayload.__name__]) @with_current_user @with_current_tenant_id def patch(self, current_tenant_id: str, current_user: Account, external_knowledge_api_id: UUID): @@ -275,7 +261,7 @@ class ExternalApiTemplateApi(Resource): args=payload.model_dump(), ) - return external_knowledge_api.to_dict(), 200 + return dump_response(ExternalKnowledgeApiResponse, external_knowledge_api), 200 @setup_required @login_required @@ -309,7 +295,7 @@ class ExternalApiUseCheckApi(Resource): external_knowledge_api_is_using, count = ExternalDatasetService.external_knowledge_api_use_check( external_knowledge_api_id_str, current_tenant_id ) - return {"is_using": external_knowledge_api_is_using, "count": count}, 200 + return UsageCountResponse(is_using=external_knowledge_api_is_using, count=count).model_dump(mode="json"), 200 @console_ns.route("/datasets/external") @@ -317,7 +303,9 @@ class ExternalDatasetCreateApi(Resource): @console_ns.doc("create_external_dataset") @console_ns.doc(description="Create external knowledge dataset") @console_ns.expect(console_ns.models[ExternalDatasetCreatePayload.__name__]) - @console_ns.response(201, "External dataset created successfully", dataset_detail_model) + @console_ns.response( + 201, "External dataset created successfully", console_ns.models[DatasetDetailResponse.__name__] + ) @console_ns.response(400, "Invalid parameters") @console_ns.response(403, "Permission denied") @setup_required @@ -345,16 +333,15 @@ class ExternalDatasetCreateApi(Resource): except services.errors.dataset.DatasetNameDuplicateError: raise DatasetNameDuplicateError() - item = marshal(dataset, dataset_detail_fields) - dataset_id_str = item["id"] + dataset_id_str = str(dataset.id) permission_keys_map = enterprise_rbac_service.RBACService.DatasetPermissions.batch_get( str(current_tenant_id), current_user.id, [dataset_id_str], ) - item["permission_keys"] = permission_keys_map.get(dataset_id_str, []) - - return item, 201 + data = 
DatasetDetailResponse.model_validate(dataset).model_dump(mode="json") + data["permission_keys"] = permission_keys_map.get(dataset_id_str, []) + return data, 201 @console_ns.route("/datasets//external-hit-testing") @@ -366,7 +353,7 @@ class ExternalKnowledgeHitTestingApi(Resource): @console_ns.response( 200, "External hit testing completed successfully", - console_ns.models[ExternalRetrievalTestResponse.__name__], + console_ns.models[ExternalHitTestingResponse.__name__], ) @console_ns.response(404, "Dataset not found") @console_ns.response(400, "Invalid parameters") @@ -399,7 +386,7 @@ class ExternalKnowledgeHitTestingApi(Resource): metadata_filtering_conditions=payload.metadata_filtering_conditions, ) - return response + return dump_response(ExternalHitTestingResponse, response) except Exception as e: raise InternalServerError(str(e)) @@ -410,11 +397,7 @@ class BedrockRetrievalApi(Resource): @console_ns.doc("bedrock_retrieval_test") @console_ns.doc(description="Bedrock retrieval test (internal use only)") @console_ns.expect(console_ns.models[BedrockRetrievalPayload.__name__]) - @console_ns.response( - 200, - "Bedrock retrieval test completed", - console_ns.models[ExternalRetrievalTestResponse.__name__], - ) + @console_ns.response(200, "Bedrock retrieval test completed", console_ns.models[BedrockRetrievalResponse.__name__]) def post(self): payload = BedrockRetrievalPayload.model_validate(console_ns.payload or {}) @@ -422,4 +405,4 @@ class BedrockRetrievalApi(Resource): result = ExternalDatasetTestService.knowledge_retrieval( payload.retrieval_setting, payload.query, payload.knowledge_id ) - return result, 200 + return dump_response(BedrockRetrievalResponse, result), 200 diff --git a/api/controllers/console/explore/trial.py b/api/controllers/console/explore/trial.py index 6aef9129780..fd614ece87f 100644 --- a/api/controllers/console/explore/trial.py +++ b/api/controllers/console/explore/trial.py @@ -1,28 +1,24 @@ import logging -from typing import Any, Literal, cast 
+from typing import Literal from flask import request -from flask_restx import Resource, fields, marshal, marshal_with +from flask_restx import Resource from pydantic import BaseModel, Field from sqlalchemy import select from werkzeug.exceptions import Forbidden, InternalServerError, NotFound import services -from controllers.common.fields import ( - AudioBinaryResponse, - AudioTranscriptResponse, - GeneratedAppResponse, - SimpleResultResponse, -) +from controllers.common.fields import AudioBinaryResponse, AudioTranscriptResponse, SimpleResultResponse from controllers.common.fields import Parameters as ParametersResponse from controllers.common.fields import Site as SiteResponse from controllers.common.schema import ( - get_or_create_model, query_params_from_model, + query_params_from_request, register_response_schema_models, register_schema_models, ) from controllers.console import console_ns +from controllers.console.app.app import AppDetailWithSite from controllers.console.app.error import ( AppUnavailableError, AudioTooLargeError, @@ -36,6 +32,7 @@ from controllers.console.app.error import ( ProviderQuotaExceededError, UnsupportedAudioTypeError, ) +from controllers.console.app.workflow import WorkflowResponse from controllers.console.app.wraps import get_app_model_with_trial from controllers.console.explore.error import ( AppSuggestedQuestionsAfterAnswerDisabledError, @@ -56,26 +53,13 @@ from core.errors.error import ( ) from extensions.ext_database import db from extensions.ext_redis import redis_client -from fields.app_fields import ( - app_detail_fields_with_site, - deleted_tool_fields, - model_config_fields, - site_fields, - tag_fields, -) -from fields.dataset_fields import dataset_fields -from fields.member_fields import simple_account_fields +from fields.base import ResponseModel +from fields.dataset_fields import DatasetDetailResponse from fields.message_fields import SuggestedQuestionsResponse -from fields.workflow_fields import ( - 
conversation_variable_fields, - pipeline_variable_fields, - workflow_fields, - workflow_partial_fields, -) from graphon.graph_engine.manager import GraphEngineManager from graphon.model_runtime.errors.invoke import InvokeError from libs import helper -from libs.helper import uuid_value +from libs.helper import dump_response, uuid_value from models import Account from models.account import TenantStatus from models.model import AppMode, Site @@ -102,57 +86,42 @@ from services.recommended_app_service import RecommendedAppService logger = logging.getLogger(__name__) -model_config_model = get_or_create_model("TrialAppModelConfig", model_config_fields) -workflow_partial_model = get_or_create_model("TrialWorkflowPartial", workflow_partial_fields) -deleted_tool_model = get_or_create_model("TrialDeletedTool", deleted_tool_fields) -tag_model = get_or_create_model("TrialTag", tag_fields) -site_model = get_or_create_model("TrialSite", site_fields) +class TrialDatasetListItemResponse(DatasetDetailResponse): + pass -app_detail_fields_with_site_copy = app_detail_fields_with_site.copy() -app_detail_fields_with_site_copy["model_config"] = fields.Nested( - model_config_model, attribute="app_model_config", allow_null=True -) -app_detail_fields_with_site_copy["workflow"] = fields.Nested(workflow_partial_model, allow_null=True) -app_detail_fields_with_site_copy["deleted_tools"] = fields.List(fields.Nested(deleted_tool_model)) -app_detail_fields_with_site_copy["tags"] = fields.List(fields.Nested(tag_model)) -app_detail_fields_with_site_copy["site"] = fields.Nested(site_model) -app_detail_with_site_model = get_or_create_model("TrialAppDetailWithSite", app_detail_fields_with_site_copy) -simple_account_model = get_or_create_model("TrialSimpleAccount", simple_account_fields) -conversation_variable_model = get_or_create_model("TrialConversationVariable", conversation_variable_fields) -pipeline_variable_model = get_or_create_model("TrialPipelineVariable", pipeline_variable_fields) +class 
TrialDatasetListResponse(ResponseModel): + data: list[TrialDatasetListItemResponse] + has_more: bool + limit: int + total: int + page: int -workflow_fields_copy = workflow_fields.copy() -workflow_fields_copy["created_by"] = fields.Nested(simple_account_model, attribute="created_by_account") -workflow_fields_copy["updated_by"] = fields.Nested( - simple_account_model, attribute="updated_by_account", allow_null=True -) -workflow_fields_copy["conversation_variables"] = fields.List(fields.Nested(conversation_variable_model)) -workflow_fields_copy["rag_pipeline_variables"] = fields.List(fields.Nested(pipeline_variable_model)) -workflow_model = get_or_create_model("TrialWorkflow", workflow_fields_copy) -dataset_model = get_or_create_model("TrialDataset", dataset_fields) -dataset_list_model = get_or_create_model( - "TrialDatasetList", - { - "data": fields.List(fields.Nested(dataset_model)), - "has_more": fields.Boolean, - "limit": fields.Integer, - "total": fields.Integer, - "page": fields.Integer, - }, +register_response_schema_models( + console_ns, + ParametersResponse, + AppDetailWithSite, + AudioBinaryResponse, + AudioTranscriptResponse, + SimpleResultResponse, + SiteResponse, + SuggestedQuestionsResponse, + TrialDatasetListItemResponse, + TrialDatasetListResponse, + WorkflowResponse, ) class WorkflowRunRequest(BaseModel): inputs: dict - files: list | None = Field(default=None) + files: list | None = None class ChatRequest(BaseModel): inputs: dict query: str - files: list | None = Field(default=None) + files: list | None = None conversation_id: str | None = None parent_message_id: str | None = None retriever_from: str = "explore_app" @@ -168,7 +137,7 @@ class TextToSpeechRequest(BaseModel): class CompletionRequest(BaseModel): inputs: dict query: str = "" - files: list | None = Field(default=None) + files: list | None = None response_mode: Literal["blocking", "streaming"] | None = None retriever_from: str = "explore_app" @@ -187,23 +156,13 @@ register_schema_models( 
CompletionRequest, TrialDatasetListQuery, ) -register_response_schema_models( - console_ns, - ParametersResponse, - AudioBinaryResponse, - AudioTranscriptResponse, - GeneratedAppResponse, - SimpleResultResponse, - SiteResponse, - SuggestedQuestionsResponse, -) class TrialAppWorkflowRunApi(TrialAppResource): @trial_feature_enable - @console_ns.expect(console_ns.models[WorkflowRunRequest.__name__]) - @console_ns.response(200, "Success", console_ns.models[GeneratedAppResponse.__name__]) @with_current_user + @console_ns.expect(console_ns.models[WorkflowRunRequest.__name__]) + @console_ns.response(200, "Success") def post(self, current_user: Account, trial_app): """ Run workflow @@ -224,6 +183,7 @@ class TrialAppWorkflowRunApi(TrialAppResource): app_model=app_model, user=current_user, args=args, invoke_from=InvokeFrom.EXPLORE, streaming=True ) RecommendedAppService.add_trial_app_record(db.session, app_id, user_id) + # response-contract:ignore compact_generate_response return helper.compact_generate_response(response) except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) @@ -263,12 +223,12 @@ class TrialAppWorkflowTaskStopApi(TrialAppResource): # New graph engine command channel mechanism GraphEngineManager(redis_client).send_stop_command(task_id) - return {"result": "success"} + return SimpleResultResponse(result="success").model_dump(mode="json") class TrialChatApi(TrialAppResource): @console_ns.expect(console_ns.models[ChatRequest.__name__]) - @console_ns.response(200, "Success", console_ns.models[GeneratedAppResponse.__name__]) + @console_ns.response(200, "Success") @trial_feature_enable @with_current_user def post(self, current_user: Account, trial_app): @@ -297,6 +257,7 @@ class TrialChatApi(TrialAppResource): app_model=app_model, user=current_user, args=args, invoke_from=InvokeFrom.EXPLORE, streaming=True ) RecommendedAppService.add_trial_app_record(db.session, app_id, user_id) + # response-contract:ignore 
compact_generate_response return helper.compact_generate_response(response) except services.errors.conversation.ConversationNotExistsError: raise NotFound("Conversation Not Exists.") @@ -355,7 +316,7 @@ class TrialMessageSuggestedQuestionApi(TrialAppResource): logger.exception("internal server error.") raise InternalServerError() - return {"data": questions} + return dump_response(SuggestedQuestionsResponse, {"data": questions}) class TrialChatAudioApi(TrialAppResource): @@ -374,7 +335,7 @@ class TrialChatAudioApi(TrialAppResource): response = AudioService.transcript_asr(app_model=app_model, file=file, end_user=None) RecommendedAppService.add_trial_app_record(db.session, app_id, user_id) - return response + return dump_response(AudioTranscriptResponse, response) except services.errors.app_model_config.AppModelConfigBrokenError: logger.exception("App model config broken.") raise AppUnavailableError() @@ -427,6 +388,7 @@ class TrialChatTextApi(TrialAppResource): message_id=message_id, ) RecommendedAppService.add_trial_app_record(db.session, app_id, user_id) + # response-contract:ignore binary response return response except services.errors.app_model_config.AppModelConfigBrokenError: logger.exception("App model config broken.") @@ -456,7 +418,7 @@ class TrialChatTextApi(TrialAppResource): class TrialCompletionApi(TrialAppResource): @console_ns.expect(console_ns.models[CompletionRequest.__name__]) - @console_ns.response(200, "Success", console_ns.models[GeneratedAppResponse.__name__]) + @console_ns.response(200, "Success") @trial_feature_enable @with_current_user def post(self, current_user: Account, trial_app): @@ -480,6 +442,7 @@ class TrialCompletionApi(TrialAppResource): ) RecommendedAppService.add_trial_app_record(db.session, app_id, user_id) + # response-contract:ignore compact_generate_response return helper.compact_generate_response(response) except services.errors.conversation.ConversationNotExistsError: raise NotFound("Conversation Not Exists.") @@ -557,50 
+520,49 @@ class TrialAppParameterApi(Resource): class AppApi(Resource): - @console_ns.response(200, "Success", app_detail_with_site_model) @get_app_model_with_trial(None) - @marshal_with(app_detail_with_site_model) + @console_ns.response(200, "App detail retrieved successfully", console_ns.models[AppDetailWithSite.__name__]) def get(self, app_model): """Get app detail""" app_service = AppService() app_model = app_service.get_app(app_model) - return app_model + return dump_response(AppDetailWithSite, app_model) class AppWorkflowApi(Resource): - @console_ns.response(200, "Success", workflow_model) @get_app_model_with_trial(None) - @marshal_with(workflow_model) + @console_ns.response(200, "Workflow detail retrieved successfully", console_ns.models[WorkflowResponse.__name__]) def get(self, app_model): """Get workflow detail""" if not app_model.workflow_id: raise AppUnavailableError() workflow = db.session.get(Workflow, app_model.workflow_id) - return workflow + return dump_response(WorkflowResponse, workflow) class DatasetListApi(Resource): @console_ns.doc(params=query_params_from_model(TrialDatasetListQuery)) - @console_ns.response(200, "Success", dataset_list_model) + @console_ns.response(200, "Success", console_ns.models[TrialDatasetListResponse.__name__]) @get_app_model_with_trial(None) def get(self, app_model): - page = request.args.get("page", default=1, type=int) - limit = request.args.get("limit", default=20, type=int) - ids = request.args.getlist("ids") + query = query_params_from_request( + TrialDatasetListQuery, + list_fields=("ids",), + use_defaults_for_malformed_ints=True, + ) tenant_id = app_model.tenant_id - if ids: - datasets, total = DatasetService.get_datasets_by_ids(ids, tenant_id) + if query.ids: + datasets, total = DatasetService.get_datasets_by_ids(query.ids, tenant_id) else: raise NeedAddIdsError() - data = cast(list[dict[str, Any]], marshal(datasets, dataset_fields)) - - response = {"data": data, "has_more": len(datasets) == limit, "limit": 
limit, "total": total, "page": page} - return response + return TrialDatasetListResponse( + data=datasets, has_more=len(datasets) == query.limit, limit=query.limit, total=total or 0, page=query.page + ).model_dump(mode="json") console_ns.add_resource(TrialChatApi, "/trial-apps//chat-messages", endpoint="trial_app_chat_completion") diff --git a/api/controllers/service_api/dataset/dataset.py b/api/controllers/service_api/dataset/dataset.py index 292c39f69bc..c12d02ae826 100644 --- a/api/controllers/service_api/dataset/dataset.py +++ b/api/controllers/service_api/dataset/dataset.py @@ -837,7 +837,7 @@ class DocumentStatusApi(DatasetApiResource): except ValueError as e: raise InvalidActionError(str(e)) - return dump_response(SimpleResultResponse, {"result": "success"}), 200 + return SimpleResultResponse(result="success").model_dump(mode="json"), 200 @service_api_ns.route("/datasets/tags") @@ -903,11 +903,8 @@ class DatasetTagsApi(DatasetApiResource): payload = TagCreatePayload.model_validate(service_api_ns.payload or {}) tag = TagService.save_tags(SaveTagPayload(name=payload.name, type=TagType.KNOWLEDGE), db.session) - response = dump_response( - KnowledgeTagResponse, - {"id": tag.id, "name": tag.name, "type": tag.type, "binding_count": 0}, - ) - return response, 200 + response = KnowledgeTagResponse(id=tag.id, name=tag.name, type=tag.type, binding_count="0") + return response.model_dump(mode="json"), 200 @service_api_ns.doc( summary="Update Knowledge Tag", @@ -943,11 +940,8 @@ class DatasetTagsApi(DatasetApiResource): binding_count = TagService.get_tag_binding_count(tag_id, db.session) - response = dump_response( - KnowledgeTagResponse, - {"id": tag.id, "name": tag.name, "type": tag.type, "binding_count": binding_count}, - ) - return response, 200 + response = KnowledgeTagResponse(id=tag.id, name=tag.name, type=tag.type, binding_count=str(binding_count)) + return response.model_dump(mode="json"), 200 @service_api_ns.doc( summary="Delete Knowledge Tag", @@ -1078,5 
+1072,8 @@ class DatasetTagsBindingStatusApi(DatasetApiResource): tags = TagService.get_tags_by_target_id( "knowledge", current_user.current_tenant_id, str(dataset_id), db.session ) - tags_list = [{"id": tag.id, "name": tag.name} for tag in tags] - return dump_response(DatasetBoundTagListResponse, {"data": tags_list, "total": len(tags)}), 200 + response = DatasetBoundTagListResponse( + data=[DatasetBoundTagResponse(id=tag.id, name=tag.name) for tag in tags], + total=len(tags), + ) + return response.model_dump(mode="json"), 200 diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index 9bae862814a..08bc69bede4 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -6,14 +6,22 @@ deprecated in generated API docs so clients migrate toward the canonical paths. """ import json -from collections.abc import Mapping from contextlib import ExitStack from copy import deepcopy from typing import Annotated, Any, Literal, Self, override from uuid import UUID from flask import request, send_file -from pydantic import BaseModel, Field, GetJsonSchemaHandler, WithJsonSchema, field_validator, model_validator +from pydantic import ( + BaseModel, + Field, + GetJsonSchemaHandler, + ValidationError, + WithJsonSchema, + field_validator, + model_validator, +) +from pydantic.json_schema import SkipJsonSchema from sqlalchemy import desc, func, select from werkzeug.exceptions import Forbidden, NotFound @@ -26,9 +34,10 @@ from controllers.common.errors import ( TooManyFilesError, UnsupportedFileTypeError, ) -from controllers.common.fields import BinaryFileResponse, UrlResponse +from controllers.common.fields import UrlResponse from controllers.common.schema import ( query_params_from_model, + query_params_from_request, register_enum_models, register_response_schema_models, register_schema_models, @@ -56,6 +65,7 @@ from fields.document_fields import ( 
DocumentMetadataResponse, DocumentResponse, DocumentStatusListResponse, + normalize_enum, ) from libs.helper import dump_response from libs.login import current_user @@ -280,38 +290,44 @@ class DocumentAndBatchResponse(ResponseModel): batch: str +# Use SkipJsonSchema to support 3 metadata modes class DocumentDetailResponse(ResponseModel): id: str - position: int | None = None - data_source_type: str | None = None - data_source_info: dict[str, Any] | None = Field(default=None) + position: int | SkipJsonSchema[None] = None + data_source_type: str | SkipJsonSchema[None] = None + data_source_info: dict[str, Any] | SkipJsonSchema[None] = None dataset_process_rule_id: str | None = None - dataset_process_rule: dict[str, Any] | None = Field(default=None) - document_process_rule: dict[str, Any] | None = Field(default=None) - name: str | None = None - created_from: str | None = None - created_by: str | None = None - created_at: int | None = None + dataset_process_rule: dict[str, Any] | SkipJsonSchema[None] = None + document_process_rule: dict[str, Any] | SkipJsonSchema[None] = None + name: str | SkipJsonSchema[None] = None + created_from: str | SkipJsonSchema[None] = None + created_by: str | SkipJsonSchema[None] = None + created_at: int | SkipJsonSchema[None] = None tokens: int | None = None - indexing_status: str | None = None + indexing_status: str | SkipJsonSchema[None] = None completed_at: int | None = None updated_at: int | None = None indexing_latency: float | None = None error: str | None = None - enabled: bool | None = None + enabled: bool | SkipJsonSchema[None] = None disabled_at: int | None = None disabled_by: str | None = None - archived: bool | None = None + archived: bool | SkipJsonSchema[None] = None doc_type: str | None = None - doc_metadata: list[DocumentMetadataResponse] | None = None - segment_count: int | None = None - average_segment_length: float | None = None - hit_count: int | None = None + doc_metadata: list[DocumentMetadataResponse] | dict[str, Any] 
| None = None + segment_count: int | SkipJsonSchema[None] = None + average_segment_length: int | float | SkipJsonSchema[None] = None + hit_count: int | SkipJsonSchema[None] = None display_status: str | None = None - doc_form: str | None = None + doc_form: str | SkipJsonSchema[None] = None doc_language: str | None = None summary_index_status: str | None = None - need_summary: bool | None = None + need_summary: bool | SkipJsonSchema[None] = None + + @field_validator("data_source_type", "indexing_status", "display_status", "doc_form", mode="before") + @classmethod + def _normalize_enum_fields(cls, value: Any) -> Any: + return normalize_enum(value) register_enum_models(service_api_ns, RetrievalMethod) @@ -331,7 +347,6 @@ register_schema_models( ) register_response_schema_models( service_api_ns, - BinaryFileResponse, UrlResponse, DocumentResponse, DocumentAndBatchResponse, @@ -341,13 +356,13 @@ register_response_schema_models( ) -def _create_document_by_text(tenant_id: str, dataset_id: UUID) -> tuple[Mapping[str, object], int]: +def _create_document_by_text(tenant_id: str, dataset_id: UUID) -> tuple[Document, str]: """Create a document from text for both canonical and legacy routes.""" payload = DocumentTextCreatePayload.model_validate(service_api_ns.payload or {}) args = payload.model_dump(exclude_none=True) dataset_id_str = str(dataset_id) - tenant_id_str = str(tenant_id) + tenant_id_str = tenant_id dataset = db.session.scalar( select(Dataset).where(Dataset.tenant_id == tenant_id_str, Dataset.id == dataset_id_str).limit(1) ) @@ -405,10 +420,10 @@ def _create_document_by_text(tenant_id: str, dataset_id: UUID) -> tuple[Mapping[ raise ProviderNotInitializeError(ex.description) document = documents[0] - return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200 + return document, batch -def _update_document_by_text(tenant_id: str, dataset_id: UUID, document_id: UUID) -> tuple[Mapping[str, object], int]: +def 
_update_document_by_text(tenant_id: str, dataset_id: UUID, document_id: UUID) -> tuple[Document, str]: """Update a document from text for both canonical and legacy routes.""" payload = DocumentTextUpdate.model_validate(service_api_ns.payload or {}) dataset = db.session.scalar( @@ -464,7 +479,7 @@ def _update_document_by_text(tenant_id: str, dataset_id: UUID, document_id: UUID raise ProviderNotInitializeError(ex.description) document = documents[0] - return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200 + return document, batch @service_api_ns.route("/datasets//document/create-by-text") @@ -508,7 +523,8 @@ class DocumentAddByTextApi(DatasetApiResource): @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id: str, dataset_id: UUID): """Create document by text.""" - return _create_document_by_text(tenant_id=tenant_id, dataset_id=dataset_id) + document, batch = _create_document_by_text(tenant_id=tenant_id, dataset_id=dataset_id) + return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200 @service_api_ns.route("/datasets//document/create_by_text") @@ -540,7 +556,8 @@ class DeprecatedDocumentAddByTextApi(DatasetApiResource): @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id: str, dataset_id: UUID): """Create document by text through the deprecated underscore alias.""" - return _create_document_by_text(tenant_id=tenant_id, dataset_id=dataset_id) + document, batch = _create_document_by_text(tenant_id=tenant_id, dataset_id=dataset_id) + return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200 @service_api_ns.route("/datasets//documents//update-by-text") @@ -584,7 +601,8 @@ class DocumentUpdateByTextApi(DatasetApiResource): @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID): """Update document by text.""" - return 
_update_document_by_text(tenant_id=tenant_id, dataset_id=dataset_id, document_id=document_id) + document, batch = _update_document_by_text(tenant_id=tenant_id, dataset_id=dataset_id, document_id=document_id) + return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200 @service_api_ns.route("/datasets//documents//update_by_text") @@ -615,7 +633,8 @@ class DeprecatedDocumentUpdateByTextApi(DatasetApiResource): @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID): """Update document by text through the deprecated underscore alias.""" - return _update_document_by_text(tenant_id=tenant_id, dataset_id=dataset_id, document_id=document_id) + document, batch = _update_document_by_text(tenant_id=tenant_id, dataset_id=dataset_id, document_id=document_id) + return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200 @service_api_ns.route( @@ -763,10 +782,10 @@ class DocumentAddByFileApi(DatasetApiResource): return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200 -def _update_document_by_file(tenant_id: str, dataset_id: UUID, document_id: UUID) -> tuple[Mapping[str, object], int]: +def _update_document_by_file(tenant_id: str, dataset_id: UUID, document_id: UUID) -> tuple[Document, str]: """Update a document from an uploaded file for canonical and deprecated routes.""" dataset_id_str = str(dataset_id) - tenant_id_str = str(tenant_id) + tenant_id_str = tenant_id dataset = db.session.scalar( select(Dataset).where(Dataset.tenant_id == tenant_id_str, Dataset.id == dataset_id_str).limit(1) ) @@ -836,7 +855,7 @@ def _update_document_by_file(tenant_id: str, dataset_id: UUID, document_id: UUID except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) document = documents[0] - return dump_response(DocumentAndBatchResponse, {"document": document, "batch": document.batch}), 200 + 
return document, document.batch @service_api_ns.route( @@ -890,7 +909,8 @@ class DeprecatedDocumentUpdateByFileApi(DatasetApiResource): @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID): """Update document by file through the deprecated file-update aliases.""" - return _update_document_by_file(tenant_id=tenant_id, dataset_id=dataset_id, document_id=document_id) + document, batch = _update_document_by_file(tenant_id=tenant_id, dataset_id=dataset_id, document_id=document_id) + return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200 @service_api_ns.route("/datasets//documents") @@ -923,7 +943,7 @@ class DocumentListApi(DatasetApiResource): def get(self, tenant_id, dataset_id: UUID): dataset_id_str = str(dataset_id) tenant_id = str(tenant_id) - query_params = DocumentListQuery.model_validate(request.args.to_dict()) + query_params = query_params_from_request(DocumentListQuery) dataset = db.session.scalar( select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id_str).limit(1) ) @@ -1014,6 +1034,7 @@ class DocumentBatchDownloadZipApi(DatasetApiResource): ) cleanup = stack.pop_all() response.call_on_close(cleanup.close) + # response-contract:ignore binary send_file response return response @@ -1142,7 +1163,7 @@ class DocumentDownloadApi(DatasetApiResource): if document.tenant_id != str(tenant_id): raise Forbidden("No permission.") - return {"url": DocumentService.get_document_download_url(document)} + return UrlResponse(url=DocumentService.get_document_download_url(document)).model_dump(mode="json") @service_api_ns.route("/datasets//documents/") @@ -1169,8 +1190,13 @@ class DocumentApi(DatasetApiResource): ) @service_api_ns.doc("get_document") @service_api_ns.doc(description="Get a specific document by ID") - @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."}) - 
@service_api_ns.doc(params=query_params_from_model(DocumentGetQuery)) + @service_api_ns.doc( + params={ + "dataset_id": "Knowledge base ID.", + "document_id": "Document ID.", + **query_params_from_model(DocumentGetQuery), + } + ) @service_api_ns.doc( responses={ 200: "Document retrieved successfully", @@ -1198,9 +1224,14 @@ class DocumentApi(DatasetApiResource): if document.tenant_id != str(tenant_id): raise Forbidden("No permission.") - metadata = request.args.get("metadata", "all") - if metadata not in self.METADATA_CHOICES: - raise InvalidMetadataError(f"Invalid metadata value: {metadata}") + try: + query_params = query_params_from_request(DocumentGetQuery) + except ValidationError as exc: + metadata = request.args.get("metadata", "all") + raise InvalidMetadataError(f"Invalid metadata value: {metadata}") from exc + metadata = query_params.metadata + response_include: set[str] | None = None + response_exclude: set[str] | None = None # Calculate summary_index_status if needed summary_index_status = None @@ -1213,8 +1244,10 @@ class DocumentApi(DatasetApiResource): ) if metadata == "only": + response_include = {"id", "doc_type", "doc_metadata"} response = {"id": document.id, "doc_type": document.doc_type, "doc_metadata": document.doc_metadata_details} elif metadata == "without": + response_exclude = {"doc_type", "doc_metadata"} dataset_process_rules = DatasetService.get_process_rules(dataset_id_str) document_process_rules = document.dataset_process_rule.to_dict() if document.dataset_process_rule else {} data_source_info = document.data_source_detail_dict @@ -1287,8 +1320,33 @@ class DocumentApi(DatasetApiResource): "need_summary": document.need_summary if document.need_summary is not None else False, } - return response + return DocumentDetailResponse.model_validate(response).model_dump( + mode="json", + include=response_include, + exclude=response_exclude, + ) + @service_api_ns.doc( + summary="Update Document by File", + description=( + "Update an existing 
document by uploading a new file. Re-triggers indexing — use the returned " + "`batch` ID with [Get Document Indexing Status](/api-reference/documents/" + "get-document-indexing-status) to track progress." + ), + tags=["Documents"], + responses={ + 200: "Document updated successfully.", + 400: ( + "- `too_many_files` : Only one file is allowed.\n" + "- `filename_not_exists_error` : The specified filename does not exist.\n" + "- `provider_not_initialize` : No valid model provider credentials found. Please go to " + "Settings -> Model Provider to complete your provider credentials.\n" + "- `invalid_param` : Knowledge base does not exist, external datasets not supported, " + "file too large, unsupported file type, or invalid doc_form (must be `text_model`, " + "`hierarchical_model`, or `qa_model`)." + ), + }, + ) @service_api_ns.doc("update_document_by_file") @service_api_ns.doc(description="Update an existing document by uploading a file") @service_api_ns.doc(consumes=["multipart/form-data"], params=DOCUMENT_UPDATE_BY_FILE_PARAMS) @@ -1306,7 +1364,8 @@ class DocumentApi(DatasetApiResource): @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def patch(self, tenant_id: str, dataset_id: UUID, document_id: UUID): """Update document by file on the canonical document resource.""" - return _update_document_by_file(tenant_id=tenant_id, dataset_id=dataset_id, document_id=document_id) + document, batch = _update_document_by_file(tenant_id=tenant_id, dataset_id=dataset_id, document_id=document_id) + return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200 @service_api_ns.doc( summary="Delete Document", diff --git a/api/core/rag/entities/processing_entities.py b/api/core/rag/entities/processing_entities.py index 46360ec086f..0677e4014e8 100644 --- a/api/core/rag/entities/processing_entities.py +++ b/api/core/rag/entities/processing_entities.py @@ -1,7 +1,7 @@ from enum import StrEnum from typing import Annotated, Literal -from 
pydantic import BaseModel, Field, WithJsonSchema +from pydantic import AliasChoices, BaseModel, Field, WithJsonSchema class ParentMode(StrEnum): @@ -26,7 +26,14 @@ class PreProcessingRule(BaseModel): class Segmentation(BaseModel): - separator: str = Field(default="\n", description="Custom separator for splitting text.") + # TODO: there is internally mismatched / inconsistent naming + # between `separator` and `delimiter` across the codebase. + # Taking `separator` as the canonical name. + separator: str = Field( + default="\n", + description="Custom separator for splitting text.", + validation_alias=AliasChoices("separator", "delimiter"), + ) max_tokens: int = Field(description="Maximum token count per chunk.") chunk_overlap: int = Field(default=0, description="Token overlap between chunks.") diff --git a/api/core/rag/extractor/extract_processor.py b/api/core/rag/extractor/extract_processor.py index 4d11ebe5005..36d879427a5 100644 --- a/api/core/rag/extractor/extract_processor.py +++ b/api/core/rag/extractor/extract_processor.py @@ -1,7 +1,7 @@ import re import tempfile from pathlib import Path -from typing import Union +from typing import Literal, overload from urllib.parse import unquote from configs import dify_config @@ -40,10 +40,22 @@ USER_AGENT = ( class ExtractProcessor: + @overload + @classmethod + def load_from_upload_file( + cls, upload_file: UploadFile, return_text: Literal[True], is_automatic: bool = False + ) -> str: ... + + @overload + @classmethod + def load_from_upload_file( + cls, upload_file: UploadFile, return_text: Literal[False] = False, is_automatic: bool = False + ) -> list[Document]: ... 
+ @classmethod def load_from_upload_file( cls, upload_file: UploadFile, return_text: bool = False, is_automatic: bool = False - ) -> Union[list[Document], str]: + ) -> list[Document] | str: extract_setting = ExtractSetting( datasource_type=DatasourceType.FILE, upload_file=upload_file, document_model="text_model" ) @@ -53,8 +65,16 @@ class ExtractProcessor: else: return cls.extract(extract_setting, is_automatic) + @overload @classmethod - def load_from_url(cls, url: str, return_text: bool = False) -> Union[list[Document], str]: + def load_from_url(cls, url: str, return_text: Literal[True]) -> str: ... + + @overload + @classmethod + def load_from_url(cls, url: str, return_text: Literal[False] = False) -> list[Document]: ... + + @classmethod + def load_from_url(cls, url: str, return_text: bool = False) -> list[Document] | str: response = remote_fetcher.make_request("GET", url, headers={"User-Agent": USER_AGENT}) with tempfile.TemporaryDirectory() as temp_dir: diff --git a/api/fields/dataset_fields.py b/api/fields/dataset_fields.py index ea506d2a7e4..f97f5b79460 100644 --- a/api/fields/dataset_fields.py +++ b/api/fields/dataset_fields.py @@ -1,22 +1,9 @@ from datetime import datetime -from flask_restx import fields from pydantic import Field, field_validator from fields.base import ResponseModel -from libs.helper import TimestampField, to_timestamp - -dataset_fields = { - "id": fields.String, - "name": fields.String, - "description": fields.String, - "permission": fields.String, - "data_source_type": fields.String, - "indexing_technique": fields.String, - "created_by": fields.String, - "created_at": TimestampField, - "permission_keys": fields.List(fields.String()), -} +from libs.helper import to_timestamp class DatasetMetadataResponse(ResponseModel): @@ -50,104 +37,6 @@ class DatasetMetadataActionResponse(ResponseModel): result: str -reranking_model_fields = {"reranking_provider_name": fields.String, "reranking_model_name": fields.String} - -keyword_setting_fields = 
{"keyword_weight": fields.Float} - -vector_setting_fields = { - "vector_weight": fields.Float, - "embedding_model_name": fields.String, - "embedding_provider_name": fields.String, -} - -weighted_score_fields = { - "weight_type": fields.String, - "keyword_setting": fields.Nested(keyword_setting_fields), - "vector_setting": fields.Nested(vector_setting_fields), -} - -dataset_retrieval_model_fields = { - "search_method": fields.String, - "reranking_enable": fields.Boolean, - "reranking_mode": fields.String, - "reranking_model": fields.Nested(reranking_model_fields), - "weights": fields.Nested(weighted_score_fields, allow_null=True), - "top_k": fields.Integer, - "score_threshold_enabled": fields.Boolean, - "score_threshold": fields.Float, -} - -dataset_summary_index_fields = { - "enable": fields.Boolean, - "model_name": fields.String, - "model_provider_name": fields.String, - "summary_prompt": fields.String, -} - -external_retrieval_model_fields = { - "top_k": fields.Integer, - "score_threshold": fields.Float, - "score_threshold_enabled": fields.Boolean, -} - -tag_fields = {"id": fields.String, "name": fields.String, "type": fields.String} - -external_knowledge_info_fields = { - "external_knowledge_id": fields.String, - "external_knowledge_api_id": fields.String, - "external_knowledge_api_name": fields.String, - "external_knowledge_api_endpoint": fields.String, -} - -doc_metadata_fields = {"id": fields.String, "name": fields.String, "type": fields.String} - -icon_info_fields = { - "icon_type": fields.String, - "icon": fields.String, - "icon_background": fields.String, - "icon_url": fields.String, -} - -dataset_detail_fields = { - "id": fields.String, - "name": fields.String, - "description": fields.String, - "provider": fields.String, - "permission": fields.String, - "data_source_type": fields.String, - "indexing_technique": fields.String, - "app_count": fields.Integer, - "document_count": fields.Integer, - "word_count": fields.Integer, - "created_by": fields.String, - 
"author_name": fields.String, - "created_at": TimestampField, - "updated_by": fields.String, - "updated_at": TimestampField, - "embedding_model": fields.String, - "embedding_model_provider": fields.String, - "embedding_available": fields.Boolean, - "retrieval_model_dict": fields.Nested(dataset_retrieval_model_fields), - "summary_index_setting": fields.Nested(dataset_summary_index_fields), - "tags": fields.List(fields.Nested(tag_fields)), - "doc_form": fields.String, - "external_knowledge_info": fields.Nested(external_knowledge_info_fields), - "external_retrieval_model": fields.Nested(external_retrieval_model_fields, allow_null=True), - "doc_metadata": fields.List(fields.Nested(doc_metadata_fields)), - "built_in_field_enabled": fields.Boolean, - "pipeline_id": fields.String, - "runtime_mode": fields.String, - "chunk_structure": fields.String, - "icon_info": fields.Nested(icon_info_fields), - "is_published": fields.Boolean, - "total_documents": fields.Integer, - "total_available_documents": fields.Integer, - "enable_api": fields.Boolean, - "is_multimodal": fields.Boolean, - "permission_keys": fields.List(fields.String()), -} - - class DatasetRerankingModelResponse(ResponseModel): reranking_provider_name: str | None = None reranking_model_name: str | None = None diff --git a/api/openapi/markdown/console-openapi.md b/api/openapi/markdown/console-openapi.md index b3a0b8a6a71..74f042f94b7 100644 --- a/api/openapi/markdown/console-openapi.md +++ b/api/openapi/markdown/console-openapi.md @@ -4882,7 +4882,7 @@ Create external knowledge dataset | Code | Description | Schema | | ---- | ----------- | ------ | -| 201 | External dataset created successfully | **application/json**: [DatasetDetail](#datasetdetail)
| +| 201 | External dataset created successfully | **application/json**: [DatasetDetailResponse](#datasetdetailresponse)
| | 400 | Invalid parameters | | | 403 | Permission denied | | @@ -4904,6 +4904,8 @@ Get external knowledge API templates | 200 | External API templates retrieved successfully | **application/json**: [ExternalKnowledgeApiListResponse](#externalknowledgeapilistresponse)
| ### [POST] /datasets/external-knowledge-api +Create external knowledge API template + #### Request Body | Required | Schema | @@ -4915,6 +4917,7 @@ Get external knowledge API templates | Code | Description | Schema | | ---- | ----------- | ------ | | 201 | External API template created successfully | **application/json**: [ExternalKnowledgeApiResponse](#externalknowledgeapiresponse)
| +| 403 | Permission denied | | ### [DELETE] /datasets/external-knowledge-api/{external_knowledge_api_id} #### Parameters @@ -4946,11 +4949,13 @@ Get external knowledge API template details | 404 | Template not found | | ### [PATCH] /datasets/external-knowledge-api/{external_knowledge_api_id} +Update external knowledge API template + #### Parameters | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| external_knowledge_api_id | path | | Yes | string (uuid) | +| external_knowledge_api_id | path | External knowledge API ID | Yes | string (uuid) | #### Request Body @@ -4963,6 +4968,7 @@ Get external knowledge API template details | Code | Description | Schema | | ---- | ----------- | ------ | | 200 | External API template updated successfully | **application/json**: [ExternalKnowledgeApiResponse](#externalknowledgeapiresponse)
| +| 404 | Template not found | | ### [GET] /datasets/external-knowledge-api/{external_knowledge_api_id}/use-check Check if external knowledge API is being used @@ -5007,7 +5013,7 @@ Initialize dataset with documents | Code | Description | Schema | | ---- | ----------- | ------ | -| 201 | Dataset initialized successfully | **application/json**: [DatasetAndDocumentResponse](#datasetanddocumentresponse)
| +| 200 | Dataset initialized successfully | **application/json**: [DatasetAndDocumentResponse](#datasetanddocumentresponse)
| | 400 | Invalid request parameters | | ### [GET] /datasets/metadata/built-in @@ -5043,7 +5049,7 @@ Get dataset document processing rules | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | Process rules retrieved successfully | **application/json**: [OpaqueObjectResponse](#opaqueobjectresponse)
| +| 200 | Process rules retrieved successfully | **application/json**: [ProcessRuleResponse](#processruleresponse)
| ### [GET] /datasets/retrieval-setting Get dataset retrieval settings @@ -5164,7 +5170,7 @@ Get dataset auto disable logs | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | Batch indexing estimate calculated successfully | **application/json**: [OpaqueObjectResponse](#opaqueobjectresponse)
| +| 200 | Indexing estimate calculated successfully | **application/json**: [IndexingEstimateResponse](#indexingestimateresponse)
| ### [GET] /datasets/{dataset_id}/batch/{batch}/indexing-status #### Parameters @@ -5252,9 +5258,9 @@ Download selected dataset documents as a single ZIP archive (upload-file only) #### Responses -| Code | Description | Schema | -| ---- | ----------- | ------ | -| 200 | ZIP archive generated successfully | **application/json**: [BinaryFileResponse](#binaryfileresponse)
| +| Code | Description | +| ---- | ----------- | +| 200 | ZIP archive downloaded successfully | ### [POST] /datasets/{dataset_id}/documents/generate-summary **Generate summary index for specified documents** @@ -5347,7 +5353,7 @@ Get document details | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | Document retrieved successfully | **application/json**: [OpaqueObjectResponse](#opaqueobjectresponse)
| +| 200 | Document retrieved successfully | **application/json**: [DocumentDetailResponse](#documentdetailresponse)
| | 404 | Document not found | | ### [GET] /datasets/{dataset_id}/documents/{document_id}/download @@ -5380,7 +5386,7 @@ Estimate document indexing cost | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | Indexing estimate calculated successfully | **application/json**: [OpaqueObjectResponse](#opaqueobjectresponse)
| +| 200 | Indexing estimate calculated successfully | **application/json**: [IndexingEstimateResponse](#indexingestimateresponse)
| | 400 | Document already finished | | | 404 | Document not found | | @@ -5451,7 +5457,7 @@ Update document metadata | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | Document pipeline execution log retrieved successfully | **application/json**: [OpaqueObjectResponse](#opaqueobjectresponse)
| +| 200 | Pipeline execution log retrieved successfully | **application/json**: [DocumentPipelineExecutionLogResponse](#documentpipelineexecutionlogresponse)
| ### [PATCH] /datasets/{dataset_id}/documents/{document_id}/processing/pause **pause document** @@ -5774,6 +5780,7 @@ Returns: - generating: Number of summaries being generated - error: Number of summaries with errors - not_started: Number of segments without summary records + - timeout: Number of summaries that timed out - summaries: List of summary records with status and content preview #### Parameters @@ -5787,7 +5794,7 @@ Returns: | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | Summary status retrieved successfully | **application/json**: [OpaqueObjectResponse](#opaqueobjectresponse)
| +| 200 | Summary status retrieved successfully | **application/json**: [DocumentSummaryStatusResponse](#documentsummarystatusresponse)
| | 404 | Document not found | | ### [GET] /datasets/{dataset_id}/documents/{document_id}/website-sync @@ -5841,7 +5848,7 @@ Test external knowledge retrieval for dataset | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | External hit testing completed successfully | **application/json**: [ExternalRetrievalTestResponse](#externalretrievaltestresponse)
| +| 200 | External hit testing completed successfully | **application/json**: [ExternalHitTestingResponse](#externalhittestingresponse)
| | 400 | Invalid parameters | | | 404 | Dataset not found | | @@ -8745,7 +8752,7 @@ Bedrock retrieval test (internal use only) | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | Bedrock retrieval test completed | **application/json**: [ExternalRetrievalTestResponse](#externalretrievaltestresponse)
| +| 200 | Bedrock retrieval test completed | **application/json**: [BedrockRetrievalResponse](#bedrockretrievalresponse)
| ### [GET] /trial-apps/{app_id} **Get app detail** @@ -8760,7 +8767,7 @@ Bedrock retrieval test (internal use only) | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | Success | **application/json**: [TrialAppDetailWithSite](#trialappdetailwithsite)
| +| 200 | App detail retrieved successfully | **application/json**: [AppDetailWithSite](#appdetailwithsite)
| ### [POST] /trial-apps/{app_id}/audio-to-text #### Parameters @@ -8790,9 +8797,9 @@ Bedrock retrieval test (internal use only) #### Responses -| Code | Description | Schema | -| ---- | ----------- | ------ | -| 200 | Success | **application/json**: [GeneratedAppResponse](#generatedappresponse)
| +| Code | Description | +| ---- | ----------- | +| 200 | Success | ### [POST] /trial-apps/{app_id}/completion-messages #### Parameters @@ -8809,9 +8816,9 @@ Bedrock retrieval test (internal use only) #### Responses -| Code | Description | Schema | -| ---- | ----------- | ------ | -| 200 | Success | **application/json**: [GeneratedAppResponse](#generatedappresponse)
| +| Code | Description | +| ---- | ----------- | +| 200 | Success | ### [GET] /trial-apps/{app_id}/datasets #### Parameters @@ -8827,7 +8834,7 @@ Bedrock retrieval test (internal use only) | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | Success | **application/json**: [TrialDatasetList](#trialdatasetlist)
| +| 200 | Success | **application/json**: [TrialDatasetListResponse](#trialdatasetlistresponse)
| ### [GET] /trial-apps/{app_id}/messages/{message_id}/suggested-questions #### Parameters @@ -8907,7 +8914,7 @@ Returns the site configuration for the application including theme, icons, and t | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | Success | **application/json**: [TrialWorkflow](#trialworkflow)
| +| 200 | Workflow detail retrieved successfully | **application/json**: [WorkflowResponse](#workflowresponse)
| ### [POST] /trial-apps/{app_id}/workflows/run **Run workflow** @@ -8926,9 +8933,9 @@ Returns the site configuration for the application including theme, icons, and t #### Responses -| Code | Description | Schema | -| ---- | ----------- | ------ | -| 200 | Success | **application/json**: [GeneratedAppResponse](#generatedappresponse)
| +| Code | Description | +| ---- | ----------- | +| 200 | Success | ### [POST] /trial-apps/{app_id}/workflows/tasks/{task_id}/stop **Stop workflow task** @@ -14061,6 +14068,21 @@ AppMCPServer Status Enum | query | string | | Yes | | retrieval_setting | [BedrockRetrievalSetting](#bedrockretrievalsetting) | | Yes | +#### BedrockRetrievalRecordResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| content | string | | No | +| metadata | object | | No | +| score | number | | Yes | +| title | string | | No | + +#### BedrockRetrievalResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| records | [ [BedrockRetrievalRecordResponse](#bedrockretrievalrecordresponse) ] | | Yes | + #### BedrockRetrievalSetting Retrieval settings for Amazon Bedrock knowledge base queries. @@ -14930,47 +14952,6 @@ Model class for provider custom model configuration. | permission | [PermissionEnum](#permissionenum) | | No | | provider | string,
**Default:** vendor | | No | -#### DatasetDetail - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| app_count | integer | | No | -| author_name | string | | No | -| built_in_field_enabled | boolean | | No | -| chunk_structure | string | | No | -| created_at | long | | No | -| created_by | string | | No | -| data_source_type | string | | No | -| description | string | | No | -| doc_form | string | | No | -| doc_metadata | [ [DatasetDocMetadata](#datasetdocmetadata) ] | | No | -| document_count | integer | | No | -| embedding_available | boolean | | No | -| embedding_model | string | | No | -| embedding_model_provider | string | | No | -| enable_api | boolean | | No | -| external_knowledge_info | [ExternalKnowledgeInfo](#externalknowledgeinfo) | | No | -| external_retrieval_model | [ExternalRetrievalModel](#externalretrievalmodel) | | No | -| icon_info | [DatasetIconInfo](#dataseticoninfo) | | No | -| id | string | | No | -| indexing_technique | string | | No | -| is_multimodal | boolean | | No | -| is_published | boolean | | No | -| name | string | | No | -| permission | string | | No | -| permission_keys | [ string ] | | No | -| pipeline_id | string | | No | -| provider | string | | No | -| retrieval_model_dict | [DatasetRetrievalModel](#datasetretrievalmodel) | | No | -| runtime_mode | string | | No | -| summary_index_setting | [_AnonymousInlineModel_b1954337d565](#_anonymousinlinemodel_b1954337d565) | | No | -| tags | [ [Tag](#tag) ] | | No | -| total_available_documents | integer | | No | -| total_documents | integer | | No | -| updated_at | long | | No | -| updated_by | string | | No | -| word_count | integer | | No | - #### DatasetDetailResponse | Name | Type | Description | Required | @@ -15056,14 +15037,6 @@ Model class for provider custom model configuration. 
| updated_by | string | | Yes | | word_count | integer | | Yes | -#### DatasetDocMetadata - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| id | string | | No | -| name | string | | No | -| type | string | | No | - #### DatasetDocMetadataResponse | Name | Type | Description | Required | @@ -15089,15 +15062,6 @@ Model class for provider custom model configuration. | score_threshold_enabled | boolean | | No | | top_k | integer | | Yes | -#### DatasetIconInfo - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| icon | string | | No | -| icon_background | string | | No | -| icon_type | string | | No | -| icon_url | string | | No | - #### DatasetIconInfoResponse | Name | Type | Description | Required | @@ -15107,12 +15071,6 @@ Model class for provider custom model configuration. | icon_type | string | | No | | icon_url | string | | No | -#### DatasetKeywordSetting - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| keyword_weight | number | | No | - #### DatasetKeywordSettingResponse | Name | Type | Description | Required | @@ -15250,13 +15208,6 @@ Model class for provider custom model configuration. | page | integer | | Yes | | total | integer | | Yes | -#### DatasetRerankingModel - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| reranking_model_name | string | | No | -| reranking_provider_name | string | | No | - #### DatasetRerankingModelResponse | Name | Type | Description | Required | @@ -15277,19 +15228,6 @@ Model class for provider custom model configuration. 
| name | string | | Yes | | permission | string | | No | -#### DatasetRetrievalModel - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| reranking_enable | boolean | | No | -| reranking_mode | string | | No | -| reranking_model | [DatasetRerankingModel](#datasetrerankingmodel) | | No | -| score_threshold | number | | No | -| score_threshold_enabled | boolean | | No | -| search_method | string | | No | -| top_k | integer | | No | -| weights | [DatasetWeightedScore](#datasetweightedscore) | | No | - #### DatasetRetrievalModelResponse | Name | Type | Description | Required | @@ -15339,14 +15277,6 @@ Model class for provider custom model configuration. | retrieval_model | object | | No | | summary_index_setting | object | | No | -#### DatasetVectorSetting - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| embedding_model_name | string | | No | -| embedding_provider_name | string | | No | -| vector_weight | number | | No | - #### DatasetVectorSettingResponse | Name | Type | Description | Required | @@ -15355,14 +15285,6 @@ Model class for provider custom model configuration. | embedding_provider_name | string | | No | | vector_weight | number | | No | -#### DatasetWeightedScore - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| keyword_setting | [DatasetKeywordSetting](#datasetkeywordsetting) | | No | -| vector_setting | [DatasetVectorSetting](#datasetvectorsetting) | | No | -| weight_type | string | | No | - #### DatasetWeightedScoreResponse | Name | Type | Description | Required | @@ -15598,6 +15520,42 @@ Request payload for bulk downloading documents as a zip archive. | ---- | ---- | ----------- | -------- | | document_ids | [ string (uuid) ] | List of document IDs to include in the ZIP download. 
| Yes | +#### DocumentDetailResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| archived | boolean | | No | +| average_segment_length | number | | No | +| completed_at | integer | | No | +| created_at | integer | | No | +| created_by | string | | No | +| created_from | string | | No | +| data_source_detail_dict | | | No | +| data_source_info | | | No | +| data_source_type | string | | No | +| dataset_process_rule | | | No | +| dataset_process_rule_id | string | | No | +| disabled_at | integer | | No | +| disabled_by | string | | No | +| display_status | string | | No | +| doc_form | string | | No | +| doc_language | string | | No | +| doc_metadata | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No | +| doc_type | string | | No | +| document_process_rule | | | No | +| enabled | boolean | | No | +| error | string | | No | +| hit_count | integer | | No | +| id | string | | Yes | +| indexing_latency | number | | No | +| indexing_status | string | | No | +| name | string | | No | +| need_summary | boolean | | No | +| position | integer | | No | +| segment_count | integer | | No | +| tokens | integer | | No | +| updated_at | integer | | No | + #### DocumentMetadataOperation | Name | Type | Description | Required | @@ -15622,6 +15580,15 @@ Request payload for bulk downloading documents as a zip archive. | doc_metadata | | | No | | doc_type | string | | No | +#### DocumentPipelineExecutionLogResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| datasource_info | [JsonValue](#jsonvalue) | | No | +| datasource_node_id | string | | No | +| datasource_type | string | | No | +| input_data | [JsonValue](#jsonvalue) | | No | + #### DocumentRenamePayload | Name | Type | Description | Required | @@ -15686,6 +15653,14 @@ Request payload for bulk downloading documents as a zip archive. 
| stopped_at | integer | | Yes | | total_segments | integer | | No | +#### DocumentSummaryStatusResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| summaries | [ [SummaryEntryResponse](#summaryentryresponse) ] | | Yes | +| summary_status | [SummaryStatusResponse](#summarystatusresponse) | | Yes | +| total_segments | integer | | Yes | + #### DocumentWithSegmentsListResponse | Name | Type | Description | Required | @@ -16075,6 +16050,35 @@ Request payload for bulk downloading documents as a zip archive. | metadata_filtering_conditions | object | | No | | query | string | | Yes | +#### ExternalHitTestingQueryResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| content | string | | Yes | + +#### ExternalHitTestingRecordResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| content | string | | No | +| metadata | object | | No | +| score | number | | No | +| title | string | | No | + +#### ExternalHitTestingResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| query | [ExternalHitTestingQueryResponse](#externalhittestingqueryresponse) | | Yes | +| records | [ [ExternalHitTestingRecordResponse](#externalhittestingrecordresponse) ] | | Yes | + +#### ExternalKnowledgeApiBindingResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| id | string | | Yes | +| name | string | | Yes | + #### ExternalKnowledgeApiListResponse | Name | Type | Description | Required | @@ -16098,43 +16102,13 @@ Request payload for bulk downloading documents as a zip archive. 
| ---- | ---- | ----------- | -------- | | created_at | string | | Yes | | created_by | string | | Yes | -| dataset_bindings | [ [ExternalKnowledgeDatasetBindingResponse](#externalknowledgedatasetbindingresponse) ] | | No | +| dataset_bindings | [ [ExternalKnowledgeApiBindingResponse](#externalknowledgeapibindingresponse) ] | | Yes | | description | string | | Yes | | id | string | | Yes | | name | string | | Yes | -| settings | object | | No | +| settings | object | | Yes | | tenant_id | string | | Yes | -#### ExternalKnowledgeDatasetBindingResponse - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| id | string | | Yes | -| name | string | | Yes | - -#### ExternalKnowledgeInfo - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| external_knowledge_api_endpoint | string | | No | -| external_knowledge_api_id | string | | No | -| external_knowledge_api_name | string | | No | -| external_knowledge_id | string | | No | - -#### ExternalRetrievalModel - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| score_threshold | number | | No | -| score_threshold_enabled | boolean | | No | -| top_k | integer | | No | - -#### ExternalRetrievalTestResponse - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| ExternalRetrievalTestResponse | object
[ object ] | | | - #### FeatureModel | Name | Type | Description | Required | @@ -16639,27 +16613,15 @@ Query parameter for including secret variables in export. | info_list | object | | Yes | | process_rule | object | | Yes | -#### IndexingEstimatePreviewItemResponse - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| child_chunks | [ string ] | | No | -| content | string | | Yes | -| summary | string | | No | - -#### IndexingEstimateQaPreviewItemResponse - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| answer | string | | Yes | -| question | string | | Yes | - #### IndexingEstimateResponse | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| preview | [ [IndexingEstimatePreviewItemResponse](#indexingestimatepreviewitemresponse) ] | | Yes | -| qa_preview | [ [IndexingEstimateQaPreviewItemResponse](#indexingestimateqapreviewitemresponse) ] | | No | +| currency | string | | Yes | +| preview | [ [PreviewDetail](#previewdetail) ] | | Yes | +| qa_preview | [ [QAPreviewDetail](#qapreviewdetail) ] | | No | +| tokens | integer | | Yes | +| total_price | number
integer | | Yes | | total_segments | integer | | Yes | #### InfoList @@ -17181,10 +17143,30 @@ Metadata operation data | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| completion_params | object | | No | -| mode | [LLMMode](#llmmode) | | Yes | -| name | string | | Yes | -| provider | string | | Yes | +| agent_mode | [JSONValue](#jsonvalue) | | No | +| annotation_reply | [JSONValue](#jsonvalue) | | No | +| chat_prompt_config | [JSONValue](#jsonvalue) | | No | +| completion_prompt_config | [JSONValue](#jsonvalue) | | No | +| created_at | integer | | No | +| created_by | string | | No | +| dataset_configs | [JSONValue](#jsonvalue) | | No | +| dataset_query_variable | string | | No | +| external_data_tools | [JSONValue](#jsonvalue) | | No | +| file_upload | [JSONValue](#jsonvalue) | | No | +| model | [JSONValue](#jsonvalue) | | No | +| more_like_this | [JSONValue](#jsonvalue) | | No | +| opening_statement | string | | No | +| pre_prompt | string | | No | +| prompt_type | string | | No | +| retriever_resource | [JSONValue](#jsonvalue) | | No | +| sensitive_word_avoidance | [JSONValue](#jsonvalue) | | No | +| speech_to_text | [JSONValue](#jsonvalue) | | No | +| suggested_questions | [JSONValue](#jsonvalue) | | No | +| suggested_questions_after_answer | [JSONValue](#jsonvalue) | | No | +| text_to_speech | [JSONValue](#jsonvalue) | | No | +| updated_at | integer | | No | +| updated_by | string | | No | +| user_input_form | [JSONValue](#jsonvalue) | | No | #### ModelConfigPartial @@ -17582,12 +17564,6 @@ Coarse node-level status used by Inspector to pick a banner. | redirect_uri | string | | No | | refresh_token | string | | No | -#### OpaqueObjectResponse - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| OpaqueObjectResponse | object | | | - #### OutputErrorStrategy Per-output failure handling strategy. 
@@ -18429,6 +18405,14 @@ Dataset Process Rule Mode | ---- | ---- | ----------- | -------- | | ProcessRuleMode | string | Dataset Process Rule Mode | | +#### ProcessRuleResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| limits | object | | Yes | +| mode | [ProcessRuleMode](#processrulemode) | | Yes | +| rules | [Rule](#rule) | | No | + #### ProviderCredentialResponse | Name | Type | Description | Required | @@ -19552,6 +19536,28 @@ Default configuration for form inputs. | ---- | ---- | ----------- | -------- | | data | [ string ] | | Yes | +#### SummaryEntryResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| created_at | integer | | No | +| error | string | | No | +| segment_id | string | | Yes | +| segment_position | integer | | Yes | +| status | string | | Yes | +| summary_preview | string | | No | +| updated_at | integer | | No | + +#### SummaryStatusResponse + +| Name | Type | Description | Required | +| ---- | ---- | ----------- | -------- | +| completed | integer | | No | +| error | integer | | No | +| generating | integer | | No | +| not_started | integer | | No | +| timeout | integer | | No | + #### SwitchWorkspacePayload | Name | Type | Description | Required | @@ -19869,97 +19875,47 @@ Enum class for tool provider | ---- | ---- | ----------- | -------- | | tracing_provider | string | Tracing provider name | Yes | -#### TrialAppDetailWithSite +#### TrialDatasetListItemResponse | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| access_mode | string | | No | -| api_base_url | string | | No | -| created_at | long | | No | -| created_by | string | | No | -| deleted_tools | [ [TrialDeletedTool](#trialdeletedtool) ] | | No | -| description | string | | No | -| enable_api | boolean | | No | -| enable_site | boolean | | No | -| icon | string | | No | -| icon_background | string | | No | -| icon_type | string | | No | -| icon_url | string | 
| No | -| id | string | | No | -| max_active_requests | integer | | No | -| mode | string | | No | -| model_config | [TrialAppModelConfig](#trialappmodelconfig) | | No | -| name | string | | No | +| app_count | integer | | Yes | +| author_name | string | | Yes | +| built_in_field_enabled | boolean | | Yes | +| chunk_structure | string | | Yes | +| created_at | integer | | Yes | +| created_by | string | | Yes | +| data_source_type | string | | Yes | +| description | string | | Yes | +| doc_form | string | | Yes | +| doc_metadata | [ [DatasetDocMetadataResponse](#datasetdocmetadataresponse) ] | | Yes | +| document_count | integer | | Yes | +| embedding_available | boolean | | No | +| embedding_model | string | | Yes | +| embedding_model_provider | string | | Yes | +| enable_api | boolean | | Yes | +| external_knowledge_info | [DatasetExternalKnowledgeInfoResponse](#datasetexternalknowledgeinforesponse) | | No | +| external_retrieval_model | [DatasetExternalRetrievalModelResponse](#datasetexternalretrievalmodelresponse) | | Yes | +| icon_info | [DatasetIconInfoResponse](#dataseticoninforesponse) | | No | +| id | string | | Yes | +| indexing_technique | string | | Yes | +| is_multimodal | boolean | | Yes | +| is_published | boolean | | Yes | +| maintainer | string | | No | +| name | string | | Yes | +| permission | string | | Yes | | permission_keys | [ string ] | | No | -| site | [TrialSite](#trialsite) | | No | -| tags | [ [TrialTag](#trialtag) ] | | No | -| updated_at | long | | No | -| updated_by | string | | No | -| use_icon_as_answer_icon | boolean | | No | -| workflow | [TrialWorkflowPartial](#trialworkflowpartial) | | No | - -#### TrialAppModelConfig - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| agent_mode | object | | No | -| annotation_reply | object | | No | -| chat_prompt_config | object | | No | -| completion_prompt_config | object | | No | -| created_at | long | | No | -| created_by | string | | No | -| 
dataset_configs | object | | No | -| dataset_query_variable | string | | No | -| external_data_tools | [ object ] | | No | -| file_upload | object | | No | -| model | object | | No | -| more_like_this | object | | No | -| opening_statement | string | | No | -| pre_prompt | string | | No | -| prompt_type | string | | No | -| retriever_resource | object | | No | -| sensitive_word_avoidance | object | | No | -| speech_to_text | object | | No | -| suggested_questions | [ string ] | | No | -| suggested_questions_after_answer | object | | No | -| text_to_speech | object | | No | -| updated_at | long | | No | -| updated_by | string | | No | -| user_input_form | [ object ] | | No | - -#### TrialConversationVariable - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| description | string | | No | -| id | string | | No | -| name | string | | No | -| value | string
integer
number
boolean
object
[ object ] | | No | -| value_type | string | | No | - -#### TrialDataset - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| created_at | long | | No | -| created_by | string | | No | -| data_source_type | string | | No | -| description | string | | No | -| id | string | | No | -| indexing_technique | string | | No | -| name | string | | No | -| permission | string | | No | -| permission_keys | [ string ] | | No | - -#### TrialDatasetList - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| data | [ [TrialDataset](#trialdataset) ] | | No | -| has_more | boolean | | No | -| limit | integer | | No | -| page | integer | | No | -| total | integer | | No | +| pipeline_id | string | | Yes | +| provider | string | | Yes | +| retrieval_model_dict | [DatasetRetrievalModelResponse](#datasetretrievalmodelresponse) | | Yes | +| runtime_mode | string | | Yes | +| summary_index_setting | [DatasetSummaryIndexSettingResponse](#datasetsummaryindexsettingresponse) | | No | +| tags | [ [DatasetTagResponse](#datasettagresponse) ] | | Yes | +| total_available_documents | integer | | Yes | +| total_documents | integer | | Yes | +| updated_at | integer | | Yes | +| updated_by | string | | Yes | +| word_count | integer | | Yes | #### TrialDatasetListQuery @@ -19969,13 +19925,15 @@ Enum class for tool provider | limit | integer,
**Default:** 20 | Number of items per page | No | | page | integer,
**Default:** 1 | Page number | No | -#### TrialDeletedTool +#### TrialDatasetListResponse | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| provider_id | string | | No | -| tool_name | string | | No | -| type | string | | No | +| data | [ [TrialDatasetListItemResponse](#trialdatasetlistitemresponse) ] | | Yes | +| has_more | boolean | | Yes | +| limit | integer | | Yes | +| page | integer | | Yes | +| total | integer | | Yes | #### TrialModelsResponse @@ -19983,100 +19941,6 @@ Enum class for tool provider | ---- | ---- | ----------- | -------- | | trial_models | [ string ] | | Yes | -#### TrialPipelineVariable - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| allow_file_extension | [ string ] | | No | -| allow_file_upload_methods | [ string ] | | No | -| allowed_file_types | [ string ] | | No | -| belong_to_node_id | string | | No | -| default_value | string
integer
number
boolean
object
[ object ] | | No | -| label | string | | No | -| max_length | integer | | No | -| options | [ string ] | | No | -| placeholder | string | | No | -| required | boolean | | No | -| tooltips | string | | No | -| type | string | | No | -| unit | string | | No | -| variable | string | | No | - -#### TrialSimpleAccount - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| email | string | | No | -| id | string | | No | -| name | string | | No | - -#### TrialSite - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| access_token | string | | No | -| app_base_url | string | | No | -| chat_color_theme | string | | No | -| chat_color_theme_inverted | boolean | | No | -| code | string | | No | -| copyright | string | | No | -| created_at | long | | No | -| created_by | string | | No | -| custom_disclaimer | string | | No | -| customize_domain | string | | No | -| customize_token_strategy | string | | No | -| default_language | string | | No | -| description | string | | No | -| icon | string | | No | -| icon_background | string | | No | -| icon_type | string | | No | -| icon_url | string | | No | -| privacy_policy | string | | No | -| prompt_public | boolean | | No | -| show_workflow_steps | boolean | | No | -| title | string | | No | -| updated_at | long | | No | -| updated_by | string | | No | -| use_icon_as_answer_icon | boolean | | No | - -#### TrialTag - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| id | string | | No | -| name | string | | No | -| type | string | | No | - -#### TrialWorkflow - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| conversation_variables | [ [TrialConversationVariable](#trialconversationvariable) ] | | No | -| created_at | long | | No | -| created_by | [TrialSimpleAccount](#trialsimpleaccount) | | No | -| environment_variables | [ object ] | | No | -| features | object | | No | -| graph | object 
| | No | -| hash | string | | No | -| id | string | | No | -| marked_comment | string | | No | -| marked_name | string | | No | -| rag_pipeline_variables | [ [TrialPipelineVariable](#trialpipelinevariable) ] | | No | -| tool_published | boolean | | No | -| updated_at | long | | No | -| updated_by | [TrialSimpleAccount](#trialsimpleaccount) | | No | -| version | string | | No | - -#### TrialWorkflowPartial - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| created_at | long | | No | -| created_by | string | | No | -| id | string | | No | -| updated_at | long | | No | -| updated_by | string | | No | - #### TriggerOAuthAuthorizeResponse | Name | Type | Description | Required | @@ -21281,15 +21145,6 @@ Workflow tool configuration | id | string | | No | | name | string | | No | -#### _AnonymousInlineModel_b1954337d565 - -| Name | Type | Description | Required | -| ---- | ---- | ----------- | -------- | -| enable | boolean | | No | -| model_name | string | | No | -| model_provider_name | string | | No | -| summary_prompt | string | | No | - #### _MembersInRoleList | Name | Type | Description | Required | diff --git a/api/openapi/markdown/service-openapi.md b/api/openapi/markdown/service-openapi.md index 8fc5e75e3cf..1c09804ca5c 100644 --- a/api/openapi/markdown/service-openapi.md +++ b/api/openapi/markdown/service-openapi.md @@ -46,76 +46,6 @@ Deprecated legacy alias for creating a new document by providing text content. U | 401 | Unauthorized - invalid API token | | | 403 | Forbidden - dataset API access or workspace access denied | | -### [DELETE] /datasets/{dataset_id}/documents/{document_id} -**Delete Document** - -Permanently delete a document and all its chunks from the knowledge base. - -#### Parameters - -| Name | Located in | Description | Required | Schema | -| ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | -| document_id | path | Document ID. 
| Yes | string (uuid) | - -#### Responses - -| Code | Description | -| ---- | ----------- | -| 204 | Success. | -| 400 | `document_indexing` : Cannot delete document during indexing. | -| 401 | Unauthorized - invalid API token | -| 403 | `archived_document_immutable` : The archived document is not editable. | -| 404 | `not_found` : Document Not Exists. | - -### [GET] /datasets/{dataset_id}/documents/{document_id} -**Get Document** - -Retrieve detailed information about a specific document, including its indexing status, metadata, and processing statistics. - -#### Parameters - -| Name | Located in | Description | Required | Schema | -| ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | -| document_id | path | Document ID. | Yes | string (uuid) | -| metadata | query | `all` returns all fields including metadata. `only` returns only `id`, `doc_type`, and `doc_metadata`. `without` returns all fields except `doc_metadata`. | No | string,
**Available values:** "all", "only", "without",
**Default:** all | - -#### Responses - -| Code | Description | Schema | -| ---- | ----------- | ------ | -| 200 | Document details. The response shape varies based on the `metadata` query parameter. When `metadata` is `only`, only `id`, `doc_type`, and `doc_metadata` are returned. When `metadata` is `without`, `doc_type` and `doc_metadata` are omitted. | **application/json**: [DocumentDetailResponse](#documentdetailresponse)
| -| 400 | `invalid_metadata` : Invalid metadata value for the specified key. | | -| 401 | Unauthorized - invalid API token | | -| 403 | `forbidden` : No permission. | | -| 404 | `not_found` : Document not found. | | - -### [PATCH] /datasets/{dataset_id}/documents/{document_id} -Update an existing document by uploading a file - -#### Parameters - -| Name | Located in | Description | Required | Schema | -| ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | -| document_id | path | Document ID. | Yes | string (uuid) | - -#### Request Body - -| Required | Schema | -| -------- | ------ | -| No | **multipart/form-data**: { **"data"**: string, **"file"**: binary }
| - -#### Responses - -| Code | Description | Schema | -| ---- | ----------- | ------ | -| 200 | Document updated successfully | **application/json**: [DocumentAndBatchResponse](#documentandbatchresponse)
| -| 401 | Unauthorized - invalid API token | | -| 403 | Forbidden - dataset API access or workspace access denied | | -| 404 | Document not found | | - ### ~~[POST] /datasets/{dataset_id}/documents/{document_id}/update_by_text~~ ***DEPRECATED*** @@ -1439,7 +1369,9 @@ Retrieve detailed information about a specific document, including its indexing | 404 | `not_found` : Document not found. | | ### [PATCH] /datasets/{dataset_id}/documents/{document_id} -Update an existing document by uploading a file +**Update Document by File** + +Update an existing document by uploading a new file. Re-triggers indexing — use the returned `batch` ID with [Get Document Indexing Status](/api-reference/documents/get-document-indexing-status) to track progress. #### Parameters @@ -1458,7 +1390,8 @@ Update an existing document by uploading a file | Code | Description | Schema | | ---- | ----------- | ------ | -| 200 | Document updated successfully | **application/json**: [DocumentAndBatchResponse](#documentandbatchresponse)
| +| 200 | Document updated successfully. | **application/json**: [DocumentAndBatchResponse](#documentandbatchresponse)
| +| 400 | - `too_many_files` : Only one file is allowed. - `filename_not_exists_error` : The specified filename does not exist. - `provider_not_initialize` : No valid model provider credentials found. Please go to Settings -> Model Provider to complete your provider credentials. - `invalid_param` : Knowledge base does not exist, external datasets not supported, file too large, unsupported file type, or invalid doc_form (must be `text_model`, `hierarchical_model`, or `qa_model`). | | | 401 | Unauthorized - invalid API token | | | 403 | Forbidden - dataset API access or workspace access denied | | | 404 | Document not found | | @@ -2994,7 +2927,7 @@ Request payload for bulk downloading documents as a zip archive. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | | archived | boolean | | No | -| average_segment_length | number | | No | +| average_segment_length | integer
number | | No | | completed_at | integer | | No | | created_at | integer | | No | | created_by | string | | No | @@ -3008,7 +2941,7 @@ Request payload for bulk downloading documents as a zip archive. | display_status | string | | No | | doc_form | string | | No | | doc_language | string | | No | -| doc_metadata | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No | +| doc_metadata | [ [DocumentMetadataResponse](#documentmetadataresponse) ]
object | | No | | doc_type | string | | No | | document_process_rule | object | | No | | enabled | boolean | | No | diff --git a/api/tests/unit_tests/controllers/console/datasets/test_datasets.py b/api/tests/unit_tests/controllers/console/datasets/test_datasets.py index 76a09558987..57388c10417 100644 --- a/api/tests/unit_tests/controllers/console/datasets/test_datasets.py +++ b/api/tests/unit_tests/controllers/console/datasets/test_datasets.py @@ -31,6 +31,7 @@ from controllers.console.datasets.datasets import ( DatasetUseCheckApi, ) from controllers.console.datasets.error import DatasetInUseError, DatasetNameDuplicateError, IndexingEstimateError +from core.entities.knowledge_entities import IndexingEstimate from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.provider_manager import ProviderManager from core.rag.index_processor.constant.index_type import IndexStructureType @@ -1379,8 +1380,7 @@ class TestDatasetIndexingEstimateApi: mock_file = self._upload_file() - mock_response = MagicMock() - mock_response.model_dump.return_value = {"tokens": 100} + mock_response = IndexingEstimate(total_segments=100, preview=[]) with ( app.test_request_context("/"), @@ -1406,7 +1406,13 @@ class TestDatasetIndexingEstimateApi: response, status = method(api, "tenant-1") assert status == 200 - assert response == {"tokens": 100} + assert response == { + "tokens": 0, + "total_price": 0, + "currency": "USD", + "total_segments": 100, + "preview": [], + } def test_post_file_not_found(self, app: Flask): api = DatasetIndexingEstimateApi() diff --git a/api/tests/unit_tests/controllers/console/datasets/test_datasets_document.py b/api/tests/unit_tests/controllers/console/datasets/test_datasets_document.py index 0b4ce39bafb..f4e240f41b8 100644 --- a/api/tests/unit_tests/controllers/console/datasets/test_datasets_document.py +++ b/api/tests/unit_tests/controllers/console/datasets/test_datasets_document.py @@ -1,3 +1,4 @@ +import datetime import inspect from 
unittest.mock import MagicMock, patch @@ -34,6 +35,7 @@ from controllers.console.datasets.error import ( InvalidActionError, InvalidMetadataError, ) +from core.entities.knowledge_entities import IndexingEstimate from core.rag.index_processor.constant.index_type import IndexStructureType from models.dataset import Dataset from models.dataset import Document as DatasetDocument @@ -77,6 +79,46 @@ def make_serializable_document(**overrides): return document +def make_document_detail(**overrides): + attrs = { + "id": "doc-1", + "position": 1, + "data_source_type": "upload_file", + "data_source_info_dict": {"upload_file_id": "file-1"}, + "data_source_detail_dict": {}, + "dataset_process_rule_id": None, + "dataset_process_rule": None, + "name": "Document", + "created_from": "web", + "created_by": "u1", + "created_at": datetime.datetime(2024, 1, 1, tzinfo=datetime.UTC), + "tokens": 10, + "indexing_status": "completed", + "completed_at": None, + "updated_at": None, + "indexing_latency": None, + "error": None, + "enabled": True, + "disabled_at": None, + "disabled_by": None, + "archived": False, + "doc_type": "others", + "doc_metadata_details": [], + "segment_count": 0, + "average_segment_length": 0, + "hit_count": 0, + "display_status": "available", + "doc_form": "text_model", + "doc_language": "English", + "need_summary": False, + } + attrs.update(overrides) + document = MagicMock(spec_set=list(attrs)) + for name, value in attrs.items(): + setattr(document, name, value) + return document + + def make_dataset(**overrides): attrs = { "id": "ds-1", @@ -172,6 +214,42 @@ class TestGetProcessRuleApi: assert "rules" in response + def test_get_with_document_preserves_legacy_segmentation_delimiter(self, app: Flask, patch_tenant): + api = GetProcessRuleApi() + method = inspect.unwrap(api.get) + user, _ = patch_tenant + + document = MagicMock(dataset_id="ds-1") + process_rule = MagicMock( + mode="custom", + rules_dict={"segmentation": {"delimiter": "---", "max_tokens": 123}}, + ) + + 
with ( + app.test_request_context("/?document_id=doc-1"), + patch( + "controllers.console.datasets.datasets_document.db.get_or_404", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_document.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.db.session.scalar", + return_value=process_rule, + ), + ): + response = method(api, user) + + assert response["rules"]["segmentation"]["separator"] == "---" + assert response["rules"]["segmentation"]["max_tokens"] == 123 + assert "delimiter" not in response["rules"]["segmentation"] + def test_get_with_document_dataset_not_found(self, app: Flask, patch_tenant): api = GetProcessRuleApi() method = inspect.unwrap(api.get) @@ -413,7 +491,7 @@ class TestDocumentApi: method = inspect.unwrap(api.get) user, tenant_id = patch_tenant - document = MagicMock(dataset_process_rule=None) + document = make_document_detail() with ( app.test_request_context("/"), @@ -925,12 +1003,24 @@ class TestDocumentSummaryStatusApi: ), patch( "services.summary_index_service.SummaryIndexService.get_document_summary_status_detail", - return_value={"total_segments": 0}, + return_value={ + "total_segments": 1, + "summary_status": {"timeout": 1}, + "summaries": [ + { + "segment_id": "segment-1", + "segment_position": 1, + "status": "timeout", + } + ], + }, ), ): response, status = method(api, user, "ds-1", "doc-1") assert status == 200 + assert response["summary_status"]["timeout"] == 1 + assert response["summaries"][0]["status"] == "timeout" class TestDocumentIndexingEstimateApi: @@ -1102,7 +1192,7 @@ class TestDocumentBatchIndexingEstimateApi: patch.object(api, "get_batch_documents", return_value=[doc]), patch( "controllers.console.datasets.datasets_document.IndexingRunner.indexing_estimate", - return_value=MagicMock(model_dump=lambda: 
{"tokens": 2}), + return_value=IndexingEstimate(total_segments=2, preview=[]), ), ): resp, status = method(api, tenant_id, user, "ds-1", "batch-1") @@ -1131,7 +1221,7 @@ class TestDocumentBatchIndexingEstimateApi: patch.object(api, "get_batch_documents", return_value=[doc]), patch( "controllers.console.datasets.datasets_document.IndexingRunner.indexing_estimate", - return_value=MagicMock(model_dump=lambda: {"tokens": 1}), + return_value=IndexingEstimate(total_segments=1, preview=[]), ), ): resp, status = method(api, tenant_id, user, "ds-1", "batch-1") @@ -1313,7 +1403,7 @@ class TestDocumentApiMetadata: method = inspect.unwrap(api.get) user, tenant_id = patch_tenant - document = MagicMock(dataset_process_rule=None, doc_metadata_details=[]) + document = make_document_detail(doc_metadata_details=[]) with ( app.test_request_context("/?metadata=only"), @@ -1333,7 +1423,7 @@ class TestDocumentApiMetadata: method = inspect.unwrap(api.get) user, tenant_id = patch_tenant - document = MagicMock(dataset_process_rule=None) + document = make_document_detail() with ( app.test_request_context("/?metadata=without"), @@ -1610,7 +1700,7 @@ class TestDocumentIndexingEdgeCases: ), patch( "controllers.console.datasets.datasets_document.IndexingRunner.indexing_estimate", - return_value=MagicMock(model_dump=lambda: {"tokens": 5}), + return_value=IndexingEstimate(total_segments=5, preview=[]), ), ): response, status = method(api, tenant_id, user, "ds-1", "doc-1") diff --git a/api/tests/unit_tests/controllers/console/datasets/test_external.py b/api/tests/unit_tests/controllers/console/datasets/test_external.py index b7e16b91fb7..12671458f70 100644 --- a/api/tests/unit_tests/controllers/console/datasets/test_external.py +++ b/api/tests/unit_tests/controllers/console/datasets/test_external.py @@ -1,4 +1,6 @@ import inspect +from types import SimpleNamespace +from typing import Any from unittest.mock import MagicMock, PropertyMock, patch import pytest @@ -16,6 +18,7 @@ from 
controllers.console.datasets.external import ( ExternalDatasetCreateApi, ExternalKnowledgeHitTestingApi, ) +from extensions.ext_database import db from models.account import Account, TenantAccountRole from services.dataset_service import DatasetService from services.external_knowledge_service import ExternalDatasetService @@ -38,28 +41,183 @@ def current_user() -> Account: return user +def _external_api_dict(api_id: str = "api-1") -> dict: + return { + "id": api_id, + "tenant_id": "tenant-1", + "name": f"External API {api_id}", + "description": f"Description for {api_id}", + "settings": { + "endpoint": f"https://external.example.com/{api_id}", + "api_key": "secret", + "headers": {"X-Source": "unit-test"}, + "timeout": 30, + }, + "dataset_bindings": [ + {"id": f"dataset-{api_id}", "name": f"Dataset {api_id}"}, + ], + "created_by": "user-1", + "created_at": "2024-01-01T00:00:00", + } + + +def _external_api_object(api_id: str = "api-1") -> SimpleNamespace: + payload = _external_api_dict(api_id) + return SimpleNamespace( + **{ + **payload, + "dataset_bindings": [SimpleNamespace(**binding) for binding in payload["dataset_bindings"]], + } + ) + + +def _expected_dataset_detail_payload() -> dict[str, Any]: + return { + "id": "dataset-1", + "name": "Support knowledge", + "description": "External support articles", + "provider": "external", + "permission": "only_me", + "data_source_type": "external", + "indexing_technique": "economy", + "app_count": 2, + "document_count": 7, + "word_count": 2048, + "created_by": "user-1", + "author_name": "Test User", + "created_at": 1710000000, + "updated_by": "user-2", + "updated_at": 1710003600, + "embedding_model": None, + "embedding_model_provider": None, + "embedding_available": False, + "retrieval_model_dict": { + "search_method": "semantic_search", + "reranking_enable": False, + "reranking_mode": None, + "reranking_model": {"reranking_provider_name": None, "reranking_model_name": None}, + "weights": None, + "top_k": 4, + 
"score_threshold_enabled": True, + "score_threshold": 0.5, + }, + "summary_index_setting": { + "enable": True, + "model_name": "summary-model", + "model_provider_name": "provider-a", + "summary_prompt": "Summarize this.", + }, + "tags": [{"id": "tag-1", "name": "Support", "type": "knowledge"}], + "doc_form": "text_model", + "external_knowledge_info": { + "external_knowledge_id": "knowledge-1", + "external_knowledge_api_id": "api-1", + "external_knowledge_api_name": "External API api-1", + "external_knowledge_api_endpoint": "https://external.example.com/api-1", + }, + "external_retrieval_model": { + "top_k": 4, + "score_threshold": 0.5, + "score_threshold_enabled": True, + }, + "doc_metadata": [{"id": "metadata-1", "name": "source", "type": "string"}], + "built_in_field_enabled": True, + "pipeline_id": None, + "runtime_mode": "external", + "chunk_structure": "general", + "icon_info": { + "icon_type": "emoji", + "icon": "book", + "icon_background": "#FFF4ED", + "icon_url": None, + }, + "is_published": True, + "total_documents": 7, + "total_available_documents": 6, + "enable_api": True, + "is_multimodal": False, + "maintainer": None, + "permission_keys": [], + } + + +def _dataset_detail_object() -> SimpleNamespace: + payload = _expected_dataset_detail_payload() + return SimpleNamespace( + **{ + **payload, + "summary_index_setting": SimpleNamespace(**payload["summary_index_setting"]), + "tags": [SimpleNamespace(**tag) for tag in payload["tags"]], + "external_knowledge_info": SimpleNamespace(**payload["external_knowledge_info"]), + "external_retrieval_model": SimpleNamespace(**payload["external_retrieval_model"]), + "doc_metadata": [SimpleNamespace(**item) for item in payload["doc_metadata"]], + "icon_info": SimpleNamespace(**payload["icon_info"]), + } + ) + + class TestExternalApiTemplateListApi: def test_get_success(self, app: Flask): api = ExternalApiTemplateListApi() method = inspect.unwrap(api.get) - api_item = MagicMock() - api_item.to_dict.return_value = {"id": 
"1"} + api_item = _external_api_object("api-1") with ( - app.test_request_context("/?page=1&limit=20"), + app.test_request_context("/?page=2&limit=1&keyword=vector"), patch.object( ExternalDatasetService, "get_external_knowledge_apis", - return_value=([api_item], 1), + return_value=([api_item], 3), ) as get_external_knowledge_apis, ): resp, status = method(api, "tenant-1") assert status == 200 - assert resp["total"] == 1 - assert resp["data"][0]["id"] == "1" - get_external_knowledge_apis.assert_called_once_with(1, 20, "tenant-1", None) + assert resp == { + "data": [_external_api_dict("api-1")], + "has_more": True, + "limit": 1, + "total": 3, + "page": 2, + } + get_external_knowledge_apis.assert_called_once_with(2, 1, "tenant-1", "vector") + + def test_post_success_uses_validated_payload_and_returns_template(self, app: Flask, current_user: Account): + api = ExternalApiTemplateListApi() + method = inspect.unwrap(api.post) + + payload = { + "name": "Vendor Search", + "settings": { + "endpoint": "https://external.example.com/search", + "api_key": "secret", + "headers": {"X-Source": "unit-test"}, + "timeout": 30, + }, + } + created = _external_api_object("api-created") + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", new_callable=PropertyMock, return_value=payload), + patch.object(ExternalDatasetService, "validate_api_list") as validate_api_list, + patch.object( + ExternalDatasetService, + "create_external_knowledge_api", + return_value=created, + ) as create_external_knowledge_api, + ): + resp, status = method(api, "tenant-1", current_user) + + assert status == 201 + assert resp == _external_api_dict("api-created") + validate_api_list.assert_called_once_with(payload["settings"]) + create_external_knowledge_api.assert_called_once_with( + tenant_id="tenant-1", + user_id="user-1", + args=payload, + ) def test_post_forbidden(self, app: Flask, current_user: Account): current_user.role = TenantAccountRole.NORMAL @@ -97,6 
+255,25 @@ class TestExternalApiTemplateListApi: class TestExternalApiTemplateApi: + def test_get_success_returns_template_contract(self, app: Flask): + api = ExternalApiTemplateApi() + method = inspect.unwrap(api.get) + template = _external_api_object("api-detail") + + with ( + app.test_request_context("/"), + patch.object( + ExternalDatasetService, + "get_external_knowledge_api", + return_value=template, + ) as get_external_knowledge_api, + ): + resp, status = method(api, "tenant-1", "api-detail") + + assert status == 200 + assert resp == _external_api_dict("api-detail") + get_external_knowledge_api.assert_called_once_with("api-detail", "tenant-1") + def test_get_not_found(self, app: Flask): api = ExternalApiTemplateApi() method = inspect.unwrap(api.get) @@ -112,6 +289,42 @@ class TestExternalApiTemplateApi: with pytest.raises(NotFound): method(api, "tenant-1", "api-id") + def test_patch_success_uses_validated_payload_and_returns_template(self, app: Flask, current_user: Account): + api = ExternalApiTemplateApi() + method = inspect.unwrap(api.patch) + + payload = { + "name": "Updated API", + "settings": { + "endpoint": "https://external.example.com/updated", + "api_key": "new-secret", + "headers": {"X-Version": "2"}, + }, + } + updated = _external_api_object("api-updated") + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", new_callable=PropertyMock, return_value=payload), + patch.object(ExternalDatasetService, "validate_api_list") as validate_api_list, + patch.object( + ExternalDatasetService, + "update_external_knowledge_api", + return_value=updated, + ) as update_external_knowledge_api, + ): + resp, status = method(api, "tenant-1", current_user, "api-updated") + + assert status == 200 + assert resp == _external_api_dict("api-updated") + validate_api_list.assert_called_once_with(payload["settings"]) + update_external_knowledge_api.assert_called_once_with( + tenant_id="tenant-1", + user_id="user-1", + 
external_knowledge_api_id="api-updated", + args=payload, + ) + def test_delete_forbidden(self, app: Flask, current_user: Account): current_user.role = TenantAccountRole.NORMAL @@ -149,45 +362,37 @@ class TestExternalDatasetCreateApi: method = inspect.unwrap(api.post) payload = { - "external_knowledge_api_id": "api", - "external_knowledge_id": "kid", - "name": "dataset", + "external_knowledge_api_id": "api-1", + "external_knowledge_id": "knowledge-1", + "name": "Support knowledge", + "description": "External support articles", + "external_retrieval_model": { + "top_k": 4, + "score_threshold": 0.5, + "score_threshold_enabled": True, + }, } - dataset = MagicMock() - - dataset.embedding_available = False - dataset.built_in_field_enabled = False - dataset.is_published = False - dataset.enable_api = False - dataset.enable_qa = False - dataset.enable_vector_store = False - dataset.vector_store_setting = None - dataset.is_multimodal = False - - dataset.retrieval_model_dict = {} - dataset.tags = [] - dataset.external_knowledge_info = None - dataset.external_retrieval_model = None - dataset.doc_metadata = [] - dataset.icon_info = None - dataset.permission_keys = [] - - dataset.summary_index_setting = MagicMock() - dataset.summary_index_setting.enable = False + dataset = _dataset_detail_object() with ( - app.test_request_context("/"), + app.test_request_context("/", json=payload), patch.object(type(console_ns), "payload", new_callable=PropertyMock, return_value=payload), patch.object( ExternalDatasetService, "create_external_dataset", return_value=dataset, - ), + ) as create_external_dataset, ): - _, status = method(api, "tenant-1", current_user) + resp, status = method(api, "tenant-1", current_user) assert status == 201 + assert resp == _expected_dataset_detail_payload() + create_external_dataset.assert_called_once_with( + tenant_id="tenant-1", + user_id="user-1", + args=payload, + ) def test_create_forbidden(self, app: Flask, current_user: Account): current_user.role = 
TenantAccountRole.NORMAL @@ -228,24 +433,58 @@ class TestExternalKnowledgeHitTestingApi: api = ExternalKnowledgeHitTestingApi() method = inspect.unwrap(api.post) - payload = {"query": "hello"} + payload = { + "query": "hello", + "external_retrieval_model": { + "top_k": 3, + "score_threshold": 0.25, + "score_threshold_enabled": True, + }, + "metadata_filtering_conditions": { + "logical_operator": "and", + "conditions": [{"name": "source", "comparison_operator": "contains", "value": "external"}], + }, + } dataset = MagicMock() + retrieve_response = { + "query": {"content": "hello"}, + "records": [ + { + "content": "answer", + "title": "doc", + "score": 0.9, + "metadata": {"source": "external", "page": 2}, + } + ], + } with ( - app.test_request_context("/"), + app.test_request_context("/", json=payload), patch.object(type(console_ns), "payload", new_callable=PropertyMock, return_value=payload), patch.object(DatasetService, "get_dataset", return_value=dataset), - patch.object(DatasetService, "check_dataset_permission"), + patch.object(DatasetService, "check_dataset_permission") as check_dataset_permission, + patch.object(HitTestingService, "hit_testing_args_check") as hit_testing_args_check, patch.object( HitTestingService, "external_retrieve", - return_value={"ok": True}, - ), + return_value=retrieve_response, + ) as external_retrieve, + patch("controllers.console.datasets.external.dump_response", side_effect=lambda _model, value: value), ): resp = method(api, current_user, "dataset-id") - assert resp["ok"] is True + assert resp == retrieve_response + check_dataset_permission.assert_called_once_with(dataset, current_user) + hit_testing_args_check.assert_called_once_with(payload) + external_retrieve.assert_called_once_with( + session=db.session, + dataset=dataset, + query="hello", + account=current_user, + external_retrieval_model=payload["external_retrieval_model"], + metadata_filtering_conditions=payload["metadata_filtering_conditions"], + ) class 
TestBedrockRetrievalApi: @@ -254,24 +493,44 @@ class TestBedrockRetrievalApi: method = inspect.unwrap(api.post) payload = { - "retrieval_setting": {}, - "query": "hello", - "knowledge_id": "kid", + "retrieval_setting": {"top_k": 5, "score_threshold": 0.72}, + "query": "hello bedrock", + "knowledge_id": "knowledge-base-1", + } + retrieval_response = { + "records": [ + { + "metadata": {"source": "bedrock", "uri": "s3://bucket/doc.txt"}, + "score": 0.8, + "title": "doc", + "content": "answer", + }, + { + "metadata": {"source": "bedrock", "uri": "s3://bucket/other.txt"}, + "score": 0.65, + "title": None, + "content": None, + }, + ] } with ( - app.test_request_context("/"), + app.test_request_context("/", json=payload), patch.object(type(console_ns), "payload", new_callable=PropertyMock, return_value=payload), patch.object( ExternalDatasetTestService, "knowledge_retrieval", - return_value={"ok": True}, - ), + return_value=retrieval_response, + ) as knowledge_retrieval, ): resp, status = method() assert status == 200 - assert resp["ok"] is True + assert resp == retrieval_response + retrieval_setting, query, knowledge_id = knowledge_retrieval.call_args.args + assert retrieval_setting.model_dump() == payload["retrieval_setting"] + assert query == "hello bedrock" + assert knowledge_id == "knowledge-base-1" class TestExternalApiTemplateListApiAdvanced: @@ -297,10 +556,10 @@ class TestExternalApiTemplateListApiAdvanced: api = ExternalApiTemplateListApi() method = inspect.unwrap(api.get) - templates = [MagicMock(id=f"api-{i}") for i in range(3)] + templates = [_external_api_object(f"api-{i}") for i in range(3)] with ( - app.test_request_context("/?page=1&limit=20"), + app.test_request_context("/?page=2&limit=3"), patch( "controllers.console.datasets.external.ExternalDatasetService.get_external_knowledge_apis", return_value=(templates, 25), @@ -309,9 +568,14 @@ class TestExternalApiTemplateListApiAdvanced: resp, status = method(api, "tenant-1") assert status == 200 - assert 
resp["total"] == 25 - assert len(resp["data"]) == 3 - get_external_knowledge_apis.assert_called_once_with(1, 20, "tenant-1", None) + assert resp == { + "data": [_external_api_dict(f"api-{i}") for i in range(3)], + "has_more": True, + "limit": 3, + "total": 25, + "page": 2, + } + get_external_knowledge_apis.assert_called_once_with(2, 3, "tenant-1", None) class TestExternalDatasetCreateApiAdvanced: @@ -374,15 +638,46 @@ class TestExternalKnowledgeHitTestingApiAdvanced: "controllers.console.datasets.external.DatasetService.get_dataset", return_value=dataset, ), - patch("controllers.console.datasets.external.DatasetService.check_dataset_permission"), + patch("controllers.console.datasets.external.DatasetService.check_dataset_permission") as check_permission, + patch("controllers.console.datasets.external.HitTestingService.hit_testing_args_check") as args_check, patch( "controllers.console.datasets.external.HitTestingService.external_retrieve", - return_value={"results": []}, - ), + return_value={ + "query": {"content": "test query"}, + "records": [ + { + "content": None, + "title": "metadata-only", + "score": None, + "metadata": {"status": "active"}, + } + ], + }, + ) as external_retrieve, ): resp = method(api, current_user, "ds-1") - assert resp["results"] == [] + assert resp == { + "query": {"content": "test query"}, + "records": [ + { + "content": None, + "title": "metadata-only", + "score": None, + "metadata": {"status": "active"}, + } + ], + } + check_permission.assert_called_once_with(dataset, current_user) + args_check.assert_called_once_with(payload) + external_retrieve.assert_called_once_with( + session=db.session, + dataset=dataset, + query="test query", + account=current_user, + external_retrieval_model={"type": "bm25"}, + metadata_filtering_conditions={"status": "active"}, + ) class TestBedrockRetrievalApiAdvanced: diff --git a/api/tests/unit_tests/controllers/console/explore/test_trial.py b/api/tests/unit_tests/controllers/console/explore/test_trial.py 
index be68a3beed6..2ac9fc978d8 100644 --- a/api/tests/unit_tests/controllers/console/explore/test_trial.py +++ b/api/tests/unit_tests/controllers/console/explore/test_trial.py @@ -1,4 +1,4 @@ -from inspect import unwrap as inspect_unwrap +from inspect import unwrap from io import BytesIO from typing import Any from unittest.mock import MagicMock, patch @@ -35,24 +35,16 @@ from models.model import AppMode from services.errors.conversation import ConversationNotExistsError from services.errors.llm import InvokeRateLimitError -unwrap: Any = inspect_unwrap - @pytest.fixture -def account() -> Account: - acc = Account(name="User", email="user@example.com") +def account(): + acc = MagicMock(spec=Account) acc.id = "u1" return acc -def _file_data() -> Any: - file_data: Any = BytesIO(b"fake audio data") - file_data.filename = "test.wav" - return file_data - - @pytest.fixture -def trial_app_chat() -> MagicMock: +def trial_app_chat(): app = MagicMock() app.id = "a-chat" app.mode = AppMode.CHAT @@ -60,7 +52,7 @@ def trial_app_chat() -> MagicMock: @pytest.fixture -def trial_app_completion() -> MagicMock: +def trial_app_completion(): app = MagicMock() app.id = "a-comp" app.mode = AppMode.COMPLETION @@ -68,7 +60,7 @@ def trial_app_completion() -> MagicMock: @pytest.fixture -def trial_app_workflow() -> MagicMock: +def trial_app_workflow(): app = MagicMock() app.id = "a-workflow" app.mode = AppMode.WORKFLOW @@ -76,7 +68,7 @@ def trial_app_workflow() -> MagicMock: @pytest.fixture -def valid_parameters() -> dict[str, object]: +def valid_parameters(): return { "user_input_form": [], "system_parameters": {}, @@ -92,13 +84,54 @@ def valid_parameters() -> dict[str, object]: } -def test_trial_workflow_uses_trial_scoped_simple_account_model() -> None: - assert module.simple_account_model.name == "TrialSimpleAccount" - assert hasattr(module.simple_account_model, "items") +def test_trial_workflow_registers_normalized_simple_account_response_model(): + assert "SimpleAccountResponse" in 
module.console_ns.models + + +def _response_model_name(entry: object) -> str: + assert isinstance(entry, tuple) + assert len(entry) >= 2 + model = entry[1] + name = getattr(model, "name", None) + assert isinstance(name, str) + return name + + +def test_trial_endpoints_keep_response_and_query_docs(): + untyped_generated_response_views = [ + module.TrialAppWorkflowRunApi.post, + module.TrialChatApi.post, + module.TrialCompletionApi.post, + ] + for view in untyped_generated_response_views: + apidoc = getattr(view, "__apidoc__", {}) + assert apidoc.get("responses", {})["200"] == ("Success", None, {}) + + cases = [ + (module.TrialMessageSuggestedQuestionApi.get, module.SuggestedQuestionsResponse.__name__), + (module.TrialChatAudioApi.post, module.AudioTranscriptResponse.__name__), + (module.TrialChatTextApi.post, module.AudioBinaryResponse.__name__), + (module.TrialSitApi.get, module.SiteResponse.__name__), + (module.TrialAppParameterApi.get, module.ParametersResponse.__name__), + (module.AppApi.get, module.AppDetailWithSite.__name__), + (module.AppWorkflowApi.get, module.WorkflowResponse.__name__), + (module.DatasetListApi.get, module.TrialDatasetListResponse.__name__), + ] + + for view, model_name in cases: + apidoc = getattr(view, "__apidoc__", {}) + responses = apidoc.get("responses", {}) + assert _response_model_name(responses["200"]) == model_name + + dataset_params = module.DatasetListApi.get.__apidoc__["params"] + assert dataset_params["ids"]["in"] == "query" + assert dataset_params["ids"]["type"] == "array" + assert dataset_params["page"]["default"] == 1 + assert dataset_params["limit"]["default"] == 20 class TestTrialAppWorkflowRunApi: - def test_not_workflow_app(self, app: Flask, account: Account) -> None: + def test_not_workflow_app(self, app: Flask, account): api = module.TrialAppWorkflowRunApi() method = unwrap(api.post) @@ -106,7 +139,7 @@ class TestTrialAppWorkflowRunApi: with pytest.raises(NotWorkflowAppError): method(api, account, 
MagicMock(mode=AppMode.CHAT)) - def test_success(self, app: Flask, trial_app_workflow: MagicMock, account: Account) -> None: + def test_success(self, app: Flask, trial_app_workflow, account): api = module.TrialAppWorkflowRunApi() method = unwrap(api.post) @@ -119,7 +152,7 @@ class TestTrialAppWorkflowRunApi: assert result is not None - def test_workflow_provider_not_init(self, app: Flask, trial_app_workflow: MagicMock, account: Account) -> None: + def test_workflow_provider_not_init(self, app: Flask, trial_app_workflow, account): api = module.TrialAppWorkflowRunApi() method = unwrap(api.post) @@ -134,7 +167,7 @@ class TestTrialAppWorkflowRunApi: with pytest.raises(ProviderNotInitializeError): method(api, account, trial_app_workflow) - def test_workflow_quota_exceeded(self, app: Flask, trial_app_workflow: MagicMock, account: Account) -> None: + def test_workflow_quota_exceeded(self, app: Flask, trial_app_workflow, account): api = module.TrialAppWorkflowRunApi() method = unwrap(api.post) @@ -149,7 +182,7 @@ class TestTrialAppWorkflowRunApi: with pytest.raises(ProviderQuotaExceededError): method(api, account, trial_app_workflow) - def test_workflow_model_not_support(self, app: Flask, trial_app_workflow: MagicMock, account: Account) -> None: + def test_workflow_model_not_support(self, app: Flask, trial_app_workflow, account): api = module.TrialAppWorkflowRunApi() method = unwrap(api.post) @@ -164,7 +197,7 @@ class TestTrialAppWorkflowRunApi: with pytest.raises(ProviderModelCurrentlyNotSupportError): method(api, account, trial_app_workflow) - def test_workflow_invoke_error(self, app: Flask, trial_app_workflow: MagicMock, account: Account) -> None: + def test_workflow_invoke_error(self, app: Flask, trial_app_workflow, account): api = module.TrialAppWorkflowRunApi() method = unwrap(api.post) @@ -179,7 +212,7 @@ class TestTrialAppWorkflowRunApi: with pytest.raises(CompletionRequestError): method(api, account, trial_app_workflow) - def test_workflow_rate_limit_error(self, 
app: Flask, trial_app_workflow: MagicMock, account: Account) -> None: + def test_workflow_rate_limit_error(self, app: Flask, trial_app_workflow, account): api = module.TrialAppWorkflowRunApi() method = unwrap(api.post) @@ -194,7 +227,7 @@ class TestTrialAppWorkflowRunApi: with pytest.raises(InvokeRateLimitHttpError): method(api, account, trial_app_workflow) - def test_workflow_value_error(self, app: Flask, trial_app_workflow: MagicMock, account: Account) -> None: + def test_workflow_value_error(self, app: Flask, trial_app_workflow, account): api = module.TrialAppWorkflowRunApi() method = unwrap(api.post) @@ -209,7 +242,7 @@ class TestTrialAppWorkflowRunApi: with pytest.raises(ValueError): method(api, account, trial_app_workflow) - def test_workflow_generic_exception(self, app: Flask, trial_app_workflow: MagicMock, account: Account) -> None: + def test_workflow_generic_exception(self, app: Flask, trial_app_workflow, account): api = module.TrialAppWorkflowRunApi() method = unwrap(api.post) @@ -226,7 +259,7 @@ class TestTrialAppWorkflowRunApi: class TestTrialChatApi: - def test_not_chat_app(self, app: Flask, account: Account) -> None: + def test_not_chat_app(self, app: Flask, account): api = module.TrialChatApi() method = unwrap(api.post) @@ -234,7 +267,7 @@ class TestTrialChatApi: with pytest.raises(NotChatAppError): method(api, account, MagicMock(mode="completion")) - def test_success(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_success(self, app: Flask, trial_app_chat, account): api = module.TrialChatApi() method = unwrap(api.post) @@ -247,7 +280,7 @@ class TestTrialChatApi: assert result is not None - def test_chat_conversation_not_exists(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_chat_conversation_not_exists(self, app: Flask, trial_app_chat, account): api = module.TrialChatApi() method = unwrap(api.post) @@ -262,7 +295,7 @@ class TestTrialChatApi: with pytest.raises(NotFound): 
method(api, account, trial_app_chat) - def test_chat_conversation_completed(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_chat_conversation_completed(self, app: Flask, trial_app_chat, account): api = module.TrialChatApi() method = unwrap(api.post) @@ -277,7 +310,7 @@ class TestTrialChatApi: with pytest.raises(ConversationCompletedError): method(api, account, trial_app_chat) - def test_chat_app_config_broken(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_chat_app_config_broken(self, app: Flask, trial_app_chat, account): api = module.TrialChatApi() method = unwrap(api.post) @@ -292,7 +325,7 @@ class TestTrialChatApi: with pytest.raises(AppUnavailableError): method(api, account, trial_app_chat) - def test_chat_provider_not_init(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_chat_provider_not_init(self, app: Flask, trial_app_chat, account): api = module.TrialChatApi() method = unwrap(api.post) @@ -307,7 +340,7 @@ class TestTrialChatApi: with pytest.raises(ProviderNotInitializeError): method(api, account, trial_app_chat) - def test_chat_quota_exceeded(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_chat_quota_exceeded(self, app: Flask, trial_app_chat, account): api = module.TrialChatApi() method = unwrap(api.post) @@ -322,7 +355,7 @@ class TestTrialChatApi: with pytest.raises(ProviderQuotaExceededError): method(api, account, trial_app_chat) - def test_chat_model_not_support(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_chat_model_not_support(self, app: Flask, trial_app_chat, account): api = module.TrialChatApi() method = unwrap(api.post) @@ -337,7 +370,7 @@ class TestTrialChatApi: with pytest.raises(ProviderModelCurrentlyNotSupportError): method(api, account, trial_app_chat) - def test_chat_invoke_error(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def 
test_chat_invoke_error(self, app: Flask, trial_app_chat, account): api = module.TrialChatApi() method = unwrap(api.post) @@ -352,7 +385,7 @@ class TestTrialChatApi: with pytest.raises(CompletionRequestError): method(api, account, trial_app_chat) - def test_chat_rate_limit_error(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_chat_rate_limit_error(self, app: Flask, trial_app_chat, account): api = module.TrialChatApi() method = unwrap(api.post) @@ -367,7 +400,7 @@ class TestTrialChatApi: with pytest.raises(InvokeRateLimitHttpError): method(api, account, trial_app_chat) - def test_chat_value_error(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_chat_value_error(self, app: Flask, trial_app_chat, account): api = module.TrialChatApi() method = unwrap(api.post) @@ -382,7 +415,7 @@ class TestTrialChatApi: with pytest.raises(ValueError): method(api, account, trial_app_chat) - def test_chat_generic_exception(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_chat_generic_exception(self, app: Flask, trial_app_chat, account): api = module.TrialChatApi() method = unwrap(api.post) @@ -399,7 +432,7 @@ class TestTrialChatApi: class TestTrialCompletionApi: - def test_not_completion_app(self, app: Flask, account: Account) -> None: + def test_not_completion_app(self, app: Flask, account): api = module.TrialCompletionApi() method = unwrap(api.post) @@ -407,7 +440,7 @@ class TestTrialCompletionApi: with pytest.raises(NotCompletionAppError): method(api, account, MagicMock(mode=AppMode.CHAT)) - def test_success(self, app: Flask, trial_app_completion: MagicMock, account: Account) -> None: + def test_success(self, app: Flask, trial_app_completion, account): api = module.TrialCompletionApi() method = unwrap(api.post) @@ -420,7 +453,7 @@ class TestTrialCompletionApi: assert result is not None - def test_completion_app_config_broken(self, app: Flask, trial_app_completion: MagicMock, account: 
Account) -> None: + def test_completion_app_config_broken(self, app: Flask, trial_app_completion, account): api = module.TrialCompletionApi() method = unwrap(api.post) @@ -435,7 +468,7 @@ class TestTrialCompletionApi: with pytest.raises(AppUnavailableError): method(api, account, trial_app_completion) - def test_completion_provider_not_init(self, app: Flask, trial_app_completion: MagicMock, account: Account) -> None: + def test_completion_provider_not_init(self, app: Flask, trial_app_completion, account): api = module.TrialCompletionApi() method = unwrap(api.post) @@ -450,7 +483,7 @@ class TestTrialCompletionApi: with pytest.raises(ProviderNotInitializeError): method(api, account, trial_app_completion) - def test_completion_quota_exceeded(self, app: Flask, trial_app_completion: MagicMock, account: Account) -> None: + def test_completion_quota_exceeded(self, app: Flask, trial_app_completion, account): api = module.TrialCompletionApi() method = unwrap(api.post) @@ -465,7 +498,7 @@ class TestTrialCompletionApi: with pytest.raises(ProviderQuotaExceededError): method(api, account, trial_app_completion) - def test_completion_model_not_support(self, app: Flask, trial_app_completion: MagicMock, account: Account) -> None: + def test_completion_model_not_support(self, app: Flask, trial_app_completion, account): api = module.TrialCompletionApi() method = unwrap(api.post) @@ -480,7 +513,7 @@ class TestTrialCompletionApi: with pytest.raises(ProviderModelCurrentlyNotSupportError): method(api, account, trial_app_completion) - def test_completion_invoke_error(self, app: Flask, trial_app_completion: MagicMock, account: Account) -> None: + def test_completion_invoke_error(self, app: Flask, trial_app_completion, account): api = module.TrialCompletionApi() method = unwrap(api.post) @@ -495,7 +528,7 @@ class TestTrialCompletionApi: with pytest.raises(CompletionRequestError): method(api, account, trial_app_completion) - def test_completion_rate_limit_error(self, app: Flask, 
trial_app_completion: MagicMock, account: Account) -> None: + def test_completion_rate_limit_error(self, app: Flask, trial_app_completion, account): api = module.TrialCompletionApi() method = unwrap(api.post) @@ -510,7 +543,7 @@ class TestTrialCompletionApi: with pytest.raises(InternalServerError): method(api, account, trial_app_completion) - def test_completion_value_error(self, app: Flask, trial_app_completion: MagicMock, account: Account) -> None: + def test_completion_value_error(self, app: Flask, trial_app_completion, account): api = module.TrialCompletionApi() method = unwrap(api.post) @@ -525,7 +558,7 @@ class TestTrialCompletionApi: with pytest.raises(ValueError): method(api, account, trial_app_completion) - def test_completion_generic_exception(self, app: Flask, trial_app_completion: MagicMock, account: Account) -> None: + def test_completion_generic_exception(self, app: Flask, trial_app_completion, account): api = module.TrialCompletionApi() method = unwrap(api.post) @@ -542,7 +575,7 @@ class TestTrialCompletionApi: class TestTrialMessageSuggestedQuestionApi: - def test_not_chat_app(self, app: Flask, account: Account) -> None: + def test_not_chat_app(self, app: Flask, account): api = module.TrialMessageSuggestedQuestionApi() method = unwrap(api.get) @@ -550,7 +583,7 @@ class TestTrialMessageSuggestedQuestionApi: with pytest.raises(NotChatAppError): method(api, account, MagicMock(mode="completion"), str(uuid4())) - def test_success(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_success(self, app: Flask, trial_app_chat, account): api = module.TrialMessageSuggestedQuestionApi() method = unwrap(api.get) @@ -566,7 +599,7 @@ class TestTrialMessageSuggestedQuestionApi: assert result == {"data": ["q1", "q2"]} - def test_conversation_not_exists(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_conversation_not_exists(self, app: Flask, trial_app_chat, account): api = 
module.TrialMessageSuggestedQuestionApi() method = unwrap(api.get) @@ -583,14 +616,14 @@ class TestTrialMessageSuggestedQuestionApi: class TestTrialAppParameterApi: - def test_app_unavailable(self) -> None: + def test_app_unavailable(self): api = module.TrialAppParameterApi() method = unwrap(api.get) with pytest.raises(AppUnavailableError): method(api, None) - def test_success_non_workflow(self, valid_parameters: dict[str, object]) -> None: + def test_success_non_workflow(self, valid_parameters): api = module.TrialAppParameterApi() method = unwrap(api.get) @@ -617,11 +650,12 @@ class TestTrialAppParameterApi: class TestTrialChatAudioApi: - def test_success(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_success(self, app: Flask, trial_app_chat, account): api = module.TrialChatAudioApi() method = unwrap(api.post) - file_data = _file_data() + file_data: Any = BytesIO(b"fake audio data") + file_data.filename = "test.wav" with ( app.test_request_context( @@ -634,11 +668,12 @@ class TestTrialChatAudioApi: assert result == {"text": "hello"} - def test_app_config_broken(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_app_config_broken(self, app: Flask, trial_app_chat, account): api = module.TrialChatAudioApi() method = unwrap(api.post) - file_data = _file_data() + file_data: Any = BytesIO(b"fake audio data") + file_data.filename = "test.wav" with ( app.test_request_context( @@ -653,11 +688,12 @@ class TestTrialChatAudioApi: with pytest.raises(module.AppUnavailableError): method(api, account, trial_app_chat) - def test_no_audio_uploaded(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_no_audio_uploaded(self, app: Flask, trial_app_chat, account): api = module.TrialChatAudioApi() method = unwrap(api.post) - file_data = _file_data() + file_data: Any = BytesIO(b"fake audio data") + file_data.filename = "test.wav" with ( app.test_request_context( @@ -672,11 +708,12 @@ class 
TestTrialChatAudioApi: with pytest.raises(module.NoAudioUploadedError): method(api, account, trial_app_chat) - def test_audio_too_large(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_audio_too_large(self, app: Flask, trial_app_chat, account): api = module.TrialChatAudioApi() method = unwrap(api.post) - file_data = _file_data() + file_data: Any = BytesIO(b"fake audio data") + file_data.filename = "test.wav" with ( app.test_request_context( @@ -691,11 +728,12 @@ class TestTrialChatAudioApi: with pytest.raises(module.AudioTooLargeError): method(api, account, trial_app_chat) - def test_unsupported_audio_type(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_unsupported_audio_type(self, app: Flask, trial_app_chat, account): api = module.TrialChatAudioApi() method = unwrap(api.post) - file_data = _file_data() + file_data: Any = BytesIO(b"fake audio data") + file_data.filename = "test.wav" with ( app.test_request_context( @@ -710,11 +748,12 @@ class TestTrialChatAudioApi: with pytest.raises(module.UnsupportedAudioTypeError): method(api, account, trial_app_chat) - def test_provider_not_support_tts(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_provider_not_support_tts(self, app: Flask, trial_app_chat, account): api = module.TrialChatAudioApi() method = unwrap(api.post) - file_data = _file_data() + file_data: Any = BytesIO(b"fake audio data") + file_data.filename = "test.wav" with ( app.test_request_context( @@ -729,11 +768,12 @@ class TestTrialChatAudioApi: with pytest.raises(module.ProviderNotSupportSpeechToTextError): method(api, account, trial_app_chat) - def test_provider_not_init(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_provider_not_init(self, app: Flask, trial_app_chat, account): api = module.TrialChatAudioApi() method = unwrap(api.post) - file_data = _file_data() + file_data: Any = BytesIO(b"fake audio data") + 
file_data.filename = "test.wav" with ( app.test_request_context( @@ -744,11 +784,12 @@ class TestTrialChatAudioApi: with pytest.raises(ProviderNotInitializeError): method(api, account, trial_app_chat) - def test_quota_exceeded(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_quota_exceeded(self, app: Flask, trial_app_chat, account): api = module.TrialChatAudioApi() method = unwrap(api.post) - file_data = _file_data() + file_data: Any = BytesIO(b"fake audio data") + file_data.filename = "test.wav" with ( app.test_request_context( @@ -761,7 +802,7 @@ class TestTrialChatAudioApi: class TestTrialChatTextApi: - def test_success(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_success(self, app: Flask, trial_app_chat, account): api = module.TrialChatTextApi() method = unwrap(api.post) @@ -774,7 +815,7 @@ class TestTrialChatTextApi: assert result == {"audio": "base64_data"} - def test_app_config_broken(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_app_config_broken(self, app: Flask, trial_app_chat, account): api = module.TrialChatTextApi() method = unwrap(api.post) @@ -789,7 +830,7 @@ class TestTrialChatTextApi: with pytest.raises(module.AppUnavailableError): method(api, account, trial_app_chat) - def test_provider_not_support(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_provider_not_support(self, app: Flask, trial_app_chat, account): api = module.TrialChatTextApi() method = unwrap(api.post) @@ -804,7 +845,7 @@ class TestTrialChatTextApi: with pytest.raises(module.ProviderNotSupportSpeechToTextError): method(api, account, trial_app_chat) - def test_audio_too_large(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_audio_too_large(self, app: Flask, trial_app_chat, account): api = module.TrialChatTextApi() method = unwrap(api.post) @@ -819,7 +860,7 @@ class TestTrialChatTextApi: with 
pytest.raises(module.AudioTooLargeError): method(api, account, trial_app_chat) - def test_no_audio_uploaded(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_no_audio_uploaded(self, app: Flask, trial_app_chat, account): api = module.TrialChatTextApi() method = unwrap(api.post) @@ -834,7 +875,7 @@ class TestTrialChatTextApi: with pytest.raises(module.NoAudioUploadedError): method(api, account, trial_app_chat) - def test_provider_not_init(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_provider_not_init(self, app: Flask, trial_app_chat, account): api = module.TrialChatTextApi() method = unwrap(api.post) @@ -845,7 +886,7 @@ class TestTrialChatTextApi: with pytest.raises(ProviderNotInitializeError): method(api, account, trial_app_chat) - def test_quota_exceeded(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_quota_exceeded(self, app: Flask, trial_app_chat, account): api = module.TrialChatTextApi() method = unwrap(api.post) @@ -856,7 +897,7 @@ class TestTrialChatTextApi: with pytest.raises(ProviderQuotaExceededError): method(api, account, trial_app_chat) - def test_model_not_support(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_model_not_support(self, app: Flask, trial_app_chat, account): api = module.TrialChatTextApi() method = unwrap(api.post) @@ -867,7 +908,7 @@ class TestTrialChatTextApi: with pytest.raises(ProviderModelCurrentlyNotSupportError): method(api, account, trial_app_chat) - def test_invoke_error(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_invoke_error(self, app: Flask, trial_app_chat, account): api = module.TrialChatTextApi() method = unwrap(api.post) @@ -880,7 +921,7 @@ class TestTrialChatTextApi: class TestTrialAppWorkflowTaskStopApi: - def test_not_workflow_app(self, app: Flask, trial_app_chat: MagicMock) -> None: + def test_not_workflow_app(self, app: Flask, trial_app_chat): 
api = module.TrialAppWorkflowTaskStopApi() method = unwrap(api.post) @@ -888,7 +929,7 @@ class TestTrialAppWorkflowTaskStopApi: with pytest.raises(NotWorkflowAppError): method(api, trial_app_chat, str(uuid4())) - def test_success(self, app: Flask, trial_app_workflow: MagicMock) -> None: + def test_success(self, app: Flask, trial_app_workflow, account): api = module.TrialAppWorkflowTaskStopApi() method = unwrap(api.post) @@ -906,7 +947,7 @@ class TestTrialAppWorkflowTaskStopApi: class TestTrialSitApi: - def test_no_site(self, app: Flask) -> None: + def test_no_site(self, app: Flask): api = module.TrialSitApi() method = unwrap(api.get) app_model = MagicMock() @@ -917,7 +958,7 @@ class TestTrialSitApi: with pytest.raises(Forbidden): method(api, app_model) - def test_archived_tenant(self, app: Flask) -> None: + def test_archived_tenant(self, app: Flask): api = module.TrialSitApi() method = unwrap(api.get) @@ -932,7 +973,7 @@ class TestTrialSitApi: with pytest.raises(Forbidden): method(api, app_model) - def test_success(self, app: Flask) -> None: + def test_success(self, app: Flask): api = module.TrialSitApi() method = unwrap(api.get) @@ -957,11 +998,12 @@ class TestTrialSitApi: class TestTrialChatAudioApiExceptionHandlers: - def test_provider_not_init(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_provider_not_init(self, app: Flask, trial_app_chat, account): api = module.TrialChatAudioApi() method = unwrap(api.post) - file_data = _file_data() + file_data: Any = BytesIO(b"fake audio data") + file_data.filename = "test.wav" with ( app.test_request_context( @@ -976,11 +1018,12 @@ class TestTrialChatAudioApiExceptionHandlers: with pytest.raises(ProviderNotInitializeError): method(api, account, trial_app_chat) - def test_quota_exceeded(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_quota_exceeded(self, app: Flask, trial_app_chat, account): api = module.TrialChatAudioApi() method = unwrap(api.post) - 
file_data = _file_data() + file_data: Any = BytesIO(b"fake audio data") + file_data.filename = "test.wav" with ( app.test_request_context( @@ -995,11 +1038,12 @@ class TestTrialChatAudioApiExceptionHandlers: with pytest.raises(ProviderQuotaExceededError): method(api, account, trial_app_chat) - def test_invoke_error(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_invoke_error(self, app: Flask, trial_app_chat, account): api = module.TrialChatAudioApi() method = unwrap(api.post) - file_data = _file_data() + file_data: Any = BytesIO(b"fake audio data") + file_data.filename = "test.wav" with ( app.test_request_context( @@ -1016,7 +1060,7 @@ class TestTrialChatAudioApiExceptionHandlers: class TestTrialChatTextApiExceptionHandlers: - def test_app_config_broken(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_app_config_broken(self, app: Flask, trial_app_chat, account): api = module.TrialChatTextApi() method = unwrap(api.post) @@ -1031,7 +1075,7 @@ class TestTrialChatTextApiExceptionHandlers: with pytest.raises(module.AppUnavailableError): method(api, account, trial_app_chat) - def test_unsupported_audio_type(self, app: Flask, trial_app_chat: MagicMock, account: Account) -> None: + def test_unsupported_audio_type(self, app: Flask, trial_app_chat, account): api = module.TrialChatTextApi() method = unwrap(api.post) diff --git a/api/tests/unit_tests/controllers/service_api/dataset/test_document.py b/api/tests/unit_tests/controllers/service_api/dataset/test_document.py index 16b54acd8c6..58d534ea27c 100644 --- a/api/tests/unit_tests/controllers/service_api/dataset/test_document.py +++ b/api/tests/unit_tests/controllers/service_api/dataset/test_document.py @@ -15,7 +15,10 @@ Focus on: - API endpoint business logic and error handling """ +import json import uuid +from dataclasses import dataclass, field +from datetime import UTC, datetime from unittest.mock import Mock, patch import pytest @@ -39,46 +42,151 @@ 
from controllers.service_api.dataset.document import ( ) from controllers.service_api.dataset.error import ArchivedDocumentImmutableError from core.rag.index_processor.constant.index_type import IndexStructureType -from models.enums import IndexingStatus +from models.dataset import Dataset, Document +from models.enums import DataSourceType, DocumentCreatedFrom, DocumentDocType, IndexingStatus from services.dataset_service import DocumentService from services.entities.knowledge_entities.knowledge_entities import ProcessRule, RetrievalModel -def make_serializable_document(**overrides: object) -> Mock: - attrs: dict[str, object] = { - "id": str(uuid.uuid4()), - "position": 1, - "data_source_type": "upload_file", - "data_source_info_dict": {"upload_file_id": "file-1"}, - "data_source_detail_dict": {}, - "dataset_process_rule_id": None, - "batch": "batch-1", - "name": "Test Document", - "created_from": "api", - "created_by": "user-1", - "created_at": None, - "tokens": None, - "indexing_status": "completed", - "error": None, - "enabled": True, - "disabled_at": None, - "disabled_by": None, - "archived": False, - "display_status": "available", - "word_count": None, - "hit_count": 0, - "doc_form": "text_model", - "doc_metadata_details": None, - "summary_index_status": None, - "need_summary": False, - } - attrs.update(overrides) - document = Mock(spec_set=list(attrs)) - for name, value in attrs.items(): +def _document_data_source_info() -> dict[str, str]: + return {"type": "website_crawl", "url": "https://example.com/docs", "title": "Docs"} + + +@dataclass +class _DocumentModelSessionStub: + scalar_values: list[object] = field(default_factory=list) + + def scalar(self, *args: object, **kwargs: object) -> object: + if self.scalar_values: + return self.scalar_values.pop(0) + return None + + def scalars(self, *args: object, **kwargs: object) -> object: + result = Mock() + result.all.return_value = [] + return result + + def get(self, *args: object, **kwargs: object) -> None: + 
return None + + +@dataclass +class _DocumentModelDbStub: + session: _DocumentModelSessionStub = field(default_factory=_DocumentModelSessionStub) + + +@dataclass +class _PaginationRecord: + items: list[Document] + total: int + + +@pytest.fixture +def mock_tenant() -> str: + return str(uuid.uuid4()) + + +@pytest.fixture +def mock_dataset(mock_tenant: str) -> Dataset: + return make_dataset(tenant_id=mock_tenant) + + +@pytest.fixture +def mock_document(mock_dataset: Dataset) -> Document: + return make_serializable_document() + + +def make_dataset(**overrides: object) -> Dataset: + dataset = Dataset( + id=str(uuid.uuid4()), + tenant_id=str(uuid.uuid4()), + name="Test Dataset", + data_source_type=DataSourceType.WEBSITE_CRAWL, + indexing_technique="economy", + created_by="user-1", + maintainer="user-1", + provider="vendor", + summary_index_setting=None, + ) + for name, value in overrides.items(): + setattr(dataset, name, value) + return dataset + + +def make_serializable_document(**overrides: object) -> Document: + data_source_info = overrides.pop("data_source_info", _document_data_source_info()) + document = Document( + id=str(uuid.uuid4()), + tenant_id=str(uuid.uuid4()), + dataset_id=str(uuid.uuid4()), + position=1, + data_source_type=DataSourceType.WEBSITE_CRAWL, + data_source_info=json.dumps(data_source_info), + dataset_process_rule_id=None, + batch="batch-1", + name="Test Document", + created_from=DocumentCreatedFrom.API, + created_by="user-1", + created_at=datetime(2021, 1, 1, tzinfo=UTC), + tokens=100, + indexing_status=IndexingStatus.COMPLETED, + completed_at=datetime(2021, 1, 1, 0, 0, 1, tzinfo=UTC), + updated_at=datetime(2021, 1, 1, 0, 0, 2, tzinfo=UTC), + indexing_latency=0.5, + error=None, + enabled=True, + disabled_at=None, + disabled_by=None, + archived=False, + doc_type=DocumentDocType.BOOK, + doc_metadata=None, + doc_form=IndexStructureType.PARAGRAPH_INDEX, + doc_language="English", + need_summary=False, + is_paused=False, + 
processing_started_at=datetime(2021, 1, 1, tzinfo=UTC), + parsing_completed_at=datetime(2021, 1, 1, 0, 0, 1, tzinfo=UTC), + cleaning_completed_at=datetime(2021, 1, 1, 0, 0, 2, tzinfo=UTC), + splitting_completed_at=datetime(2021, 1, 1, 0, 0, 3, tzinfo=UTC), + paused_at=None, + stopped_at=None, + word_count=None, + ) + document.summary_index_status = None # type: ignore[attr-defined] + for name, value in overrides.items(): setattr(document, name, value) return document +def _expected_document_response(document: Document) -> dict[str, object]: + return { + "id": document.id, + "position": document.position, + "data_source_type": document.data_source_type, + "data_source_info": document.data_source_info_dict, + "data_source_detail_dict": document.data_source_detail_dict, + "dataset_process_rule_id": document.dataset_process_rule_id, + "name": document.name, + "created_from": document.created_from, + "created_by": document.created_by, + "created_at": int(document.created_at.timestamp()) if document.created_at else None, + "tokens": document.tokens, + "indexing_status": document.indexing_status, + "error": document.error, + "enabled": document.enabled, + "disabled_at": int(document.disabled_at.timestamp()) if document.disabled_at else None, + "disabled_by": document.disabled_by, + "archived": document.archived, + "display_status": document.display_status, + "word_count": document.word_count, + "hit_count": 0, + "doc_form": document.doc_form, + "doc_metadata": [], + "summary_index_status": getattr(document, "summary_index_status", None), + "need_summary": document.need_summary, + } + + class TestDocumentTextCreatePayload: """Test suite for DocumentTextCreatePayload Pydantic model.""" @@ -263,23 +371,21 @@ class TestDocumentService: @patch.object(DocumentService, "get_document") def test_get_document_returns_document(self, mock_get: Mock) -> None: """Test get_document returns document object.""" - mock_doc = Mock() - mock_doc.id = str(uuid.uuid4()) - mock_doc.name = "Test 
Document" - mock_doc.indexing_status = "completed" - mock_get.return_value = mock_doc + document = make_serializable_document(name="Test Document", indexing_status="completed") + mock_get.return_value = document result = DocumentService.get_document(dataset_id="dataset_id", document_id="doc_id") assert result is not None + assert result == document assert result.name == "Test Document" assert result.indexing_status == "completed" @patch.object(DocumentService, "delete_document") def test_delete_document_called(self, mock_delete): """Test delete_document is called with document.""" - mock_doc = Mock() - DocumentService.delete_document(document=mock_doc) - mock_delete.assert_called_once_with(document=mock_doc) + document = make_serializable_document() + DocumentService.delete_document(document=document) + mock_delete.assert_called_once_with(document=document) class TestDocumentIndexingStatus: @@ -531,9 +637,9 @@ class TestDocumentServiceSaveValidation: # These tests call controller methods directly, bypassing the # ``DatasetApiResource.method_decorators`` (``validate_dataset_token``) by # invoking the *undecorated* method on the class instance. Every external -# dependency (``db``, service classes, ``marshal``, ``current_user``, …) is -# patched at the module where it is looked up so the real SQLAlchemy / Flask -# extensions are never touched. +# dependency (``db``, service classes, ``current_user``, …) is patched at the +# module where it is looked up so the real SQLAlchemy / Flask extensions are +# never touched. 
# ============================================================================= @@ -546,58 +652,33 @@ class TestDocumentApiGet: """ @pytest.fixture - def mock_doc_detail(self, mock_tenant: Mock) -> Mock: - """A document mock with every attribute ``DocumentApi.get`` reads.""" - doc = Mock() - doc.id = str(uuid.uuid4()) - doc.tenant_id = mock_tenant.id - doc.name = "test_document.txt" - doc.indexing_status = "completed" - doc.enabled = True - doc.doc_form = IndexStructureType.PARAGRAPH_INDEX - doc.doc_language = "English" - doc.doc_type = "book" - doc.doc_metadata_details = {"source": "upload"} - doc.position = 1 - doc.data_source_type = "upload_file" - doc.data_source_detail_dict = {"type": "upload_file"} - doc.dataset_process_rule_id = str(uuid.uuid4()) - doc.dataset_process_rule = None - doc.created_from = "api" - doc.created_by = str(uuid.uuid4()) - doc.created_at = Mock() - doc.created_at.timestamp.return_value = 1609459200 - doc.tokens = 100 - doc.completed_at = Mock() - doc.completed_at.timestamp.return_value = 1609459200 - doc.updated_at = Mock() - doc.updated_at.timestamp.return_value = 1609459200 - doc.indexing_latency = 0.5 - doc.error = None - doc.disabled_at = None - doc.disabled_by = None - doc.archived = False - doc.segment_count = 5 - doc.average_segment_length = 20 - doc.hit_count = 0 - doc.display_status = "available" - doc.need_summary = False - return doc + def mock_doc_detail(self, mock_tenant: str) -> Document: + """A concrete document record with every attribute ``DocumentApi.get`` reads.""" + return make_serializable_document( + id=str(uuid.uuid4()), + tenant_id=mock_tenant, + name="test_document.txt", + dataset_process_rule_id=str(uuid.uuid4()), + word_count=100, + ) @patch("controllers.service_api.dataset.document.DatasetService") @patch("controllers.service_api.dataset.document.DocumentService") def test_get_document_success_with_all_metadata( - self, mock_doc_svc: Mock, mock_dataset_svc: Mock, app: Flask, mock_tenant: Mock, 
mock_doc_detail: Mock + self, + mock_doc_svc: Mock, + mock_dataset_svc: Mock, + app: Flask, + mock_tenant: str, + mock_doc_detail: Document, ) -> None: """Test successful document retrieval with metadata='all'.""" # Arrange dataset_id = str(uuid.uuid4()) - mock_dataset = Mock() - mock_dataset.id = dataset_id - mock_dataset.summary_index_setting = None + mock_dataset = make_dataset(id=dataset_id, tenant_id=mock_tenant, summary_index_setting=None) mock_doc_svc.get_document.return_value = mock_doc_detail - mock_dataset_svc.get_process_rules.return_value = [] + mock_dataset_svc.get_process_rules.return_value = {"mode": "automatic", "rules": {}} # Act with app.test_request_context( @@ -605,23 +686,54 @@ class TestDocumentApiGet: method="GET", ): api = DocumentApi() - with patch.object(api, "get_dataset", return_value=mock_dataset): - response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id) + with ( + patch.object(api, "get_dataset", return_value=mock_dataset), + patch("models.dataset.db", _DocumentModelDbStub(_DocumentModelSessionStub([5, 5, 5, 5, 0]))), + ): + response = api.get(tenant_id=mock_tenant, dataset_id=dataset_id, document_id=mock_doc_detail.id) # Assert - assert response["id"] == mock_doc_detail.id - assert response["name"] == mock_doc_detail.name - assert response["indexing_status"] == mock_doc_detail.indexing_status - assert "doc_type" in response - assert "doc_metadata" in response + assert response == { + "id": mock_doc_detail.id, + "position": 1, + "data_source_type": "website_crawl", + "data_source_info": {"type": "website_crawl", "url": "https://example.com/docs", "title": "Docs"}, + "dataset_process_rule_id": mock_doc_detail.dataset_process_rule_id, + "dataset_process_rule": {"mode": "automatic", "rules": {}}, + "document_process_rule": {}, + "name": "test_document.txt", + "created_from": "api", + "created_by": "user-1", + "created_at": 1609459200, + "tokens": 100, + "indexing_status": "completed", + 
"completed_at": 1609459201, + "updated_at": 1609459202, + "indexing_latency": 0.5, + "error": None, + "enabled": True, + "disabled_at": None, + "disabled_by": None, + "archived": False, + "doc_type": "book", + "doc_metadata": None, + "segment_count": 5, + "average_segment_length": 20, + "hit_count": 0, + "display_status": "available", + "doc_form": "text_model", + "doc_language": "English", + "summary_index_status": None, + "need_summary": False, + } + assert response["summary_index_status"] is None @patch("controllers.service_api.dataset.document.DocumentService") - def test_get_document_not_found(self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock) -> None: + def test_get_document_not_found(self, mock_doc_svc: Mock, app: Flask, mock_tenant: str) -> None: """Test 404 when document is not found.""" # Arrange dataset_id = str(uuid.uuid4()) - mock_dataset = Mock() - mock_dataset.id = dataset_id + mock_dataset = make_dataset(id=dataset_id, tenant_id=mock_tenant) mock_doc_svc.get_document.return_value = None @@ -633,17 +745,16 @@ class TestDocumentApiGet: api = DocumentApi() with patch.object(api, "get_dataset", return_value=mock_dataset): with pytest.raises(NotFound): - api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id="nonexistent") + api.get(tenant_id=mock_tenant, dataset_id=dataset_id, document_id="nonexistent") @patch("controllers.service_api.dataset.document.DocumentService") def test_get_document_forbidden_wrong_tenant( - self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock + self, mock_doc_svc: Mock, app: Flask, mock_tenant: str, mock_doc_detail: Document ) -> None: """Test 403 when document tenant doesn't match request tenant.""" # Arrange dataset_id = str(uuid.uuid4()) - mock_dataset = Mock() - mock_dataset.id = dataset_id + mock_dataset = make_dataset(id=dataset_id, tenant_id=mock_tenant) mock_doc_detail.tenant_id = "different-tenant-id" mock_doc_svc.get_document.return_value = mock_doc_detail @@ -656,18 
+767,16 @@ class TestDocumentApiGet: api = DocumentApi() with patch.object(api, "get_dataset", return_value=mock_dataset): with pytest.raises(Forbidden): - api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id) + api.get(tenant_id=mock_tenant, dataset_id=dataset_id, document_id=mock_doc_detail.id) @patch("controllers.service_api.dataset.document.DocumentService") def test_get_document_metadata_only( - self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock + self, mock_doc_svc: Mock, app: Flask, mock_tenant: str, mock_doc_detail: Document ) -> None: """Test document retrieval with metadata='only'.""" # Arrange dataset_id = str(uuid.uuid4()) - mock_dataset = Mock() - mock_dataset.id = dataset_id - mock_dataset.summary_index_setting = None + mock_dataset = make_dataset(id=dataset_id, tenant_id=mock_tenant, summary_index_setting=None) mock_doc_svc.get_document.return_value = mock_doc_detail @@ -678,28 +787,33 @@ class TestDocumentApiGet: ): api = DocumentApi() with patch.object(api, "get_dataset", return_value=mock_dataset): - response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id) + response = api.get(tenant_id=mock_tenant, dataset_id=dataset_id, document_id=mock_doc_detail.id) # Assert — metadata='only' returns only id, doc_type, doc_metadata assert response["id"] == mock_doc_detail.id - assert "doc_type" in response - assert "doc_metadata" in response - assert "name" not in response + assert response == { + "id": mock_doc_detail.id, + "doc_type": mock_doc_detail.doc_type, + "doc_metadata": None, + } @patch("controllers.service_api.dataset.document.DatasetService") @patch("controllers.service_api.dataset.document.DocumentService") def test_get_document_metadata_without( - self, mock_doc_svc: Mock, mock_dataset_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock + self, + mock_doc_svc: Mock, + mock_dataset_svc: Mock, + app: Flask, + mock_tenant: str, + 
mock_doc_detail: Document, ) -> None: """Test document retrieval with metadata='without'.""" # Arrange dataset_id = str(uuid.uuid4()) - mock_dataset = Mock() - mock_dataset.id = dataset_id - mock_dataset.summary_index_setting = None + mock_dataset = make_dataset(id=dataset_id, tenant_id=mock_tenant, summary_index_setting=None) mock_doc_svc.get_document.return_value = mock_doc_detail - mock_dataset_svc.get_process_rules.return_value = [] + mock_dataset_svc.get_process_rules.return_value = {"mode": "automatic", "rules": {}} # Act with app.test_request_context( @@ -707,25 +821,60 @@ class TestDocumentApiGet: method="GET", ): api = DocumentApi() - with patch.object(api, "get_dataset", return_value=mock_dataset): - response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id) + with ( + patch.object(api, "get_dataset", return_value=mock_dataset), + patch("models.dataset.db", _DocumentModelDbStub(_DocumentModelSessionStub([5, 5, 5, 5, 0]))), + ): + response = api.get(tenant_id=mock_tenant, dataset_id=dataset_id, document_id=mock_doc_detail.id) # Assert — metadata='without' omits doc_type / doc_metadata assert response["id"] == mock_doc_detail.id assert "doc_type" not in response assert "doc_metadata" not in response assert "name" in response + assert set(response) == { + "id", + "position", + "data_source_type", + "data_source_info", + "dataset_process_rule_id", + "dataset_process_rule", + "document_process_rule", + "name", + "created_from", + "created_by", + "created_at", + "tokens", + "indexing_status", + "completed_at", + "updated_at", + "indexing_latency", + "error", + "enabled", + "disabled_at", + "disabled_by", + "archived", + "segment_count", + "average_segment_length", + "hit_count", + "display_status", + "doc_form", + "doc_language", + "summary_index_status", + "need_summary", + } + assert response["error"] is None + assert response["disabled_at"] is None + assert response["disabled_by"] is None 
@patch("controllers.service_api.dataset.document.DocumentService") def test_get_document_invalid_metadata_value( - self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock + self, mock_doc_svc: Mock, app: Flask, mock_tenant: str, mock_doc_detail: Document ) -> None: """Test error when metadata parameter has invalid value.""" # Arrange dataset_id = str(uuid.uuid4()) - mock_dataset = Mock() - mock_dataset.id = dataset_id - mock_dataset.summary_index_setting = None + mock_dataset = make_dataset(id=dataset_id, tenant_id=mock_tenant, summary_index_setting=None) mock_doc_svc.get_document.return_value = mock_doc_detail @@ -737,7 +886,7 @@ class TestDocumentApiGet: api = DocumentApi() with patch.object(api, "get_dataset", return_value=mock_dataset): with pytest.raises(InvalidMetadataError): - api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id) + api.get(tenant_id=mock_tenant, dataset_id=dataset_id, document_id=mock_doc_detail.id) class TestDocumentApiDelete: @@ -762,8 +911,7 @@ class TestDocumentApiDelete: """Test successful document deletion.""" # Arrange dataset_id = str(uuid.uuid4()) - mock_dataset = Mock() - mock_dataset.id = dataset_id + mock_dataset = make_dataset(id=dataset_id, tenant_id=mock_tenant) mock_db.session.scalar.return_value = mock_dataset mock_doc_svc.get_document.return_value = mock_document @@ -777,7 +925,7 @@ class TestDocumentApiDelete: ): api = DocumentApi() response = self._call_delete( - api, tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_document.id + api, tenant_id=mock_tenant, dataset_id=dataset_id, document_id=mock_document.id ) # Assert @@ -791,8 +939,7 @@ class TestDocumentApiDelete: # Arrange dataset_id = str(uuid.uuid4()) document_id = str(uuid.uuid4()) - mock_dataset = Mock() - mock_dataset.id = dataset_id + mock_dataset = make_dataset(id=dataset_id, tenant_id=mock_tenant) mock_db.session.scalar.return_value = mock_dataset mock_doc_svc.get_document.return_value 
= None @@ -804,7 +951,7 @@ class TestDocumentApiDelete: ): api = DocumentApi() with pytest.raises(NotFound): - self._call_delete(api, tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=document_id) + self._call_delete(api, tenant_id=mock_tenant, dataset_id=dataset_id, document_id=document_id) @patch("controllers.service_api.dataset.document.DocumentService") @patch("controllers.service_api.dataset.document.db") @@ -812,8 +959,7 @@ class TestDocumentApiDelete: """Test ArchivedDocumentImmutableError when deleting archived document.""" # Arrange dataset_id = str(uuid.uuid4()) - mock_dataset = Mock() - mock_dataset.id = dataset_id + mock_dataset = make_dataset(id=dataset_id, tenant_id=mock_tenant) mock_db.session.scalar.return_value = mock_dataset mock_doc_svc.get_document.return_value = mock_document @@ -826,7 +972,7 @@ class TestDocumentApiDelete: ): api = DocumentApi() with pytest.raises(ArchivedDocumentImmutableError): - self._call_delete(api, tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_document.id) + self._call_delete(api, tenant_id=mock_tenant, dataset_id=dataset_id, document_id=mock_document.id) @patch("controllers.service_api.dataset.document.DocumentService") @patch("controllers.service_api.dataset.document.db") @@ -844,7 +990,7 @@ class TestDocumentApiDelete: ): api = DocumentApi() with pytest.raises(ValueError, match="Dataset does not exist."): - self._call_delete(api, tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=document_id) + self._call_delete(api, tenant_id=mock_tenant, dataset_id=dataset_id, document_id=document_id) class TestDocumentListApi: @@ -857,17 +1003,18 @@ class TestDocumentListApi: # Arrange mock_db.session.scalar.return_value = mock_dataset - mock_pagination = Mock() - mock_pagination.items = [ + documents = [ make_serializable_document( id="doc-1", name="Document 1", - doc_metadata_details=[{"id": "meta-1", "name": "amount", "type": "number", "value": 42}], + tenant_id=mock_tenant, + 
dataset_id=mock_dataset.id, + ), + make_serializable_document( + id="doc-2", name="Document 2", tenant_id=mock_tenant, dataset_id=mock_dataset.id ), - make_serializable_document(id="doc-2", name="Document 2"), ] - mock_pagination.total = 2 - mock_db.paginate.return_value = mock_pagination + mock_db.paginate.return_value = _PaginationRecord(items=documents, total=2) mock_doc_svc.enrich_documents_with_summary_index_status.return_value = None @@ -877,17 +1024,17 @@ class TestDocumentListApi: method="GET", ): api = DocumentListApi() - response = api.get(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id) + with patch("models.dataset.db", _DocumentModelDbStub(_DocumentModelSessionStub([0, 0]))): + response = api.get(tenant_id=mock_tenant, dataset_id=mock_dataset.id) # Assert - assert "data" in response - assert "total" in response - assert response["page"] == 1 - assert response["limit"] == 20 - assert response["total"] == 2 - assert response["data"][0]["id"] == "doc-1" - assert response["data"][0]["data_source_info"] == {"upload_file_id": "file-1"} - assert response["data"][0]["doc_metadata"][0]["value"] == 42 + assert response == { + "data": [_expected_document_response(documents[0]), _expected_document_response(documents[1])], + "has_more": False, + "limit": 20, + "total": 2, + "page": 1, + } assert "data_source_info_dict" not in response["data"][0] assert "doc_metadata_details" not in response["data"][0] @@ -904,7 +1051,7 @@ class TestDocumentListApi: ): api = DocumentListApi() with pytest.raises(NotFound): - api.get(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id) + api.get(tenant_id=mock_tenant, dataset_id=mock_dataset.id) class TestDocumentIndexingStatusApi: @@ -916,20 +1063,14 @@ class TestDocumentIndexingStatusApi: """Test successful indexing status retrieval.""" # Arrange batch_id = "batch_123" - mock_doc = Mock() - mock_doc.id = str(uuid.uuid4()) - mock_doc.is_paused = False - mock_doc.indexing_status = "completed" - mock_doc.processing_started_at = 
None - mock_doc.parsing_completed_at = None - mock_doc.cleaning_completed_at = None - mock_doc.splitting_completed_at = None - mock_doc.completed_at = None - mock_doc.paused_at = None - mock_doc.error = None - mock_doc.stopped_at = None + document = make_serializable_document( + id=str(uuid.uuid4()), + tenant_id=mock_tenant, + dataset_id=mock_dataset.id, + completed_at=datetime(2021, 1, 1, 0, 0, 4, tzinfo=UTC), + ) - mock_doc_svc.get_batch_documents.return_value = [mock_doc] + mock_doc_svc.get_batch_documents.return_value = [document] # scalar() called 3 times: dataset lookup, completed_segments count, total_segments count mock_db.session.scalar.side_effect = [mock_dataset, 5, 5] @@ -940,17 +1081,27 @@ class TestDocumentIndexingStatusApi: method="GET", ): api = DocumentIndexingStatusApi() - response = api.get(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id, batch=batch_id) + response = api.get(tenant_id=mock_tenant, dataset_id=mock_dataset.id, batch=batch_id) # Assert - assert "data" in response - assert len(response["data"]) == 1 - item = response["data"][0] - assert item["id"] == mock_doc.id - assert item["indexing_status"] == "completed" - assert item["completed_segments"] == 5 - assert item["total_segments"] == 5 - assert item["processing_started_at"] is None + assert response == { + "data": [ + { + "id": document.id, + "indexing_status": "completed", + "processing_started_at": 1609459200, + "parsing_completed_at": 1609459201, + "cleaning_completed_at": 1609459202, + "splitting_completed_at": 1609459203, + "completed_at": 1609459204, + "paused_at": None, + "error": None, + "stopped_at": None, + "completed_segments": 5, + "total_segments": 5, + } + ] + } @patch("controllers.service_api.dataset.document.db") def test_get_indexing_status_dataset_not_found(self, mock_db, app: Flask, mock_tenant, mock_dataset): @@ -966,7 +1117,7 @@ class TestDocumentIndexingStatusApi: ): api = DocumentIndexingStatusApi() with pytest.raises(NotFound): - 
api.get(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id, batch=batch_id) + api.get(tenant_id=mock_tenant, dataset_id=mock_dataset.id, batch=batch_id) @patch("controllers.service_api.dataset.document.DocumentService") @patch("controllers.service_api.dataset.document.db") @@ -986,7 +1137,7 @@ class TestDocumentIndexingStatusApi: ): api = DocumentIndexingStatusApi() with pytest.raises(NotFound): - api.get(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id, batch=batch_id) + api.get(tenant_id=mock_tenant, dataset_id=mock_dataset.id, batch=batch_id) class TestDocumentAddByTextApi: @@ -1049,7 +1200,7 @@ class TestDocumentAddByTextApi: ): """Test successful document creation by text.""" # Arrange — neutralise billing decorators - self._setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) + self._setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant) mock_db.session.scalar.return_value = mock_dataset mock_dataset.indexing_technique = "economy" @@ -1080,16 +1231,14 @@ class TestDocumentAddByTextApi: headers={"Authorization": "Bearer test_token"}, ): api = DocumentAddByTextApi() - response, status = api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id) + with patch("models.dataset.db", _DocumentModelDbStub(_DocumentModelSessionStub([object(), 0]))): + response, status = api.post(tenant_id=mock_tenant, dataset_id=mock_dataset.id) # Assert - assert status == 200 - assert "document" in response - assert "batch" in response - assert response["batch"] == "batch_123" - assert response["document"]["id"] == "doc-create-text" - assert response["document"]["data_source_info"] == {"upload_file_id": "file-1"} - assert response["document"]["doc_metadata"] == [] + assert (response, status) == ( + {"document": _expected_document_response(mock_doc), "batch": "batch_123"}, + 200, + ) assert "data_source_info_dict" not in response["document"] @patch("controllers.service_api.wraps.FeatureService") @@ -1100,7 +1249,7 @@ class 
TestDocumentAddByTextApi: ): """Test ValueError when dataset not found.""" # Arrange — neutralise billing decorators - self._setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) + self._setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant) mock_db.session.scalar.return_value = None @@ -1113,7 +1262,7 @@ class TestDocumentAddByTextApi: ): api = DocumentAddByTextApi() with pytest.raises(ValueError, match="Dataset does not exist."): - api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id) + api.post(tenant_id=mock_tenant, dataset_id=mock_dataset.id) @patch("controllers.service_api.wraps.FeatureService") @patch("controllers.service_api.wraps.validate_and_get_api_token") @@ -1128,7 +1277,7 @@ class TestDocumentAddByTextApi: document creation paths instead of leaking a ``KeyError`` from the dumped payload dict. """ # Arrange — neutralise billing decorators - self._setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) + self._setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant) mock_dataset.indexing_technique = None mock_db.session.scalar.return_value = mock_dataset @@ -1142,7 +1291,7 @@ class TestDocumentAddByTextApi: ): api = DocumentAddByTextApi() with pytest.raises(ValueError, match="indexing_technique is required."): - api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id) + api.post(tenant_id=mock_tenant, dataset_id=mock_dataset.id) class TestArchivedDocumentImmutableError: @@ -1235,9 +1384,8 @@ class TestDocumentUpdateByTextApiPost: mock_dataset, ): """Test successful document update by text.""" - _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) + _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant) mock_dataset.indexing_technique = "economy" - mock_dataset.latest_process_rule = Mock() mock_db.session.scalar.return_value = mock_dataset mock_current_user.id = "user-1" @@ -1257,17 +1405,17 @@ class 
TestDocumentUpdateByTextApiPost: headers={"Authorization": "Bearer test_token"}, ): api = DocumentUpdateByTextApi() - response, status = api.post( - tenant_id=mock_tenant.id, - dataset_id=mock_dataset.id, - document_id=doc_id, - ) + with patch("models.dataset.db", _DocumentModelDbStub(_DocumentModelSessionStub([object(), 0]))): + response, status = api.post( + tenant_id=mock_tenant, + dataset_id=mock_dataset.id, + document_id=doc_id, + ) - assert status == 200 - assert "document" in response - assert response["batch"] == "batch-1" - assert response["document"]["id"] == "doc-update-text" - assert response["document"]["doc_metadata"] == [] + assert (response, status) == ( + {"document": _expected_document_response(mock_document), "batch": "batch-1"}, + 200, + ) @patch("controllers.service_api.dataset.document.db") @patch("controllers.service_api.wraps.FeatureService") @@ -1282,7 +1430,7 @@ class TestDocumentUpdateByTextApiPost: mock_dataset, ): """Test ValueError when dataset not found.""" - _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) + _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant) mock_db.session.scalar.return_value = None doc_id = str(uuid.uuid4()) @@ -1295,7 +1443,7 @@ class TestDocumentUpdateByTextApiPost: api = DocumentUpdateByTextApi() with pytest.raises(ValueError, match="Dataset does not exist"): api.post( - tenant_id=mock_tenant.id, + tenant_id=mock_tenant, dataset_id=mock_dataset.id, document_id=doc_id, ) @@ -1327,12 +1475,10 @@ class TestDocumentAddByFileApiPost: mock_dataset, ): """Test successful document creation by file.""" - _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) + _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant) mock_dataset.provider = "vendor" mock_dataset.indexing_technique = "economy" mock_dataset.chunk_structure = None - mock_dataset.latest_process_rule = Mock() - mock_dataset.created_by_account = Mock() 
mock_db.session.scalar.return_value = mock_dataset mock_current_user.id = "user-1" @@ -1355,13 +1501,13 @@ class TestDocumentAddByFileApiPost: headers={"Authorization": "Bearer test_token"}, ): api = DocumentAddByFileApi() - response, status = api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id) + with patch("models.dataset.db", _DocumentModelDbStub(_DocumentModelSessionStub([object(), 0]))): + response, status = api.post(tenant_id=mock_tenant, dataset_id=mock_dataset.id) - assert status == 200 - assert response["batch"] == "batch-file" - assert response["document"]["id"] == "doc-create-file" - assert response["document"]["data_source_info"] == {"upload_file_id": "file-1"} - assert response["document"]["doc_metadata"] == [] + assert (response, status) == ( + {"document": _expected_document_response(mock_document), "batch": "batch-file"}, + 200, + ) @patch("controllers.service_api.dataset.document.db") @patch("controllers.service_api.wraps.FeatureService") @@ -1376,7 +1522,7 @@ class TestDocumentAddByFileApiPost: mock_dataset, ): """Test ValueError when dataset not found.""" - _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) + _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant) mock_db.session.scalar.return_value = None from io import BytesIO @@ -1391,7 +1537,7 @@ class TestDocumentAddByFileApiPost: ): api = DocumentAddByFileApi() with pytest.raises(ValueError, match="Dataset does not exist"): - api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id) + api.post(tenant_id=mock_tenant, dataset_id=mock_dataset.id) @patch("controllers.service_api.dataset.document.db") @patch("controllers.service_api.wraps.FeatureService") @@ -1406,7 +1552,7 @@ class TestDocumentAddByFileApiPost: mock_dataset, ): """Test ValueError when dataset is external.""" - _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) + _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant) 
mock_dataset.provider = "external" mock_db.session.scalar.return_value = mock_dataset @@ -1422,7 +1568,7 @@ class TestDocumentAddByFileApiPost: ): api = DocumentAddByFileApi() with pytest.raises(ValueError, match="External datasets"): - api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id) + api.post(tenant_id=mock_tenant, dataset_id=mock_dataset.id) @patch("controllers.service_api.dataset.document.db") @patch("controllers.service_api.wraps.FeatureService") @@ -1439,7 +1585,7 @@ class TestDocumentAddByFileApiPost: """Test NoFileUploadedError when no file in request.""" from controllers.common.errors import NoFileUploadedError - _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) + _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant) mock_dataset.provider = "vendor" mock_dataset.indexing_technique = "economy" mock_dataset.chunk_structure = None @@ -1454,7 +1600,7 @@ class TestDocumentAddByFileApiPost: ): api = DocumentAddByFileApi() with pytest.raises(NoFileUploadedError): - api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id) + api.post(tenant_id=mock_tenant, dataset_id=mock_dataset.id) @patch("controllers.service_api.dataset.document.db") @patch("controllers.service_api.wraps.FeatureService") @@ -1469,7 +1615,7 @@ class TestDocumentAddByFileApiPost: mock_dataset, ): """Test ValueError when indexing_technique is missing.""" - _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) + _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant) mock_dataset.provider = "vendor" mock_dataset.indexing_technique = None mock_dataset.chunk_structure = None @@ -1487,7 +1633,7 @@ class TestDocumentAddByFileApiPost: ): api = DocumentAddByFileApi() with pytest.raises(ValueError, match="indexing_technique is required"): - api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id) + api.post(tenant_id=mock_tenant, dataset_id=mock_dataset.id) class TestDocumentUpdateByFileApiPatch: 
@@ -1512,8 +1658,11 @@ class TestDocumentUpdateByFileApiPatch: mock_dataset, ): """Test legacy POST aliases still dispatch while marked deprecated.""" - _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) - mock_update_document_by_file.return_value = ({"document": {"id": "doc-1"}, "batch": "batch-1"}, 200) + _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant) + mock_update_document_by_file.return_value = ( + make_serializable_document(id="doc-1", batch="batch-1"), + "batch-1", + ) doc_id = str(uuid.uuid4()) with app.test_request_context( @@ -1522,16 +1671,19 @@ class TestDocumentUpdateByFileApiPatch: headers={"Authorization": "Bearer test_token"}, ): api = DeprecatedDocumentUpdateByFileApi() - response, status = api.post( - tenant_id=mock_tenant.id, - dataset_id=mock_dataset.id, - document_id=doc_id, - ) + with patch("models.dataset.db", _DocumentModelDbStub(_DocumentModelSessionStub([0]))): + response, status = api.post( + tenant_id=mock_tenant, + dataset_id=mock_dataset.id, + document_id=doc_id, + ) - assert status == 200 - assert response["batch"] == "batch-1" + assert (response, status) == ( + {"document": _expected_document_response(mock_update_document_by_file.return_value[0]), "batch": "batch-1"}, + 200, + ) mock_update_document_by_file.assert_called_once_with( - tenant_id=mock_tenant.id, + tenant_id=mock_tenant, dataset_id=mock_dataset.id, document_id=doc_id, ) @@ -1549,7 +1701,7 @@ class TestDocumentUpdateByFileApiPatch: mock_dataset, ): """Test ValueError when dataset not found.""" - _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) + _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant) mock_db.session.scalar.return_value = None from io import BytesIO @@ -1566,7 +1718,7 @@ class TestDocumentUpdateByFileApiPatch: api = DocumentApi() with pytest.raises(ValueError, match="Dataset does not exist"): api.patch( - tenant_id=mock_tenant.id, + tenant_id=mock_tenant, 
dataset_id=mock_dataset.id, document_id=doc_id, ) @@ -1584,7 +1736,7 @@ class TestDocumentUpdateByFileApiPatch: mock_dataset, ): """Test ValueError when dataset is external.""" - _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) + _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant) mock_dataset.provider = "external" mock_db.session.scalar.return_value = mock_dataset @@ -1602,7 +1754,7 @@ class TestDocumentUpdateByFileApiPatch: api = DocumentApi() with pytest.raises(ValueError, match="External datasets"): api.patch( - tenant_id=mock_tenant.id, + tenant_id=mock_tenant, dataset_id=mock_dataset.id, document_id=doc_id, ) @@ -1626,12 +1778,10 @@ class TestDocumentUpdateByFileApiPatch: mock_dataset, ): """Test successful document update by file.""" - _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id) + _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant) mock_dataset.indexing_technique = "economy" mock_dataset.provider = "vendor" mock_dataset.chunk_structure = None - mock_dataset.latest_process_rule = Mock() - mock_dataset.created_by_account = Mock() mock_db.session.scalar.return_value = mock_dataset mock_current_user.id = "user-1" @@ -1655,14 +1805,14 @@ class TestDocumentUpdateByFileApiPatch: headers={"Authorization": "Bearer test_token"}, ): api = DocumentApi() - response, status = api.patch( - tenant_id=mock_tenant.id, - dataset_id=mock_dataset.id, - document_id=doc_id, - ) + with patch("models.dataset.db", _DocumentModelDbStub(_DocumentModelSessionStub([object(), 0]))): + response, status = api.patch( + tenant_id=mock_tenant, + dataset_id=mock_dataset.id, + document_id=doc_id, + ) - assert status == 200 - assert "document" in response - assert response["batch"] == "batch-1" - assert response["document"]["id"] == "doc-update-file" - assert response["document"]["data_source_info"] == {"upload_file_id": "file-1"} + assert (response, status) == ( + {"document": 
_expected_document_response(mock_document), "batch": "batch-1"}, + 200, + ) diff --git a/api/tests/unit_tests/controllers/service_api/dataset/test_rag_pipeline_file_upload_serialization.py b/api/tests/unit_tests/controllers/service_api/dataset/test_rag_pipeline_file_upload_serialization.py index a8dd8523acb..bfda3e23b32 100644 --- a/api/tests/unit_tests/controllers/service_api/dataset/test_rag_pipeline_file_upload_serialization.py +++ b/api/tests/unit_tests/controllers/service_api/dataset/test_rag_pipeline_file_upload_serialization.py @@ -2,9 +2,10 @@ Unit tests for Service API knowledge pipeline file-upload serialization. """ -import importlib.util from datetime import UTC, datetime -from pathlib import Path + +from controllers.service_api.dataset.rag_pipeline.rag_pipeline_workflow import PipelineUploadFileResponse +from libs.helper import dump_response class FakeUploadFile: @@ -17,21 +18,7 @@ class FakeUploadFile: created_at: datetime | None -def _load_serialize_upload_file(): - api_dir = Path(__file__).resolve().parents[5] - serializers_path = api_dir / "controllers" / "service_api" / "dataset" / "rag_pipeline" / "serializers.py" - - spec = importlib.util.spec_from_file_location("rag_pipeline_serializers", serializers_path) - assert spec - assert spec.loader - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) # type: ignore[attr-defined] - return module.serialize_upload_file - - def test_file_upload_created_at_is_isoformat_string(): - serialize_upload_file = _load_serialize_upload_file() - created_at = datetime(2026, 2, 8, 12, 0, 0, tzinfo=UTC) upload_file = FakeUploadFile() upload_file.id = "file-1" @@ -42,13 +29,11 @@ def test_file_upload_created_at_is_isoformat_string(): upload_file.created_by = "account-1" upload_file.created_at = created_at - result = serialize_upload_file(upload_file) + result = dump_response(PipelineUploadFileResponse, upload_file) assert result["created_at"] == created_at.isoformat() def 
test_file_upload_created_at_none_serializes_to_null(): - serialize_upload_file = _load_serialize_upload_file() - upload_file = FakeUploadFile() upload_file.id = "file-1" upload_file.name = "test.pdf" @@ -58,5 +43,5 @@ def test_file_upload_created_at_none_serializes_to_null(): upload_file.created_by = "account-1" upload_file.created_at = None - result = serialize_upload_file(upload_file) + result = dump_response(PipelineUploadFileResponse, upload_file) assert result["created_at"] is None diff --git a/packages/contracts/generated/api/console/agent/types.gen.ts b/packages/contracts/generated/api/console/agent/types.gen.ts index 43119c4f1f4..6e05107fc51 100644 --- a/packages/contracts/generated/api/console/agent/types.gen.ts +++ b/packages/contracts/generated/api/console/agent/types.gen.ts @@ -405,12 +405,30 @@ export type DeletedTool = { } export type ModelConfig = { - completion_params?: { - [key: string]: unknown - } - mode: LlmMode - name: string - provider: string + agent_mode?: JsonValue | null + annotation_reply?: JsonValue | null + chat_prompt_config?: JsonValue | null + completion_prompt_config?: JsonValue | null + created_at?: number | null + created_by?: string | null + dataset_configs?: JsonValue | null + dataset_query_variable?: string | null + external_data_tools?: JsonValue | null + file_upload?: JsonValue | null + model?: JsonValue | null + more_like_this?: JsonValue | null + opening_statement?: string | null + pre_prompt?: string | null + prompt_type?: string | null + retriever_resource?: JsonValue | null + sensitive_word_avoidance?: JsonValue | null + speech_to_text?: JsonValue | null + suggested_questions?: JsonValue | null + suggested_questions_after_answer?: JsonValue | null + text_to_speech?: JsonValue | null + updated_at?: number | null + updated_by?: string | null + user_input_form?: JsonValue | null } export type Site = { @@ -879,8 +897,6 @@ export type AgentAppPublishedReferenceResponse = { app_name: string } -export type LlmMode = 'chat' | 
'completion' - export type AgentKind = 'dify_agent' export type AgentPublishedReferenceResponse = { diff --git a/packages/contracts/generated/api/console/agent/zod.gen.ts b/packages/contracts/generated/api/console/agent/zod.gen.ts index d7f5681ffc4..d7c29eb68e2 100644 --- a/packages/contracts/generated/api/console/agent/zod.gen.ts +++ b/packages/contracts/generated/api/console/agent/zod.gen.ts @@ -229,6 +229,36 @@ export const zJsonValue = z ]) .nullable() +/** + * ModelConfig + */ +export const zModelConfig = z.object({ + agent_mode: zJsonValue.nullish(), + annotation_reply: zJsonValue.nullish(), + chat_prompt_config: zJsonValue.nullish(), + completion_prompt_config: zJsonValue.nullish(), + created_at: z.int().nullish(), + created_by: z.string().nullish(), + dataset_configs: zJsonValue.nullish(), + dataset_query_variable: z.string().nullish(), + external_data_tools: zJsonValue.nullish(), + file_upload: zJsonValue.nullish(), + model: zJsonValue.nullish(), + more_like_this: zJsonValue.nullish(), + opening_statement: z.string().nullish(), + pre_prompt: z.string().nullish(), + prompt_type: z.string().nullish(), + retriever_resource: zJsonValue.nullish(), + sensitive_word_avoidance: zJsonValue.nullish(), + speech_to_text: zJsonValue.nullish(), + suggested_questions: zJsonValue.nullish(), + suggested_questions_after_answer: zJsonValue.nullish(), + text_to_speech: zJsonValue.nullish(), + updated_at: z.int().nullish(), + updated_by: z.string().nullish(), + user_input_form: zJsonValue.nullish(), +}) + /** * WorkflowPartial */ @@ -240,6 +270,43 @@ export const zWorkflowPartial = z.object({ updated_by: z.string().nullish(), }) +/** + * AgentAppDetailWithSite + */ +export const zAgentAppDetailWithSite = z.object({ + access_mode: z.string().nullish(), + active_config_is_published: z.boolean().optional().default(false), + api_base_url: z.string().nullish(), + app_id: z.string().nullish(), + bound_agent_id: z.string().nullish(), + created_at: z.int().nullish(), + created_by: 
z.string().nullish(), + debug_conversation_id: z.string().nullish(), + deleted_tools: z.array(zDeletedTool).optional(), + description: z.string().nullish(), + enable_api: z.boolean(), + enable_site: z.boolean(), + icon: z.string().nullish(), + icon_background: z.string().nullish(), + icon_type: z.string().nullish(), + icon_url: z.string().nullable(), + id: z.string(), + maintainer: z.string().nullish(), + max_active_requests: z.int().nullish(), + mode: z.string(), + model_config: zModelConfig.nullish(), + name: z.string(), + permission_keys: z.array(z.string()).optional(), + role: z.string().nullish(), + site: zSite.nullish(), + tags: z.array(zTag).optional(), + tracing: zJsonValue.nullish(), + updated_at: z.int().nullish(), + updated_by: z.string().nullish(), + use_icon_as_answer_icon: z.boolean().nullish(), + workflow: zWorkflowPartial.nullish(), +}) + /** * AgentConfigSnapshotSummaryResponse */ @@ -773,60 +840,6 @@ export const zAgentAppPagination = z.object({ total: z.int(), }) -/** - * LLMMode - * - * Enum class for large language model mode. 
- */ -export const zLlmMode = z.enum(['chat', 'completion']) - -/** - * ModelConfig - */ -export const zModelConfig = z.object({ - completion_params: z.record(z.string(), z.unknown()).optional(), - mode: zLlmMode, - name: z.string(), - provider: z.string(), -}) - -/** - * AgentAppDetailWithSite - */ -export const zAgentAppDetailWithSite = z.object({ - access_mode: z.string().nullish(), - active_config_is_published: z.boolean().optional().default(false), - api_base_url: z.string().nullish(), - app_id: z.string().nullish(), - bound_agent_id: z.string().nullish(), - created_at: z.int().nullish(), - created_by: z.string().nullish(), - debug_conversation_id: z.string().nullish(), - deleted_tools: z.array(zDeletedTool).optional(), - description: z.string().nullish(), - enable_api: z.boolean(), - enable_site: z.boolean(), - icon: z.string().nullish(), - icon_background: z.string().nullish(), - icon_type: z.string().nullish(), - icon_url: z.string().nullable(), - id: z.string(), - maintainer: z.string().nullish(), - max_active_requests: z.int().nullish(), - mode: z.string(), - model_config: zModelConfig.nullish(), - name: z.string(), - permission_keys: z.array(z.string()).optional(), - role: z.string().nullish(), - site: zSite.nullish(), - tags: z.array(zTag).optional(), - tracing: zJsonValue.nullish(), - updated_at: z.int().nullish(), - updated_by: z.string().nullish(), - use_icon_as_answer_icon: z.boolean().nullish(), - workflow: zWorkflowPartial.nullish(), -}) - /** * AgentKind * diff --git a/packages/contracts/generated/api/console/apps/types.gen.ts b/packages/contracts/generated/api/console/apps/types.gen.ts index 9e79518f3cd..956bfd2d6f9 100644 --- a/packages/contracts/generated/api/console/apps/types.gen.ts +++ b/packages/contracts/generated/api/console/apps/types.gen.ts @@ -1223,12 +1223,30 @@ export type DeletedTool = { } export type ModelConfig = { - completion_params?: { - [key: string]: unknown - } - mode: LlmMode - name: string - provider: string + 
agent_mode?: JsonValue | null + annotation_reply?: JsonValue | null + chat_prompt_config?: JsonValue | null + completion_prompt_config?: JsonValue | null + created_at?: number | null + created_by?: string | null + dataset_configs?: JsonValue | null + dataset_query_variable?: string | null + external_data_tools?: JsonValue | null + file_upload?: JsonValue | null + model?: JsonValue | null + more_like_this?: JsonValue | null + opening_statement?: string | null + pre_prompt?: string | null + prompt_type?: string | null + retriever_resource?: JsonValue | null + sensitive_word_avoidance?: JsonValue | null + speech_to_text?: JsonValue | null + suggested_questions?: JsonValue | null + suggested_questions_after_answer?: JsonValue | null + text_to_speech?: JsonValue | null + updated_at?: number | null + updated_by?: string | null + user_input_form?: JsonValue | null } export type Site = { @@ -1982,8 +2000,6 @@ export type ModelConfigPartial = { updated_by?: string | null } -export type LlmMode = 'chat' | 'completion' - export type Type = 'github' | 'marketplace' | 'package' export type Github = { diff --git a/packages/contracts/generated/api/console/apps/zod.gen.ts b/packages/contracts/generated/api/console/apps/zod.gen.ts index 9b86fda0a62..d63ecfcf5b2 100644 --- a/packages/contracts/generated/api/console/apps/zod.gen.ts +++ b/packages/contracts/generated/api/console/apps/zod.gen.ts @@ -889,6 +889,36 @@ export const zJsonValue = z */ export const zGeneratedAppResponse = zJsonValue +/** + * ModelConfig + */ +export const zModelConfig = z.object({ + agent_mode: zJsonValue.nullish(), + annotation_reply: zJsonValue.nullish(), + chat_prompt_config: zJsonValue.nullish(), + completion_prompt_config: zJsonValue.nullish(), + created_at: z.int().nullish(), + created_by: z.string().nullish(), + dataset_configs: zJsonValue.nullish(), + dataset_query_variable: z.string().nullish(), + external_data_tools: zJsonValue.nullish(), + file_upload: zJsonValue.nullish(), + model: 
zJsonValue.nullish(), + more_like_this: zJsonValue.nullish(), + opening_statement: z.string().nullish(), + pre_prompt: z.string().nullish(), + prompt_type: z.string().nullish(), + retriever_resource: zJsonValue.nullish(), + sensitive_word_avoidance: zJsonValue.nullish(), + speech_to_text: zJsonValue.nullish(), + suggested_questions: zJsonValue.nullish(), + suggested_questions_after_answer: zJsonValue.nullish(), + text_to_speech: zJsonValue.nullish(), + updated_at: z.int().nullish(), + updated_by: z.string().nullish(), + user_input_form: zJsonValue.nullish(), +}) + /** * WorkflowPartial */ @@ -900,6 +930,66 @@ export const zWorkflowPartial = z.object({ updated_by: z.string().nullish(), }) +/** + * AppDetailWithSite + */ +export const zAppDetailWithSite = z.object({ + access_mode: z.string().nullish(), + api_base_url: z.string().nullish(), + app_id: z.string().nullish(), + bound_agent_id: z.string().nullish(), + created_at: z.int().nullish(), + created_by: z.string().nullish(), + deleted_tools: z.array(zDeletedTool).optional(), + description: z.string().nullish(), + enable_api: z.boolean(), + enable_site: z.boolean(), + icon: z.string().nullish(), + icon_background: z.string().nullish(), + icon_type: z.string().nullish(), + icon_url: z.string().nullable(), + id: z.string(), + maintainer: z.string().nullish(), + max_active_requests: z.int().nullish(), + mode: z.string(), + model_config: zModelConfig.nullish(), + name: z.string(), + permission_keys: z.array(z.string()).optional(), + site: zSite.nullish(), + tags: z.array(zTag).optional(), + tracing: zJsonValue.nullish(), + updated_at: z.int().nullish(), + updated_by: z.string().nullish(), + use_icon_as_answer_icon: z.boolean().nullish(), + workflow: zWorkflowPartial.nullish(), +}) + +/** + * AppDetail + */ +export const zAppDetail = z.object({ + access_mode: z.string().nullish(), + app_model_config: zModelConfig.nullish(), + created_at: z.int().nullish(), + created_by: z.string().nullish(), + description: 
z.string().nullish(), + enable_api: z.boolean(), + enable_site: z.boolean(), + icon: z.string().nullish(), + icon_background: z.string().nullish(), + id: z.string(), + maintainer: z.string().nullish(), + mode_compatible_with_agent: z.string(), + name: z.string(), + permission_keys: z.array(z.string()).optional(), + tags: z.array(zTag).optional(), + tracing: zJsonValue.nullish(), + updated_at: z.int().nullish(), + updated_by: z.string().nullish(), + use_icon_as_answer_icon: z.boolean().nullish(), + workflow: zWorkflowPartial.nullish(), +}) + /** * ImportStatus */ @@ -1118,6 +1208,25 @@ export const zFeedbackStat = z.object({ like: z.int(), }) +/** + * ConversationDetail + */ +export const zConversationDetail = z.object({ + admin_feedback_stats: zFeedbackStat.nullish(), + annotated: z.boolean(), + created_at: z.int().nullish(), + from_account_id: z.string().nullish(), + from_end_user_id: z.string().nullish(), + from_source: z.string(), + id: z.string(), + introduction: z.string().nullish(), + message_count: z.int(), + model_config: zModelConfig.nullish(), + status: z.string(), + updated_at: z.int().nullish(), + user_feedback_stats: zFeedbackStat.nullish(), +}) + /** * ConversationVariableResponse */ @@ -1415,6 +1524,20 @@ export const zMessageDetail = z.object({ workflow_run_id: z.string().nullish(), }) +/** + * ConversationMessageDetail + */ +export const zConversationMessageDetail = z.object({ + created_at: z.int().nullish(), + first_message: zMessageDetail.nullish(), + from_account_id: z.string().nullish(), + from_end_user_id: z.string().nullish(), + from_source: z.string(), + id: z.string(), + model_config: zModelConfig.nullish(), + status: z.string(), +}) + /** * WorkflowRunForListResponse */ @@ -2056,116 +2179,6 @@ export const zAppPagination = z.object({ total: z.int(), }) -/** - * LLMMode - * - * Enum class for large language model mode. 
- */ -export const zLlmMode = z.enum(['chat', 'completion']) - -/** - * ModelConfig - */ -export const zModelConfig = z.object({ - completion_params: z.record(z.string(), z.unknown()).optional(), - mode: zLlmMode, - name: z.string(), - provider: z.string(), -}) - -/** - * AppDetailWithSite - */ -export const zAppDetailWithSite = z.object({ - access_mode: z.string().nullish(), - api_base_url: z.string().nullish(), - app_id: z.string().nullish(), - bound_agent_id: z.string().nullish(), - created_at: z.int().nullish(), - created_by: z.string().nullish(), - deleted_tools: z.array(zDeletedTool).optional(), - description: z.string().nullish(), - enable_api: z.boolean(), - enable_site: z.boolean(), - icon: z.string().nullish(), - icon_background: z.string().nullish(), - icon_type: z.string().nullish(), - icon_url: z.string().nullable(), - id: z.string(), - maintainer: z.string().nullish(), - max_active_requests: z.int().nullish(), - mode: z.string(), - model_config: zModelConfig.nullish(), - name: z.string(), - permission_keys: z.array(z.string()).optional(), - site: zSite.nullish(), - tags: z.array(zTag).optional(), - tracing: zJsonValue.nullish(), - updated_at: z.int().nullish(), - updated_by: z.string().nullish(), - use_icon_as_answer_icon: z.boolean().nullish(), - workflow: zWorkflowPartial.nullish(), -}) - -/** - * AppDetail - */ -export const zAppDetail = z.object({ - access_mode: z.string().nullish(), - app_model_config: zModelConfig.nullish(), - created_at: z.int().nullish(), - created_by: z.string().nullish(), - description: z.string().nullish(), - enable_api: z.boolean(), - enable_site: z.boolean(), - icon: z.string().nullish(), - icon_background: z.string().nullish(), - id: z.string(), - maintainer: z.string().nullish(), - mode_compatible_with_agent: z.string(), - name: z.string(), - permission_keys: z.array(z.string()).optional(), - tags: z.array(zTag).optional(), - tracing: zJsonValue.nullish(), - updated_at: z.int().nullish(), - updated_by: 
z.string().nullish(), - use_icon_as_answer_icon: z.boolean().nullish(), - workflow: zWorkflowPartial.nullish(), -}) - -/** - * ConversationDetail - */ -export const zConversationDetail = z.object({ - admin_feedback_stats: zFeedbackStat.nullish(), - annotated: z.boolean(), - created_at: z.int().nullish(), - from_account_id: z.string().nullish(), - from_end_user_id: z.string().nullish(), - from_source: z.string(), - id: z.string(), - introduction: z.string().nullish(), - message_count: z.int(), - model_config: zModelConfig.nullish(), - status: z.string(), - updated_at: z.int().nullish(), - user_feedback_stats: zFeedbackStat.nullish(), -}) - -/** - * ConversationMessageDetail - */ -export const zConversationMessageDetail = z.object({ - created_at: z.int().nullish(), - first_message: zMessageDetail.nullish(), - from_account_id: z.string().nullish(), - from_end_user_id: z.string().nullish(), - from_source: z.string(), - id: z.string(), - model_config: zModelConfig.nullish(), - status: z.string(), -}) - /** * Type */ diff --git a/packages/contracts/generated/api/console/datasets/orpc.gen.ts b/packages/contracts/generated/api/console/datasets/orpc.gen.ts index a8b096168bb..c85d94b55f6 100644 --- a/packages/contracts/generated/api/console/datasets/orpc.gen.ts +++ b/packages/contracts/generated/api/console/datasets/orpc.gen.ts @@ -146,9 +146,6 @@ import { zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksBody, zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksPath, zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponse, - zPostDatasetsByDatasetIdDocumentsDownloadZipBody, - zPostDatasetsByDatasetIdDocumentsDownloadZipPath, - zPostDatasetsByDatasetIdDocumentsDownloadZipResponse, zPostDatasetsByDatasetIdDocumentsGenerateSummaryBody, zPostDatasetsByDatasetIdDocumentsGenerateSummaryPath, zPostDatasetsByDatasetIdDocumentsGenerateSummaryResponse, @@ -359,8 +356,12 @@ export const get5 = oc 
.input(z.object({ params: zGetDatasetsExternalKnowledgeApiByExternalKnowledgeApiIdPath })) .output(zGetDatasetsExternalKnowledgeApiByExternalKnowledgeApiIdResponse) +/** + * Update external knowledge API template + */ export const patch = oc .route({ + description: 'Update external knowledge API template', inputStructure: 'detailed', method: 'PATCH', operationId: 'patchDatasetsExternalKnowledgeApiByExternalKnowledgeApiId', @@ -397,8 +398,12 @@ export const get6 = oc .input(z.object({ query: zGetDatasetsExternalKnowledgeApiQuery.optional() })) .output(zGetDatasetsExternalKnowledgeApiResponse) +/** + * Create external knowledge API template + */ export const post4 = oc .route({ + description: 'Create external knowledge API template', inputStructure: 'detailed', method: 'POST', operationId: 'postDatasetsExternalKnowledgeApi', @@ -444,7 +449,6 @@ export const post6 = oc method: 'POST', operationId: 'postDatasetsInit', path: '/datasets/init', - successStatus: 201, tags: ['console'], }) .input(z.object({ body: zPostDatasetsInitBody })) @@ -621,33 +625,6 @@ export const batch = { byBatch, } -/** - * Stream a ZIP archive containing the requested uploaded documents - * - * Download selected dataset documents as a single ZIP archive (upload-file only) - */ -export const post9 = oc - .route({ - description: 'Download selected dataset documents as a single ZIP archive (upload-file only)', - inputStructure: 'detailed', - method: 'POST', - operationId: 'postDatasetsByDatasetIdDocumentsDownloadZip', - path: '/datasets/{dataset_id}/documents/download-zip', - summary: 'Stream a ZIP archive containing the requested uploaded documents', - tags: ['console'], - }) - .input( - z.object({ - body: zPostDatasetsByDatasetIdDocumentsDownloadZipBody, - params: zPostDatasetsByDatasetIdDocumentsDownloadZipPath, - }), - ) - .output(zPostDatasetsByDatasetIdDocumentsDownloadZipResponse) - -export const downloadZip = { - post: post9, -} - /** * Generate summary index for specified documents * @@ 
-656,7 +633,7 @@ export const downloadZip = { * (indexing_technique must be 'high_quality' and summary_index_setting.enable must be true), * then asynchronously generates summary indexes for the provided documents. */ -export const post10 = oc +export const post9 = oc .route({ description: 'Generate summary index for documents\nThis endpoint checks if the dataset configuration supports summary generation\n(indexing_technique must be \'high_quality\' and summary_index_setting.enable must be true),\nthen asynchronously generates summary indexes for the provided documents.', @@ -676,10 +653,10 @@ export const post10 = oc .output(zPostDatasetsByDatasetIdDocumentsGenerateSummaryResponse) export const generateSummary = { - post: post10, + post: post9, } -export const post11 = oc +export const post10 = oc .route({ inputStructure: 'detailed', method: 'POST', @@ -697,7 +674,7 @@ export const post11 = oc .output(zPostDatasetsByDatasetIdDocumentsMetadataResponse) export const metadata2 = { - post: post11, + post: post10, } export const patch2 = oc @@ -903,7 +880,7 @@ export const processing = { byAction: byAction2, } -export const post12 = oc +export const post11 = oc .route({ inputStructure: 'detailed', method: 'POST', @@ -920,7 +897,7 @@ export const post12 = oc .output(zPostDatasetsByDatasetIdDocumentsByDocumentIdRenameResponse) export const rename = { - post: post12, + post: post11, } export const patch6 = oc @@ -943,7 +920,7 @@ export const byAction3 = { patch: patch6, } -export const post13 = oc +export const post12 = oc .route({ inputStructure: 'detailed', method: 'POST', @@ -960,7 +937,7 @@ export const post13 = oc .output(zPostDatasetsByDatasetIdDocumentsByDocumentIdSegmentResponse) export const segment = { - post: post13, + post: post12, byAction: byAction3, } @@ -975,7 +952,7 @@ export const get19 = oc .input(z.object({ params: zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBatchImportPath })) 
.output(zGetDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBatchImportResponse) -export const post14 = oc +export const post13 = oc .route({ inputStructure: 'detailed', method: 'POST', @@ -993,7 +970,7 @@ export const post14 = oc export const batchImport = { get: get19, - post: post14, + post: post13, } export const delete3 = oc @@ -1074,7 +1051,7 @@ export const patch8 = oc ) .output(zPatchDatasetsByDatasetIdDocumentsByDocumentIdSegmentsBySegmentIdChildChunksResponse) -export const post15 = oc +export const post14 = oc .route({ inputStructure: 'detailed', method: 'POST', @@ -1093,7 +1070,7 @@ export const post15 = oc export const childChunks = { get: get20, patch: patch8, - post: post15, + post: post14, byChildChunkId, } @@ -1184,12 +1161,13 @@ export const segments = { * - generating: Number of summaries being generated * - error: Number of summaries with errors * - not_started: Number of segments without summary records + * - timeout: Number of summaries that timed out * - summaries: List of summary records with status and content preview */ export const get22 = oc .route({ description: - 'Get summary index generation status for a document\nReturns:\n- total_segments: Total number of segments in the document\n- summary_status: Dictionary with status counts\n - completed: Number of summaries completed\n - generating: Number of summaries being generated\n - error: Number of summaries with errors\n - not_started: Number of segments without summary records\n- summaries: List of summary records with status and content preview', + 'Get summary index generation status for a document\nReturns:\n- total_segments: Total number of segments in the document\n- summary_status: Dictionary with status counts\n - completed: Number of summaries completed\n - generating: Number of summaries being generated\n - error: Number of summaries with errors\n - not_started: Number of segments without summary records\n - timeout: Number of summaries that timed out\n- summaries: List of 
summary records with status and content preview', inputStructure: 'detailed', method: 'GET', operationId: 'getDatasetsByDatasetIdDocumentsByDocumentIdSummaryStatus', @@ -1304,7 +1282,7 @@ export const get25 = oc ) .output(zGetDatasetsByDatasetIdDocumentsResponse) -export const post16 = oc +export const post15 = oc .route({ inputStructure: 'detailed', method: 'POST', @@ -1323,8 +1301,7 @@ export const post16 = oc export const documents = { delete: delete7, get: get25, - post: post16, - downloadZip, + post: post15, generateSummary, metadata: metadata2, status, @@ -1353,7 +1330,7 @@ export const errorDocs = { /** * Test external knowledge retrieval for dataset */ -export const post17 = oc +export const post16 = oc .route({ description: 'Test external knowledge retrieval for dataset', inputStructure: 'detailed', @@ -1371,13 +1348,13 @@ export const post17 = oc .output(zPostDatasetsByDatasetIdExternalHitTestingResponse) export const externalHitTesting = { - post: post17, + post: post16, } /** * Test dataset knowledge retrieval */ -export const post18 = oc +export const post17 = oc .route({ description: 'Test dataset knowledge retrieval', inputStructure: 'detailed', @@ -1395,7 +1372,7 @@ export const post18 = oc .output(zPostDatasetsByDatasetIdHitTestingResponse) export const hitTesting = { - post: post18, + post: post17, } /** @@ -1417,7 +1394,7 @@ export const indexingStatus3 = { get: get27, } -export const post19 = oc +export const post18 = oc .route({ inputStructure: 'detailed', method: 'POST', @@ -1430,7 +1407,7 @@ export const post19 = oc .output(zPostDatasetsByDatasetIdMetadataBuiltInByActionResponse) export const byAction4 = { - post: post19, + post: post18, } export const builtIn2 = { @@ -1481,7 +1458,7 @@ export const get28 = oc .input(z.object({ params: zGetDatasetsByDatasetIdMetadataPath })) .output(zGetDatasetsByDatasetIdMetadataResponse) -export const post20 = oc +export const post19 = oc .route({ inputStructure: 'detailed', method: 'POST', @@ -1500,7 
+1477,7 @@ export const post20 = oc export const metadata4 = { get: get28, - post: post20, + post: post19, builtIn: builtIn2, byMetadataId, } @@ -1584,7 +1561,7 @@ export const relatedApps = { /** * retry document */ -export const post21 = oc +export const post20 = oc .route({ inputStructure: 'detailed', method: 'POST', @@ -1603,7 +1580,7 @@ export const post21 = oc .output(zPostDatasetsByDatasetIdRetryResponse) export const retry = { - post: post21, + post: post20, } /** @@ -1734,7 +1711,7 @@ export const get35 = oc * * Create a new API key for a dataset */ -export const post22 = oc +export const post21 = oc .route({ description: 'Create a new API key for a dataset', inputStructure: 'detailed', @@ -1750,7 +1727,7 @@ export const post22 = oc export const apiKeys3 = { get: get35, - post: post22, + post: post21, byApiKeyId: byApiKeyId2, } @@ -1776,7 +1753,7 @@ export const get36 = oc /** * Create a new dataset */ -export const post23 = oc +export const post22 = oc .route({ description: 'Create a new dataset', inputStructure: 'detailed', @@ -1791,7 +1768,7 @@ export const post23 = oc export const datasets = { get: get36, - post: post23, + post: post22, apiBaseInfo, apiKeys, batchImportStatus, diff --git a/packages/contracts/generated/api/console/datasets/types.gen.ts b/packages/contracts/generated/api/console/datasets/types.gen.ts index f904ea7d717..9994aa7a396 100644 --- a/packages/contracts/generated/api/console/datasets/types.gen.ts +++ b/packages/contracts/generated/api/console/datasets/types.gen.ts @@ -97,51 +97,12 @@ export type ExternalDatasetCreatePayload = { name: string } -export type DatasetDetail = { - app_count?: number - author_name?: string - built_in_field_enabled?: boolean - chunk_structure?: string - created_at?: number - created_by?: string - data_source_type?: string - description?: string - doc_form?: string - doc_metadata?: Array - document_count?: number - embedding_available?: boolean - embedding_model?: string - embedding_model_provider?: 
string - enable_api?: boolean - external_knowledge_info?: ExternalKnowledgeInfo - external_retrieval_model?: ExternalRetrievalModel - icon_info?: DatasetIconInfo - id?: string - indexing_technique?: string - is_multimodal?: boolean - is_published?: boolean - name?: string - permission?: string - permission_keys?: Array - pipeline_id?: string - provider?: string - retrieval_model_dict?: DatasetRetrievalModel - runtime_mode?: string - summary_index_setting?: AnonymousInlineModelB1954337D565 - tags?: Array - total_available_documents?: number - total_documents?: number - updated_at?: number - updated_by?: string - word_count?: number -} - export type ExternalKnowledgeApiListResponse = { data: Array has_more: boolean limit: number page: number - total: number + total: number | null } export type ExternalKnowledgeApiPayload = { @@ -154,11 +115,11 @@ export type ExternalKnowledgeApiPayload = { export type ExternalKnowledgeApiResponse = { created_at: string created_by: string - dataset_bindings?: Array + dataset_bindings: Array description: string id: string name: string - settings?: { + settings: { [key: string]: unknown } | null tenant_id: string @@ -183,8 +144,11 @@ export type IndexingEstimatePayload = { } export type IndexingEstimateResponse = { - preview: Array - qa_preview?: Array | null + currency: string + preview: Array + qa_preview?: Array | null + tokens: number + total_price: number | number total_segments: number } @@ -236,8 +200,12 @@ export type IndexingEstimate = { total_segments: number } -export type OpaqueObjectResponse = { - [key: string]: unknown +export type ProcessRuleResponse = { + limits: { + [key: string]: unknown + } + mode: ProcessRuleMode + rules?: Rule | null } export type RetrievalSettingResponse = { @@ -333,12 +301,6 @@ export type DocumentWithSegmentsListResponse = { total: number } -export type DocumentBatchDownloadZipPayload = { - document_ids: Array -} - -export type BinaryFileResponse = Blob | File - export type GenerateSummaryPayload 
= { document_list: Array } @@ -347,6 +309,40 @@ export type MetadataOperationData = { operation_data: Array } +export type DocumentDetailResponse = { + archived?: boolean | null + average_segment_length?: number | null + completed_at?: number | null + created_at?: number | null + created_by?: string | null + created_from?: string | null + data_source_detail_dict?: unknown + data_source_info?: unknown + data_source_type?: string | null + dataset_process_rule?: unknown + dataset_process_rule_id?: string | null + disabled_at?: number | null + disabled_by?: string | null + display_status?: string | null + doc_form?: string | null + doc_language?: string | null + doc_metadata?: Array | null + doc_type?: string | null + document_process_rule?: unknown + enabled?: boolean | null + error?: string | null + hit_count?: number | null + id: string + indexing_latency?: number | null + indexing_status?: string | null + name?: string | null + need_summary?: boolean | null + position?: number | null + segment_count?: number | null + tokens?: number | null + updated_at?: number | null +} + export type UrlResponse = { url: string } @@ -376,6 +372,13 @@ export type SimpleResultMessageResponse = { result: string } +export type DocumentPipelineExecutionLogResponse = { + datasource_info?: JsonValue | null + datasource_node_id?: string | null + datasource_type?: string | null + input_data?: JsonValue | null +} + export type DocumentRenamePayload = { name: string } @@ -464,6 +467,12 @@ export type ChildChunkUpdatePayload = { content: string } +export type DocumentSummaryStatusResponse = { + summaries: Array + summary_status: SummaryStatusResponse + total_segments: number +} + export type ErrorDocsResponse = { data: Array total: number @@ -479,13 +488,10 @@ export type ExternalHitTestingPayload = { query: string } -export type ExternalRetrievalTestResponse - = | { - [key: string]: unknown - } - | Array<{ - [key: string]: unknown - }> +export type ExternalHitTestingResponse = { + query: 
ExternalHitTestingQueryResponse + records: Array +} export type HitTestingPayload = { attachment_ids?: Array | null @@ -641,68 +647,18 @@ export type DatasetTagResponse = { type: string } -export type DatasetDocMetadata = { - id?: string - name?: string - type?: string -} - -export type ExternalKnowledgeInfo = { - external_knowledge_api_endpoint?: string - external_knowledge_api_id?: string - external_knowledge_api_name?: string - external_knowledge_id?: string -} - -export type ExternalRetrievalModel = { - score_threshold?: number - score_threshold_enabled?: boolean - top_k?: number -} - -export type DatasetIconInfo = { - icon?: string - icon_background?: string - icon_type?: string - icon_url?: string -} - -export type DatasetRetrievalModel = { - reranking_enable?: boolean - reranking_mode?: string - reranking_model?: DatasetRerankingModel - score_threshold?: number - score_threshold_enabled?: boolean - search_method?: string - top_k?: number - weights?: DatasetWeightedScore -} - -export type AnonymousInlineModelB1954337D565 = { - enable?: boolean - model_name?: string - model_provider_name?: string - summary_prompt?: string -} - -export type Tag = { - id: string - name: string - type: string -} - -export type ExternalKnowledgeDatasetBindingResponse = { +export type ExternalKnowledgeApiBindingResponse = { id: string name: string } -export type IndexingEstimatePreviewItemResponse = { +export type PreviewDetail = { child_chunks?: Array | null content: string summary?: string | null } -export type IndexingEstimateQaPreviewItemResponse = { +export type QaPreviewDetail = { answer: string question: string } @@ -744,15 +700,13 @@ export type DatasetMetadataBuiltInFieldResponse = { type: string } -export type PreviewDetail = { - child_chunks?: Array | null - content: string - summary?: string | null -} +export type ProcessRuleMode = 'automatic' | 'custom' | 'hierarchical' -export type QaPreviewDetail = { - answer: string - question: string +export type Rule = { + 
parent_mode?: 'full-doc' | 'paragraph' | null + pre_processing_rules?: Array | null + segmentation?: Segmentation | null + subchunk_segmentation?: Segmentation | null } export type DocumentWithSegmentsResponse = { @@ -798,6 +752,8 @@ export type DocumentMetadataResponse = { value?: string | number | number | boolean | null } +export type JsonValue = unknown + export type SegmentResponse = { answer: string | null attachments: Array @@ -844,6 +800,37 @@ export type ChildChunkUpdateArgs = { id?: string | null } +export type SummaryEntryResponse = { + created_at?: number | null + error?: string | null + segment_id: string + segment_position: number + status: string + summary_preview?: string | null + updated_at?: number | null +} + +export type SummaryStatusResponse = { + completed?: number + error?: number + generating?: number + not_started?: number + timeout?: number +} + +export type ExternalHitTestingQueryResponse = { + content: string +} + +export type ExternalHitTestingRecordResponse = { + content?: string | null + metadata?: { + [key: string]: unknown + } | null + score?: number | null + title?: string | null +} + export type HitTestingQuery = { content: string } @@ -896,17 +883,6 @@ export type DatasetWeightedScoreResponse = { weight_type?: string | null } -export type DatasetRerankingModel = { - reranking_model_name?: string - reranking_provider_name?: string -} - -export type DatasetWeightedScore = { - keyword_setting?: DatasetKeywordSetting - vector_setting?: DatasetVectorSetting - weight_type?: string -} - export type InfoList = { data_source_type: 'notion_import' | 'upload_file' | 'website_crawl' file_info_list?: FileInfo | null @@ -914,15 +890,6 @@ export type InfoList = { website_info_list?: WebsiteInfo | null } -export type ProcessRuleMode = 'automatic' | 'custom' | 'hierarchical' - -export type Rule = { - parent_mode?: 'full-doc' | 'paragraph' | null - pre_processing_rules?: Array | null - segmentation?: Segmentation | null - subchunk_segmentation?: 
Segmentation | null -} - export type MetadataFilteringCondition = { conditions?: Array | null logical_operator?: 'and' | 'or' | null @@ -945,6 +912,17 @@ export type WeightModel = { weight_type?: 'customized' | 'keyword_first' | 'semantic_first' | null } +export type PreProcessingRule = { + enabled: boolean + id: 'remove_extra_spaces' | 'remove_stopwords' | 'remove_urls_emails' +} + +export type Segmentation = { + chunk_overlap?: number + max_tokens: number + separator?: string +} + export type MetadataDetail = { id: string name: string @@ -1018,16 +996,6 @@ export type DatasetVectorSettingResponse = { vector_weight?: number | null } -export type DatasetKeywordSetting = { - keyword_weight?: number -} - -export type DatasetVectorSetting = { - embedding_model_name?: string - embedding_provider_name?: string - vector_weight?: number -} - export type FileInfo = { file_ids: Array } @@ -1045,17 +1013,6 @@ export type WebsiteInfo = { urls: Array } -export type PreProcessingRule = { - enabled: boolean - id: 'remove_extra_spaces' | 'remove_stopwords' | 'remove_urls_emails' -} - -export type Segmentation = { - chunk_overlap?: number - max_tokens: number - separator?: string -} - export type Condition = { comparison_operator: | '<' @@ -1264,7 +1221,7 @@ export type PostDatasetsExternalErrors = { } export type PostDatasetsExternalResponses = { - 201: DatasetDetail + 201: DatasetDetailResponse } export type PostDatasetsExternalResponse @@ -1295,6 +1252,10 @@ export type PostDatasetsExternalKnowledgeApiData = { url: '/datasets/external-knowledge-api' } +export type PostDatasetsExternalKnowledgeApiErrors = { + 403: unknown +} + export type PostDatasetsExternalKnowledgeApiResponses = { 201: ExternalKnowledgeApiResponse } @@ -1347,6 +1308,10 @@ export type PatchDatasetsExternalKnowledgeApiByExternalKnowledgeApiIdData = { url: '/datasets/external-knowledge-api/{external_knowledge_api_id}' } +export type PatchDatasetsExternalKnowledgeApiByExternalKnowledgeApiIdErrors = { + 404: 
unknown +} + export type PatchDatasetsExternalKnowledgeApiByExternalKnowledgeApiIdResponses = { 200: ExternalKnowledgeApiResponse } @@ -1396,7 +1361,7 @@ export type PostDatasetsInitErrors = { } export type PostDatasetsInitResponses = { - 201: DatasetAndDocumentResponse + 200: DatasetAndDocumentResponse } export type PostDatasetsInitResponse = PostDatasetsInitResponses[keyof PostDatasetsInitResponses] @@ -1439,7 +1404,7 @@ export type GetDatasetsProcessRuleData = { } export type GetDatasetsProcessRuleResponses = { - 200: OpaqueObjectResponse + 200: ProcessRuleResponse } export type GetDatasetsProcessRuleResponse @@ -1581,7 +1546,7 @@ export type GetDatasetsByDatasetIdBatchByBatchIndexingEstimateData = { } export type GetDatasetsByDatasetIdBatchByBatchIndexingEstimateResponses = { - 200: OpaqueObjectResponse + 200: IndexingEstimateResponse } export type GetDatasetsByDatasetIdBatchByBatchIndexingEstimateResponse @@ -1659,22 +1624,6 @@ export type PostDatasetsByDatasetIdDocumentsResponses = { export type PostDatasetsByDatasetIdDocumentsResponse = PostDatasetsByDatasetIdDocumentsResponses[keyof PostDatasetsByDatasetIdDocumentsResponses] -export type PostDatasetsByDatasetIdDocumentsDownloadZipData = { - body: DocumentBatchDownloadZipPayload - path: { - dataset_id: string - } - query?: never - url: '/datasets/{dataset_id}/documents/download-zip' -} - -export type PostDatasetsByDatasetIdDocumentsDownloadZipResponses = { - 200: BinaryFileResponse -} - -export type PostDatasetsByDatasetIdDocumentsDownloadZipResponse - = PostDatasetsByDatasetIdDocumentsDownloadZipResponses[keyof PostDatasetsByDatasetIdDocumentsDownloadZipResponses] - export type PostDatasetsByDatasetIdDocumentsGenerateSummaryData = { body: GenerateSummaryPayload path: { @@ -1764,7 +1713,7 @@ export type GetDatasetsByDatasetIdDocumentsByDocumentIdErrors = { } export type GetDatasetsByDatasetIdDocumentsByDocumentIdResponses = { - 200: OpaqueObjectResponse + 200: DocumentDetailResponse } export type 
GetDatasetsByDatasetIdDocumentsByDocumentIdResponse @@ -1803,7 +1752,7 @@ export type GetDatasetsByDatasetIdDocumentsByDocumentIdIndexingEstimateErrors = } export type GetDatasetsByDatasetIdDocumentsByDocumentIdIndexingEstimateResponses = { - 200: OpaqueObjectResponse + 200: IndexingEstimateResponse } export type GetDatasetsByDatasetIdDocumentsByDocumentIdIndexingEstimateResponse @@ -1880,7 +1829,7 @@ export type GetDatasetsByDatasetIdDocumentsByDocumentIdPipelineExecutionLogData } export type GetDatasetsByDatasetIdDocumentsByDocumentIdPipelineExecutionLogResponses = { - 200: OpaqueObjectResponse + 200: DocumentPipelineExecutionLogResponse } export type GetDatasetsByDatasetIdDocumentsByDocumentIdPipelineExecutionLogResponse @@ -2225,7 +2174,7 @@ export type GetDatasetsByDatasetIdDocumentsByDocumentIdSummaryStatusErrors = { } export type GetDatasetsByDatasetIdDocumentsByDocumentIdSummaryStatusResponses = { - 200: OpaqueObjectResponse + 200: DocumentSummaryStatusResponse } export type GetDatasetsByDatasetIdDocumentsByDocumentIdSummaryStatusResponse @@ -2283,7 +2232,7 @@ export type PostDatasetsByDatasetIdExternalHitTestingErrors = { } export type PostDatasetsByDatasetIdExternalHitTestingResponses = { - 200: ExternalRetrievalTestResponse + 200: ExternalHitTestingResponse } export type PostDatasetsByDatasetIdExternalHitTestingResponse diff --git a/packages/contracts/generated/api/console/datasets/zod.gen.ts b/packages/contracts/generated/api/console/datasets/zod.gen.ts index 10ceb11cb5d..8bc827d5ca2 100644 --- a/packages/contracts/generated/api/console/datasets/zod.gen.ts +++ b/packages/contracts/generated/api/console/datasets/zod.gen.ts @@ -91,11 +91,6 @@ export const zNotionEstimatePayload = z.object({ process_rule: z.record(z.string(), z.unknown()), }) -/** - * OpaqueObjectResponse - */ -export const zOpaqueObjectResponse = z.record(z.string(), z.unknown()) - /** * RetrievalSettingResponse */ @@ -118,20 +113,6 @@ export const zAutoDisableLogsResponse = z.object({ 
document_ids: z.array(z.string()), }) -/** - * DocumentBatchDownloadZipPayload - * - * Request payload for bulk downloading documents as a zip archive. - */ -export const zDocumentBatchDownloadZipPayload = z.object({ - document_ids: z.array(z.uuid()).min(1).max(100), -}) - -/** - * BinaryFileResponse - */ -export const zBinaryFileResponse = z.custom() - /** * GenerateSummaryPayload */ @@ -247,14 +228,6 @@ export const zExternalHitTestingPayload = z.object({ query: z.string(), }) -/** - * ExternalRetrievalTestResponse - */ -export const zExternalRetrievalTestResponse = z.union([ - z.record(z.string(), z.unknown()), - z.array(z.record(z.string(), z.unknown())), -]) - /** * MetadataArgs */ @@ -397,52 +370,10 @@ export const zDatasetTagResponse = z.object({ type: z.string(), }) -export const zDatasetDocMetadata = z.object({ - id: z.string().optional(), - name: z.string().optional(), - type: z.string().optional(), -}) - -export const zExternalKnowledgeInfo = z.object({ - external_knowledge_api_endpoint: z.string().optional(), - external_knowledge_api_id: z.string().optional(), - external_knowledge_api_name: z.string().optional(), - external_knowledge_id: z.string().optional(), -}) - -export const zExternalRetrievalModel = z.object({ - score_threshold: z.number().optional(), - score_threshold_enabled: z.boolean().optional(), - top_k: z.int().optional(), -}) - -export const zDatasetIconInfo = z.object({ - icon: z.string().optional(), - icon_background: z.string().optional(), - icon_type: z.string().optional(), - icon_url: z.string().optional(), -}) - -export const zAnonymousInlineModelB1954337D565 = z.object({ - enable: z.boolean().optional(), - model_name: z.string().optional(), - model_provider_name: z.string().optional(), - summary_prompt: z.string().optional(), -}) - /** - * Tag + * ExternalKnowledgeApiBindingResponse */ -export const zTag = z.object({ - id: z.string(), - name: z.string(), - type: z.string(), -}) - -/** - * ExternalKnowledgeDatasetBindingResponse - */ 
-export const zExternalKnowledgeDatasetBindingResponse = z.object({ +export const zExternalKnowledgeApiBindingResponse = z.object({ id: z.string(), name: z.string(), }) @@ -453,11 +384,11 @@ export const zExternalKnowledgeDatasetBindingResponse = z.object({ export const zExternalKnowledgeApiResponse = z.object({ created_at: z.string(), created_by: z.string(), - dataset_bindings: z.array(zExternalKnowledgeDatasetBindingResponse).optional(), + dataset_bindings: z.array(zExternalKnowledgeApiBindingResponse), description: z.string(), id: z.string(), name: z.string(), - settings: z.record(z.string(), z.unknown()).nullish(), + settings: z.record(z.string(), z.unknown()).nullable(), tenant_id: z.string(), }) @@ -469,22 +400,22 @@ export const zExternalKnowledgeApiListResponse = z.object({ has_more: z.boolean(), limit: z.int(), page: z.int(), - total: z.int(), + total: z.int().nullable(), }) /** - * IndexingEstimatePreviewItemResponse + * PreviewDetail */ -export const zIndexingEstimatePreviewItemResponse = z.object({ +export const zPreviewDetail = z.object({ child_chunks: z.array(z.string()).nullish(), content: z.string(), summary: z.string().nullish(), }) /** - * IndexingEstimateQaPreviewItemResponse + * QAPreviewDetail */ -export const zIndexingEstimateQaPreviewItemResponse = z.object({ +export const zQaPreviewDetail = z.object({ answer: z.string(), question: z.string(), }) @@ -493,8 +424,20 @@ export const zIndexingEstimateQaPreviewItemResponse = z.object({ * IndexingEstimateResponse */ export const zIndexingEstimateResponse = z.object({ - preview: z.array(zIndexingEstimatePreviewItemResponse), - qa_preview: z.array(zIndexingEstimateQaPreviewItemResponse).nullish(), + currency: z.string(), + preview: z.array(zPreviewDetail), + qa_preview: z.array(zQaPreviewDetail).nullish(), + tokens: z.int(), + total_price: z.union([z.number(), z.int()]), + total_segments: z.int(), +}) + +/** + * IndexingEstimate + */ +export const zIndexingEstimate = z.object({ + preview: 
z.array(zPreviewDetail), + qa_preview: z.array(zQaPreviewDetail).nullish(), total_segments: z.int(), }) @@ -528,30 +471,11 @@ export const zDatasetMetadataBuiltInFieldsResponse = z.object({ }) /** - * PreviewDetail + * ProcessRuleMode + * + * Dataset Process Rule Mode */ -export const zPreviewDetail = z.object({ - child_chunks: z.array(z.string()).nullish(), - content: z.string(), - summary: z.string().nullish(), -}) - -/** - * QAPreviewDetail - */ -export const zQaPreviewDetail = z.object({ - answer: z.string(), - question: z.string(), -}) - -/** - * IndexingEstimate - */ -export const zIndexingEstimate = z.object({ - preview: z.array(zPreviewDetail), - qa_preview: z.array(zQaPreviewDetail).nullish(), - total_segments: z.int(), -}) +export const zProcessRuleMode = z.enum(['automatic', 'custom', 'hierarchical']) /** * DocumentMetadataResponse @@ -563,6 +487,43 @@ export const zDocumentMetadataResponse = z.object({ value: z.union([z.string(), z.int(), z.number(), z.boolean()]).nullish(), }) +/** + * DocumentDetailResponse + */ +export const zDocumentDetailResponse = z.object({ + archived: z.boolean().nullish(), + average_segment_length: z.number().nullish(), + completed_at: z.int().nullish(), + created_at: z.int().nullish(), + created_by: z.string().nullish(), + created_from: z.string().nullish(), + data_source_detail_dict: z.unknown().optional(), + data_source_info: z.unknown().optional(), + data_source_type: z.string().nullish(), + dataset_process_rule: z.unknown().optional(), + dataset_process_rule_id: z.string().nullish(), + disabled_at: z.int().nullish(), + disabled_by: z.string().nullish(), + display_status: z.string().nullish(), + doc_form: z.string().nullish(), + doc_language: z.string().nullish(), + doc_metadata: z.array(zDocumentMetadataResponse).nullish(), + doc_type: z.string().nullish(), + document_process_rule: z.unknown().optional(), + enabled: z.boolean().nullish(), + error: z.string().nullish(), + hit_count: z.int().nullish(), + id: z.string(), + 
indexing_latency: z.number().nullish(), + indexing_status: z.string().nullish(), + name: z.string().nullish(), + need_summary: z.boolean().nullish(), + position: z.int().nullish(), + segment_count: z.int().nullish(), + tokens: z.int().nullish(), + updated_at: z.int().nullish(), +}) + /** * DocumentResponse */ @@ -646,6 +607,18 @@ export const zDocumentWithSegmentsListResponse = z.object({ total: z.int(), }) +export const zJsonValue = z.unknown() + +/** + * DocumentPipelineExecutionLogResponse + */ +export const zDocumentPipelineExecutionLogResponse = z.object({ + datasource_info: zJsonValue.nullish(), + datasource_node_id: z.string().nullish(), + datasource_type: z.string().nullish(), + input_data: zJsonValue.nullish(), +}) + /** * ChildChunkResponse */ @@ -700,6 +673,64 @@ export const zChildChunkBatchUpdatePayload = z.object({ chunks: z.array(zChildChunkUpdateArgs), }) +/** + * SummaryEntryResponse + */ +export const zSummaryEntryResponse = z.object({ + created_at: z.int().nullish(), + error: z.string().nullish(), + segment_id: z.string(), + segment_position: z.int(), + status: z.string(), + summary_preview: z.string().nullish(), + updated_at: z.int().nullish(), +}) + +/** + * SummaryStatusResponse + */ +export const zSummaryStatusResponse = z.object({ + completed: z.int().optional().default(0), + error: z.int().optional().default(0), + generating: z.int().optional().default(0), + not_started: z.int().optional().default(0), + timeout: z.int().optional().default(0), +}) + +/** + * DocumentSummaryStatusResponse + */ +export const zDocumentSummaryStatusResponse = z.object({ + summaries: z.array(zSummaryEntryResponse), + summary_status: zSummaryStatusResponse, + total_segments: z.int(), +}) + +/** + * ExternalHitTestingQueryResponse + */ +export const zExternalHitTestingQueryResponse = z.object({ + content: z.string(), +}) + +/** + * ExternalHitTestingRecordResponse + */ +export const zExternalHitTestingRecordResponse = z.object({ + content: z.string().nullish(), + 
metadata: z.record(z.string(), z.unknown()).nullish(), + score: z.number().nullish(), + title: z.string().nullish(), +}) + +/** + * ExternalHitTestingResponse + */ +export const zExternalHitTestingResponse = z.object({ + query: zExternalHitTestingQueryResponse, + records: z.array(zExternalHitTestingRecordResponse), +}) + /** * HitTestingQuery */ @@ -755,18 +786,6 @@ export const zDatasetRerankingModelResponse = z.object({ reranking_provider_name: z.string().nullish(), }) -export const zDatasetRerankingModel = z.object({ - reranking_model_name: z.string().optional(), - reranking_provider_name: z.string().optional(), -}) - -/** - * ProcessRuleMode - * - * Dataset Process Rule Mode - */ -export const zProcessRuleMode = z.enum(['automatic', 'custom', 'hierarchical']) - /** * RerankingModel */ @@ -785,6 +804,50 @@ export const zRetrievalMethod = z.enum([ 'semantic_search', ]) +/** + * PreProcessingRule + */ +export const zPreProcessingRule = z.object({ + enabled: z.boolean(), + id: z.enum(['remove_extra_spaces', 'remove_stopwords', 'remove_urls_emails']), +}) + +/** + * Segmentation + */ +export const zSegmentation = z.object({ + chunk_overlap: z.int().optional().default(0), + max_tokens: z.int(), + separator: z.string().optional().default('\n'), +}) + +/** + * Rule + */ +export const zRule = z.object({ + parent_mode: z.enum(['full-doc', 'paragraph']).nullish(), + pre_processing_rules: z.array(zPreProcessingRule).nullish(), + segmentation: zSegmentation.nullish(), + subchunk_segmentation: zSegmentation.nullish(), +}) + +/** + * ProcessRuleResponse + */ +export const zProcessRuleResponse = z.object({ + limits: z.record(z.string(), z.unknown()), + mode: zProcessRuleMode, + rules: zRule.nullish(), +}) + +/** + * ProcessRule + */ +export const zProcessRule = z.object({ + mode: zProcessRuleMode, + rules: zRule.nullish(), +}) + /** * MetadataDetail */ @@ -1079,88 +1142,6 @@ export const zDatasetListResponse = z.object({ total: z.int(), }) -export const zDatasetKeywordSetting 
= z.object({ - keyword_weight: z.number().optional(), -}) - -export const zDatasetVectorSetting = z.object({ - embedding_model_name: z.string().optional(), - embedding_provider_name: z.string().optional(), - vector_weight: z.number().optional(), -}) - -export const zDatasetWeightedScore = z.object({ - keyword_setting: zDatasetKeywordSetting.optional(), - vector_setting: zDatasetVectorSetting.optional(), - weight_type: z.string().optional(), -}) - -export const zDatasetRetrievalModel = z.object({ - reranking_enable: z.boolean().optional(), - reranking_mode: z.string().optional(), - reranking_model: zDatasetRerankingModel.optional(), - score_threshold: z.number().optional(), - score_threshold_enabled: z.boolean().optional(), - search_method: z.string().optional(), - top_k: z.int().optional(), - weights: zDatasetWeightedScore.optional(), -}) - -export const zDatasetDetail = z.object({ - app_count: z.int().optional(), - author_name: z.string().optional(), - built_in_field_enabled: z.boolean().optional(), - chunk_structure: z.string().optional(), - created_at: z.coerce - .bigint() - .min(BigInt('-9223372036854775808'), { - error: 'Invalid value: Expected int64 to be >= -9223372036854775808', - }) - .max(BigInt('9223372036854775807'), { - error: 'Invalid value: Expected int64 to be <= 9223372036854775807', - }) - .optional(), - created_by: z.string().optional(), - data_source_type: z.string().optional(), - description: z.string().optional(), - doc_form: z.string().optional(), - doc_metadata: z.array(zDatasetDocMetadata).optional(), - document_count: z.int().optional(), - embedding_available: z.boolean().optional(), - embedding_model: z.string().optional(), - embedding_model_provider: z.string().optional(), - enable_api: z.boolean().optional(), - external_knowledge_info: zExternalKnowledgeInfo.optional(), - external_retrieval_model: zExternalRetrievalModel.optional(), - icon_info: zDatasetIconInfo.optional(), - id: z.string().optional(), - indexing_technique: 
z.string().optional(), - is_multimodal: z.boolean().optional(), - is_published: z.boolean().optional(), - name: z.string().optional(), - permission: z.string().optional(), - permission_keys: z.array(z.string()).optional(), - pipeline_id: z.string().optional(), - provider: z.string().optional(), - retrieval_model_dict: zDatasetRetrievalModel.optional(), - runtime_mode: z.string().optional(), - summary_index_setting: zAnonymousInlineModelB1954337D565.optional(), - tags: z.array(zTag).optional(), - total_available_documents: z.int().optional(), - total_documents: z.int().optional(), - updated_at: z.coerce - .bigint() - .min(BigInt('-9223372036854775808'), { - error: 'Invalid value: Expected int64 to be >= -9223372036854775808', - }) - .max(BigInt('9223372036854775807'), { - error: 'Invalid value: Expected int64 to be <= 9223372036854775807', - }) - .optional(), - updated_by: z.string().optional(), - word_count: z.int().optional(), -}) - /** * FileInfo */ @@ -1178,41 +1159,6 @@ export const zWebsiteInfo = z.object({ urls: z.array(z.string()), }) -/** - * PreProcessingRule - */ -export const zPreProcessingRule = z.object({ - enabled: z.boolean(), - id: z.enum(['remove_extra_spaces', 'remove_stopwords', 'remove_urls_emails']), -}) - -/** - * Segmentation - */ -export const zSegmentation = z.object({ - chunk_overlap: z.int().optional().default(0), - max_tokens: z.int(), - separator: z.string().optional().default('\n'), -}) - -/** - * Rule - */ -export const zRule = z.object({ - parent_mode: z.enum(['full-doc', 'paragraph']).nullish(), - pre_processing_rules: z.array(zPreProcessingRule).nullish(), - segmentation: zSegmentation.nullish(), - subchunk_segmentation: zSegmentation.nullish(), -}) - -/** - * ProcessRule - */ -export const zProcessRule = z.object({ - mode: zProcessRuleMode, - rules: zRule.nullish(), -}) - /** * Condition * @@ -1558,7 +1504,7 @@ export const zPostDatasetsExternalBody = zExternalDatasetCreatePayload /** * External dataset created successfully */ 
-export const zPostDatasetsExternalResponse = zDatasetDetail +export const zPostDatasetsExternalResponse = zDatasetDetailResponse export const zGetDatasetsExternalKnowledgeApiQuery = z.object({ keyword: z.string().optional(), @@ -1653,7 +1599,7 @@ export const zGetDatasetsProcessRuleQuery = z.object({ /** * Process rules retrieved successfully */ -export const zGetDatasetsProcessRuleResponse = zOpaqueObjectResponse +export const zGetDatasetsProcessRuleResponse = zProcessRuleResponse /** * Retrieval settings retrieved successfully @@ -1723,9 +1669,9 @@ export const zGetDatasetsByDatasetIdBatchByBatchIndexingEstimatePath = z.object( }) /** - * Batch indexing estimate calculated successfully + * Indexing estimate calculated successfully */ -export const zGetDatasetsByDatasetIdBatchByBatchIndexingEstimateResponse = zOpaqueObjectResponse +export const zGetDatasetsByDatasetIdBatchByBatchIndexingEstimateResponse = zIndexingEstimateResponse export const zGetDatasetsByDatasetIdBatchByBatchIndexingStatusPath = z.object({ batch: z.string(), @@ -1775,17 +1721,6 @@ export const zPostDatasetsByDatasetIdDocumentsPath = z.object({ */ export const zPostDatasetsByDatasetIdDocumentsResponse = zDatasetAndDocumentResponse -export const zPostDatasetsByDatasetIdDocumentsDownloadZipBody = zDocumentBatchDownloadZipPayload - -export const zPostDatasetsByDatasetIdDocumentsDownloadZipPath = z.object({ - dataset_id: z.uuid(), -}) - -/** - * ZIP archive generated successfully - */ -export const zPostDatasetsByDatasetIdDocumentsDownloadZipResponse = zBinaryFileResponse - export const zPostDatasetsByDatasetIdDocumentsGenerateSummaryBody = zGenerateSummaryPayload export const zPostDatasetsByDatasetIdDocumentsGenerateSummaryPath = z.object({ @@ -1840,7 +1775,7 @@ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdQuery = z.object({ /** * Document retrieved successfully */ -export const zGetDatasetsByDatasetIdDocumentsByDocumentIdResponse = zOpaqueObjectResponse +export const 
zGetDatasetsByDatasetIdDocumentsByDocumentIdResponse = zDocumentDetailResponse export const zGetDatasetsByDatasetIdDocumentsByDocumentIdDownloadPath = z.object({ dataset_id: z.uuid(), @@ -1861,7 +1796,7 @@ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdIndexingEstimatePath = * Indexing estimate calculated successfully */ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdIndexingEstimateResponse - = zOpaqueObjectResponse + = zIndexingEstimateResponse export const zGetDatasetsByDatasetIdDocumentsByDocumentIdIndexingStatusPath = z.object({ dataset_id: z.uuid(), @@ -1904,10 +1839,10 @@ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdPipelineExecutionLogPat }) /** - * Document pipeline execution log retrieved successfully + * Pipeline execution log retrieved successfully */ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdPipelineExecutionLogResponse - = zOpaqueObjectResponse + = zDocumentPipelineExecutionLogResponse export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdProcessingPausePath = z.object({ dataset_id: z.uuid(), @@ -2158,7 +2093,7 @@ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSummaryStatusPath = z.o * Summary status retrieved successfully */ export const zGetDatasetsByDatasetIdDocumentsByDocumentIdSummaryStatusResponse - = zOpaqueObjectResponse + = zDocumentSummaryStatusResponse export const zGetDatasetsByDatasetIdDocumentsByDocumentIdWebsiteSyncPath = z.object({ dataset_id: z.uuid(), @@ -2188,7 +2123,7 @@ export const zPostDatasetsByDatasetIdExternalHitTestingPath = z.object({ /** * External hit testing completed successfully */ -export const zPostDatasetsByDatasetIdExternalHitTestingResponse = zExternalRetrievalTestResponse +export const zPostDatasetsByDatasetIdExternalHitTestingResponse = zExternalHitTestingResponse export const zPostDatasetsByDatasetIdHitTestingBody = zHitTestingPayload diff --git a/packages/contracts/generated/api/console/instruction-generate/types.gen.ts 
b/packages/contracts/generated/api/console/instruction-generate/types.gen.ts index 82a9bee0864..08dc7f85248 100644 --- a/packages/contracts/generated/api/console/instruction-generate/types.gen.ts +++ b/packages/contracts/generated/api/console/instruction-generate/types.gen.ts @@ -25,15 +25,42 @@ export type SimpleDataResponse = { } export type ModelConfig = { - completion_params?: { - [key: string]: unknown - } - mode: LlmMode - name: string - provider: string + agent_mode?: JsonValue | null + annotation_reply?: JsonValue | null + chat_prompt_config?: JsonValue | null + completion_prompt_config?: JsonValue | null + created_at?: number | null + created_by?: string | null + dataset_configs?: JsonValue | null + dataset_query_variable?: string | null + external_data_tools?: JsonValue | null + file_upload?: JsonValue | null + model?: JsonValue | null + more_like_this?: JsonValue | null + opening_statement?: string | null + pre_prompt?: string | null + prompt_type?: string | null + retriever_resource?: JsonValue | null + sensitive_word_avoidance?: JsonValue | null + speech_to_text?: JsonValue | null + suggested_questions?: JsonValue | null + suggested_questions_after_answer?: JsonValue | null + text_to_speech?: JsonValue | null + updated_at?: number | null + updated_by?: string | null + user_input_form?: JsonValue | null } -export type LlmMode = 'chat' | 'completion' +export type JsonValue + = | string + | number + | number + | boolean + | { + [key: string]: unknown + } + | Array + | null export type PostInstructionGenerateData = { body: InstructionGeneratePayload diff --git a/packages/contracts/generated/api/console/instruction-generate/zod.gen.ts b/packages/contracts/generated/api/console/instruction-generate/zod.gen.ts index 2d89050e2a0..cbffd4478f7 100644 --- a/packages/contracts/generated/api/console/instruction-generate/zod.gen.ts +++ b/packages/contracts/generated/api/console/instruction-generate/zod.gen.ts @@ -21,21 +21,45 @@ export const zSimpleDataResponse = 
z.object({ data: z.string(), }) -/** - * LLMMode - * - * Enum class for large language model mode. - */ -export const zLlmMode = z.enum(['chat', 'completion']) +export const zJsonValue = z + .union([ + z.string(), + z.int(), + z.number(), + z.boolean(), + z.record(z.string(), z.unknown()), + z.array(z.unknown()), + ]) + .nullable() /** * ModelConfig */ export const zModelConfig = z.object({ - completion_params: z.record(z.string(), z.unknown()).optional(), - mode: zLlmMode, - name: z.string(), - provider: z.string(), + agent_mode: zJsonValue.nullish(), + annotation_reply: zJsonValue.nullish(), + chat_prompt_config: zJsonValue.nullish(), + completion_prompt_config: zJsonValue.nullish(), + created_at: z.int().nullish(), + created_by: z.string().nullish(), + dataset_configs: zJsonValue.nullish(), + dataset_query_variable: z.string().nullish(), + external_data_tools: zJsonValue.nullish(), + file_upload: zJsonValue.nullish(), + model: zJsonValue.nullish(), + more_like_this: zJsonValue.nullish(), + opening_statement: z.string().nullish(), + pre_prompt: z.string().nullish(), + prompt_type: z.string().nullish(), + retriever_resource: zJsonValue.nullish(), + sensitive_word_avoidance: zJsonValue.nullish(), + speech_to_text: zJsonValue.nullish(), + suggested_questions: zJsonValue.nullish(), + suggested_questions_after_answer: zJsonValue.nullish(), + text_to_speech: zJsonValue.nullish(), + updated_at: z.int().nullish(), + updated_by: z.string().nullish(), + user_input_form: zJsonValue.nullish(), }) /** diff --git a/packages/contracts/generated/api/console/rule-code-generate/types.gen.ts b/packages/contracts/generated/api/console/rule-code-generate/types.gen.ts index a1165a4f8a2..4883b92063d 100644 --- a/packages/contracts/generated/api/console/rule-code-generate/types.gen.ts +++ b/packages/contracts/generated/api/console/rule-code-generate/types.gen.ts @@ -14,15 +14,42 @@ export type RuleCodeGeneratePayload = { export type GeneratorResponse = unknown export type ModelConfig = 
{ - completion_params?: { - [key: string]: unknown - } - mode: LlmMode - name: string - provider: string + agent_mode?: JsonValue | null + annotation_reply?: JsonValue | null + chat_prompt_config?: JsonValue | null + completion_prompt_config?: JsonValue | null + created_at?: number | null + created_by?: string | null + dataset_configs?: JsonValue | null + dataset_query_variable?: string | null + external_data_tools?: JsonValue | null + file_upload?: JsonValue | null + model?: JsonValue | null + more_like_this?: JsonValue | null + opening_statement?: string | null + pre_prompt?: string | null + prompt_type?: string | null + retriever_resource?: JsonValue | null + sensitive_word_avoidance?: JsonValue | null + speech_to_text?: JsonValue | null + suggested_questions?: JsonValue | null + suggested_questions_after_answer?: JsonValue | null + text_to_speech?: JsonValue | null + updated_at?: number | null + updated_by?: string | null + user_input_form?: JsonValue | null } -export type LlmMode = 'chat' | 'completion' +export type JsonValue + = | string + | number + | number + | boolean + | { + [key: string]: unknown + } + | Array + | null export type PostRuleCodeGenerateData = { body: RuleCodeGeneratePayload diff --git a/packages/contracts/generated/api/console/rule-code-generate/zod.gen.ts b/packages/contracts/generated/api/console/rule-code-generate/zod.gen.ts index 97e1b816289..0536774a0c9 100644 --- a/packages/contracts/generated/api/console/rule-code-generate/zod.gen.ts +++ b/packages/contracts/generated/api/console/rule-code-generate/zod.gen.ts @@ -7,21 +7,45 @@ import * as z from 'zod' */ export const zGeneratorResponse = z.unknown() -/** - * LLMMode - * - * Enum class for large language model mode. 
- */ -export const zLlmMode = z.enum(['chat', 'completion']) +export const zJsonValue = z + .union([ + z.string(), + z.int(), + z.number(), + z.boolean(), + z.record(z.string(), z.unknown()), + z.array(z.unknown()), + ]) + .nullable() /** * ModelConfig */ export const zModelConfig = z.object({ - completion_params: z.record(z.string(), z.unknown()).optional(), - mode: zLlmMode, - name: z.string(), - provider: z.string(), + agent_mode: zJsonValue.nullish(), + annotation_reply: zJsonValue.nullish(), + chat_prompt_config: zJsonValue.nullish(), + completion_prompt_config: zJsonValue.nullish(), + created_at: z.int().nullish(), + created_by: z.string().nullish(), + dataset_configs: zJsonValue.nullish(), + dataset_query_variable: z.string().nullish(), + external_data_tools: zJsonValue.nullish(), + file_upload: zJsonValue.nullish(), + model: zJsonValue.nullish(), + more_like_this: zJsonValue.nullish(), + opening_statement: z.string().nullish(), + pre_prompt: z.string().nullish(), + prompt_type: z.string().nullish(), + retriever_resource: zJsonValue.nullish(), + sensitive_word_avoidance: zJsonValue.nullish(), + speech_to_text: zJsonValue.nullish(), + suggested_questions: zJsonValue.nullish(), + suggested_questions_after_answer: zJsonValue.nullish(), + text_to_speech: zJsonValue.nullish(), + updated_at: z.int().nullish(), + updated_by: z.string().nullish(), + user_input_form: zJsonValue.nullish(), }) /** diff --git a/packages/contracts/generated/api/console/rule-generate/types.gen.ts b/packages/contracts/generated/api/console/rule-generate/types.gen.ts index 4e7c1421461..9a09a91374a 100644 --- a/packages/contracts/generated/api/console/rule-generate/types.gen.ts +++ b/packages/contracts/generated/api/console/rule-generate/types.gen.ts @@ -13,15 +13,42 @@ export type RuleGeneratePayload = { export type GeneratorResponse = unknown export type ModelConfig = { - completion_params?: { - [key: string]: unknown - } - mode: LlmMode - name: string - provider: string + agent_mode?: 
JsonValue | null + annotation_reply?: JsonValue | null + chat_prompt_config?: JsonValue | null + completion_prompt_config?: JsonValue | null + created_at?: number | null + created_by?: string | null + dataset_configs?: JsonValue | null + dataset_query_variable?: string | null + external_data_tools?: JsonValue | null + file_upload?: JsonValue | null + model?: JsonValue | null + more_like_this?: JsonValue | null + opening_statement?: string | null + pre_prompt?: string | null + prompt_type?: string | null + retriever_resource?: JsonValue | null + sensitive_word_avoidance?: JsonValue | null + speech_to_text?: JsonValue | null + suggested_questions?: JsonValue | null + suggested_questions_after_answer?: JsonValue | null + text_to_speech?: JsonValue | null + updated_at?: number | null + updated_by?: string | null + user_input_form?: JsonValue | null } -export type LlmMode = 'chat' | 'completion' +export type JsonValue + = | string + | number + | number + | boolean + | { + [key: string]: unknown + } + | Array + | null export type PostRuleGenerateData = { body: RuleGeneratePayload diff --git a/packages/contracts/generated/api/console/rule-generate/zod.gen.ts b/packages/contracts/generated/api/console/rule-generate/zod.gen.ts index 6e539e63f4e..b1f31427366 100644 --- a/packages/contracts/generated/api/console/rule-generate/zod.gen.ts +++ b/packages/contracts/generated/api/console/rule-generate/zod.gen.ts @@ -7,21 +7,45 @@ import * as z from 'zod' */ export const zGeneratorResponse = z.unknown() -/** - * LLMMode - * - * Enum class for large language model mode. 
- */ -export const zLlmMode = z.enum(['chat', 'completion']) +export const zJsonValue = z + .union([ + z.string(), + z.int(), + z.number(), + z.boolean(), + z.record(z.string(), z.unknown()), + z.array(z.unknown()), + ]) + .nullable() /** * ModelConfig */ export const zModelConfig = z.object({ - completion_params: z.record(z.string(), z.unknown()).optional(), - mode: zLlmMode, - name: z.string(), - provider: z.string(), + agent_mode: zJsonValue.nullish(), + annotation_reply: zJsonValue.nullish(), + chat_prompt_config: zJsonValue.nullish(), + completion_prompt_config: zJsonValue.nullish(), + created_at: z.int().nullish(), + created_by: z.string().nullish(), + dataset_configs: zJsonValue.nullish(), + dataset_query_variable: z.string().nullish(), + external_data_tools: zJsonValue.nullish(), + file_upload: zJsonValue.nullish(), + model: zJsonValue.nullish(), + more_like_this: zJsonValue.nullish(), + opening_statement: z.string().nullish(), + pre_prompt: z.string().nullish(), + prompt_type: z.string().nullish(), + retriever_resource: zJsonValue.nullish(), + sensitive_word_avoidance: zJsonValue.nullish(), + speech_to_text: zJsonValue.nullish(), + suggested_questions: zJsonValue.nullish(), + suggested_questions_after_answer: zJsonValue.nullish(), + text_to_speech: zJsonValue.nullish(), + updated_at: z.int().nullish(), + updated_by: z.string().nullish(), + user_input_form: zJsonValue.nullish(), }) /** diff --git a/packages/contracts/generated/api/console/rule-structured-output-generate/types.gen.ts b/packages/contracts/generated/api/console/rule-structured-output-generate/types.gen.ts index f7da1cd5cc8..c2b459fee56 100644 --- a/packages/contracts/generated/api/console/rule-structured-output-generate/types.gen.ts +++ b/packages/contracts/generated/api/console/rule-structured-output-generate/types.gen.ts @@ -12,15 +12,42 @@ export type RuleStructuredOutputPayload = { export type GeneratorResponse = unknown export type ModelConfig = { - completion_params?: { - [key: string]: 
unknown - } - mode: LlmMode - name: string - provider: string + agent_mode?: JsonValue | null + annotation_reply?: JsonValue | null + chat_prompt_config?: JsonValue | null + completion_prompt_config?: JsonValue | null + created_at?: number | null + created_by?: string | null + dataset_configs?: JsonValue | null + dataset_query_variable?: string | null + external_data_tools?: JsonValue | null + file_upload?: JsonValue | null + model?: JsonValue | null + more_like_this?: JsonValue | null + opening_statement?: string | null + pre_prompt?: string | null + prompt_type?: string | null + retriever_resource?: JsonValue | null + sensitive_word_avoidance?: JsonValue | null + speech_to_text?: JsonValue | null + suggested_questions?: JsonValue | null + suggested_questions_after_answer?: JsonValue | null + text_to_speech?: JsonValue | null + updated_at?: number | null + updated_by?: string | null + user_input_form?: JsonValue | null } -export type LlmMode = 'chat' | 'completion' +export type JsonValue + = | string + | number + | number + | boolean + | { + [key: string]: unknown + } + | Array + | null export type PostRuleStructuredOutputGenerateData = { body: RuleStructuredOutputPayload diff --git a/packages/contracts/generated/api/console/rule-structured-output-generate/zod.gen.ts b/packages/contracts/generated/api/console/rule-structured-output-generate/zod.gen.ts index 6119b0010d0..0929fccae64 100644 --- a/packages/contracts/generated/api/console/rule-structured-output-generate/zod.gen.ts +++ b/packages/contracts/generated/api/console/rule-structured-output-generate/zod.gen.ts @@ -7,21 +7,45 @@ import * as z from 'zod' */ export const zGeneratorResponse = z.unknown() -/** - * LLMMode - * - * Enum class for large language model mode. 
- */ -export const zLlmMode = z.enum(['chat', 'completion']) +export const zJsonValue = z + .union([ + z.string(), + z.int(), + z.number(), + z.boolean(), + z.record(z.string(), z.unknown()), + z.array(z.unknown()), + ]) + .nullable() /** * ModelConfig */ export const zModelConfig = z.object({ - completion_params: z.record(z.string(), z.unknown()).optional(), - mode: zLlmMode, - name: z.string(), - provider: z.string(), + agent_mode: zJsonValue.nullish(), + annotation_reply: zJsonValue.nullish(), + chat_prompt_config: zJsonValue.nullish(), + completion_prompt_config: zJsonValue.nullish(), + created_at: z.int().nullish(), + created_by: z.string().nullish(), + dataset_configs: zJsonValue.nullish(), + dataset_query_variable: z.string().nullish(), + external_data_tools: zJsonValue.nullish(), + file_upload: zJsonValue.nullish(), + model: zJsonValue.nullish(), + more_like_this: zJsonValue.nullish(), + opening_statement: z.string().nullish(), + pre_prompt: z.string().nullish(), + prompt_type: z.string().nullish(), + retriever_resource: zJsonValue.nullish(), + sensitive_word_avoidance: zJsonValue.nullish(), + speech_to_text: zJsonValue.nullish(), + suggested_questions: zJsonValue.nullish(), + suggested_questions_after_answer: zJsonValue.nullish(), + text_to_speech: zJsonValue.nullish(), + updated_at: z.int().nullish(), + updated_by: z.string().nullish(), + user_input_form: zJsonValue.nullish(), }) /** diff --git a/packages/contracts/generated/api/console/test/types.gen.ts b/packages/contracts/generated/api/console/test/types.gen.ts index 421460c015d..efbf875b0ef 100644 --- a/packages/contracts/generated/api/console/test/types.gen.ts +++ b/packages/contracts/generated/api/console/test/types.gen.ts @@ -10,19 +10,24 @@ export type BedrockRetrievalPayload = { retrieval_setting: BedrockRetrievalSetting } -export type ExternalRetrievalTestResponse - = | { - [key: string]: unknown - } - | Array<{ - [key: string]: unknown - }> +export type BedrockRetrievalResponse = { + records: 
Array +} export type BedrockRetrievalSetting = { score_threshold?: number top_k?: number | null } +export type BedrockRetrievalRecordResponse = { + content?: string | null + metadata?: { + [key: string]: unknown + } | null + score: number + title?: string | null +} + export type PostTestRetrievalData = { body: BedrockRetrievalPayload path?: never @@ -31,7 +36,7 @@ export type PostTestRetrievalData = { } export type PostTestRetrievalResponses = { - 200: ExternalRetrievalTestResponse + 200: BedrockRetrievalResponse } export type PostTestRetrievalResponse = PostTestRetrievalResponses[keyof PostTestRetrievalResponses] diff --git a/packages/contracts/generated/api/console/test/zod.gen.ts b/packages/contracts/generated/api/console/test/zod.gen.ts index 35ec1f4b034..84cb75e6196 100644 --- a/packages/contracts/generated/api/console/test/zod.gen.ts +++ b/packages/contracts/generated/api/console/test/zod.gen.ts @@ -2,14 +2,6 @@ import * as z from 'zod' -/** - * ExternalRetrievalTestResponse - */ -export const zExternalRetrievalTestResponse = z.union([ - z.record(z.string(), z.unknown()), - z.array(z.record(z.string(), z.unknown())), -]) - /** * BedrockRetrievalSetting * @@ -29,9 +21,26 @@ export const zBedrockRetrievalPayload = z.object({ retrieval_setting: zBedrockRetrievalSetting, }) +/** + * BedrockRetrievalRecordResponse + */ +export const zBedrockRetrievalRecordResponse = z.object({ + content: z.string().nullish(), + metadata: z.record(z.string(), z.unknown()).nullish(), + score: z.number(), + title: z.string().nullish(), +}) + +/** + * BedrockRetrievalResponse + */ +export const zBedrockRetrievalResponse = z.object({ + records: z.array(zBedrockRetrievalRecordResponse), +}) + export const zPostTestRetrievalBody = zBedrockRetrievalPayload /** * Bedrock retrieval test completed */ -export const zPostTestRetrievalResponse = zExternalRetrievalTestResponse +export const zPostTestRetrievalResponse = zBedrockRetrievalResponse diff --git 
a/packages/contracts/generated/api/console/trial-apps/orpc.gen.ts b/packages/contracts/generated/api/console/trial-apps/orpc.gen.ts index ebc2624fa19..35859bf863e 100644 --- a/packages/contracts/generated/api/console/trial-apps/orpc.gen.ts +++ b/packages/contracts/generated/api/console/trial-apps/orpc.gen.ts @@ -19,18 +19,9 @@ import { zGetTrialAppsByAppIdWorkflowsResponse, zPostTrialAppsByAppIdAudioToTextPath, zPostTrialAppsByAppIdAudioToTextResponse, - zPostTrialAppsByAppIdChatMessagesBody, - zPostTrialAppsByAppIdChatMessagesPath, - zPostTrialAppsByAppIdChatMessagesResponse, - zPostTrialAppsByAppIdCompletionMessagesBody, - zPostTrialAppsByAppIdCompletionMessagesPath, - zPostTrialAppsByAppIdCompletionMessagesResponse, zPostTrialAppsByAppIdTextToAudioBody, zPostTrialAppsByAppIdTextToAudioPath, zPostTrialAppsByAppIdTextToAudioResponse, - zPostTrialAppsByAppIdWorkflowsRunBody, - zPostTrialAppsByAppIdWorkflowsRunPath, - zPostTrialAppsByAppIdWorkflowsRunResponse, zPostTrialAppsByAppIdWorkflowsTasksByTaskIdStopPath, zPostTrialAppsByAppIdWorkflowsTasksByTaskIdStopResponse, } from './zod.gen' @@ -50,46 +41,6 @@ export const audioToText = { post, } -export const post2 = oc - .route({ - inputStructure: 'detailed', - method: 'POST', - operationId: 'postTrialAppsByAppIdChatMessages', - path: '/trial-apps/{app_id}/chat-messages', - tags: ['console'], - }) - .input( - z.object({ - body: zPostTrialAppsByAppIdChatMessagesBody, - params: zPostTrialAppsByAppIdChatMessagesPath, - }), - ) - .output(zPostTrialAppsByAppIdChatMessagesResponse) - -export const chatMessages = { - post: post2, -} - -export const post3 = oc - .route({ - inputStructure: 'detailed', - method: 'POST', - operationId: 'postTrialAppsByAppIdCompletionMessages', - path: '/trial-apps/{app_id}/completion-messages', - tags: ['console'], - }) - .input( - z.object({ - body: zPostTrialAppsByAppIdCompletionMessagesBody, - params: zPostTrialAppsByAppIdCompletionMessagesPath, - }), - ) - 
.output(zPostTrialAppsByAppIdCompletionMessagesResponse) - -export const completionMessages = { - post: post3, -} - export const get = oc .route({ inputStructure: 'detailed', @@ -175,7 +126,7 @@ export const site = { get: get4, } -export const post4 = oc +export const post2 = oc .route({ inputStructure: 'detailed', method: 'POST', @@ -192,37 +143,13 @@ export const post4 = oc .output(zPostTrialAppsByAppIdTextToAudioResponse) export const textToAudio = { - post: post4, -} - -/** - * Run workflow - */ -export const post5 = oc - .route({ - inputStructure: 'detailed', - method: 'POST', - operationId: 'postTrialAppsByAppIdWorkflowsRun', - path: '/trial-apps/{app_id}/workflows/run', - summary: 'Run workflow', - tags: ['console'], - }) - .input( - z.object({ - body: zPostTrialAppsByAppIdWorkflowsRunBody, - params: zPostTrialAppsByAppIdWorkflowsRunPath, - }), - ) - .output(zPostTrialAppsByAppIdWorkflowsRunResponse) - -export const run = { - post: post5, + post: post2, } /** * Stop workflow task */ -export const post6 = oc +export const post3 = oc .route({ inputStructure: 'detailed', method: 'POST', @@ -235,7 +162,7 @@ export const post6 = oc .output(zPostTrialAppsByAppIdWorkflowsTasksByTaskIdStopResponse) export const stop = { - post: post6, + post: post3, } export const byTaskId = { @@ -263,7 +190,6 @@ export const get5 = oc export const workflows = { get: get5, - run, tasks, } @@ -285,8 +211,6 @@ export const get6 = oc export const byAppId = { get: get6, audioToText, - chatMessages, - completionMessages, datasets, messages, parameters, diff --git a/packages/contracts/generated/api/console/trial-apps/types.gen.ts b/packages/contracts/generated/api/console/trial-apps/types.gen.ts index 894da1102ee..a9f42362780 100644 --- a/packages/contracts/generated/api/console/trial-apps/types.gen.ts +++ b/packages/contracts/generated/api/console/trial-apps/types.gen.ts @@ -4,66 +4,47 @@ export type ClientOptions = { baseUrl: `${string}://${string}/console/api` | (string & {}) } -export 
type TrialAppDetailWithSite = { - access_mode?: string - api_base_url?: string - created_at?: number - created_by?: string - deleted_tools?: Array - description?: string - enable_api?: boolean - enable_site?: boolean - icon?: string - icon_background?: string - icon_type?: string - icon_url?: string - id?: string - max_active_requests?: number - mode?: string - model_config?: TrialAppModelConfig - name?: string +export type AppDetailWithSite = { + access_mode?: string | null + api_base_url?: string | null + app_id?: string | null + bound_agent_id?: string | null + created_at?: number | null + created_by?: string | null + deleted_tools?: Array + description?: string | null + enable_api: boolean + enable_site: boolean + icon?: string | null + icon_background?: string | null + icon_type?: string | null + readonly icon_url: string | null + id: string + maintainer?: string | null + max_active_requests?: number | null + mode: string + model_config?: ModelConfig | null + name: string permission_keys?: Array - site?: TrialSite - tags?: Array - updated_at?: number - updated_by?: string - use_icon_as_answer_icon?: boolean - workflow?: TrialWorkflowPartial + site?: Site | null + tags?: Array + tracing?: JsonValue | null + updated_at?: number | null + updated_by?: string | null + use_icon_as_answer_icon?: boolean | null + workflow?: WorkflowPartial | null } export type AudioTranscriptResponse = { text: string } -export type ChatRequest = { - conversation_id?: string | null - files?: Array | null - inputs: { - [key: string]: unknown - } - parent_message_id?: string | null - query: string - retriever_from?: string -} - -export type GeneratedAppResponse = JsonValue - -export type CompletionRequest = { - files?: Array | null - inputs: { - [key: string]: unknown - } - query?: string - response_mode?: 'blocking' | 'streaming' | null - retriever_from?: string -} - -export type TrialDatasetList = { - data?: Array - has_more?: boolean - limit?: number - page?: number - total?: number 
+export type TrialDatasetListResponse = { + data: Array + has_more: boolean + limit: number + page: number + total: number } export type SuggestedQuestionsResponse = { @@ -111,143 +92,69 @@ export type TextToSpeechRequest = { export type AudioBinaryResponse = Blob | File -export type TrialWorkflow = { - conversation_variables?: Array - created_at?: number - created_by?: TrialSimpleAccount - environment_variables?: Array<{ - [key: string]: unknown - }> - features?: { +export type WorkflowResponse = { + conversation_variables: Array + created_at: number + created_by?: SimpleAccount | null + environment_variables: Array + features: { [key: string]: unknown } - graph?: { - [key: string]: unknown - } - hash?: string - id?: string - marked_comment?: string - marked_name?: string - rag_pipeline_variables?: Array - tool_published?: boolean - updated_at?: number - updated_by?: TrialSimpleAccount - version?: string -} - -export type WorkflowRunRequest = { - files?: Array | null - inputs: { + graph: { [key: string]: unknown } + hash: string + id: string + marked_comment: string + marked_name: string + rag_pipeline_variables: Array + tool_published: boolean + updated_at: number + updated_by?: SimpleAccount | null + version: string } export type SimpleResultResponse = { result: string } -export type TrialDeletedTool = { - provider_id?: string - tool_name?: string - type?: string +export type DeletedTool = { + provider_id: string + tool_name: string + type: string } -export type TrialAppModelConfig = { - agent_mode?: { - [key: string]: unknown - } - annotation_reply?: { - [key: string]: unknown - } - chat_prompt_config?: { - [key: string]: unknown - } - completion_prompt_config?: { - [key: string]: unknown - } - created_at?: number - created_by?: string - dataset_configs?: { - [key: string]: unknown - } - dataset_query_variable?: string - external_data_tools?: Array<{ - [key: string]: unknown - }> - file_upload?: { - [key: string]: unknown - } - model?: { - [key: string]: 
unknown - } - more_like_this?: { - [key: string]: unknown - } - opening_statement?: string - pre_prompt?: string - prompt_type?: string - retriever_resource?: { - [key: string]: unknown - } - sensitive_word_avoidance?: { - [key: string]: unknown - } - speech_to_text?: { - [key: string]: unknown - } - suggested_questions?: Array - suggested_questions_after_answer?: { - [key: string]: unknown - } - text_to_speech?: { - [key: string]: unknown - } - updated_at?: number - updated_by?: string - user_input_form?: Array<{ - [key: string]: unknown - }> +export type ModelConfig = { + agent_mode?: JsonValue | null + annotation_reply?: JsonValue | null + chat_prompt_config?: JsonValue | null + completion_prompt_config?: JsonValue | null + created_at?: number | null + created_by?: string | null + dataset_configs?: JsonValue | null + dataset_query_variable?: string | null + external_data_tools?: JsonValue | null + file_upload?: JsonValue | null + model?: JsonValue | null + more_like_this?: JsonValue | null + opening_statement?: string | null + pre_prompt?: string | null + prompt_type?: string | null + retriever_resource?: JsonValue | null + sensitive_word_avoidance?: JsonValue | null + speech_to_text?: JsonValue | null + suggested_questions?: JsonValue | null + suggested_questions_after_answer?: JsonValue | null + text_to_speech?: JsonValue | null + updated_at?: number | null + updated_by?: string | null + user_input_form?: JsonValue | null } -export type TrialSite = { - access_token?: string - app_base_url?: string - chat_color_theme?: string - chat_color_theme_inverted?: boolean - code?: string - copyright?: string - created_at?: number - created_by?: string - custom_disclaimer?: string - customize_domain?: string - customize_token_strategy?: string - default_language?: string - description?: string - icon?: string - icon_background?: string - icon_type?: string - icon_url?: string - privacy_policy?: string - prompt_public?: boolean - show_workflow_steps?: boolean - title?: 
string - updated_at?: number - updated_by?: string - use_icon_as_answer_icon?: boolean -} - -export type TrialTag = { - id?: string - name?: string - type?: string -} - -export type TrialWorkflowPartial = { - created_at?: number - created_by?: string - id?: string - updated_at?: number - updated_by?: string +export type Tag = { + id: string + name: string + type: string } export type JsonValue @@ -261,16 +168,52 @@ export type JsonValue | Array | null -export type TrialDataset = { - created_at?: number - created_by?: string - data_source_type?: string - description?: string - id?: string - indexing_technique?: string - name?: string - permission?: string +export type WorkflowPartial = { + created_at?: number | null + created_by?: string | null + id: string + updated_at?: number | null + updated_by?: string | null +} + +export type TrialDatasetListItemResponse = { + app_count: number + author_name: string | null + built_in_field_enabled: boolean + chunk_structure: string | null + created_at: number + created_by: string + data_source_type: string | null + description: string | null + doc_form: string | null + doc_metadata: Array + document_count: number + embedding_available?: boolean | null + embedding_model: string | null + embedding_model_provider: string | null + enable_api: boolean + external_knowledge_info?: DatasetExternalKnowledgeInfoResponse + external_retrieval_model: DatasetExternalRetrievalModelResponse | null + icon_info?: DatasetIconInfoResponse + id: string + indexing_technique: string | null + is_multimodal: boolean + is_published: boolean + maintainer?: string | null + name: string + permission: string permission_keys?: Array + pipeline_id: string | null + provider: string + retrieval_model_dict: DatasetRetrievalModelResponse + runtime_mode: string | null + summary_index_setting?: DatasetSummaryIndexSettingResponse + tags: Array + total_available_documents: number + total_documents: number + updated_at: number + updated_by: string | null + 
word_count: number } export type JsonObject = { @@ -285,56 +228,145 @@ export type SystemParameters = { workflow_file_upload_limit: number } -export type TrialConversationVariable = { - description?: string - id?: string - name?: string - value?: - | string - | number - | number - | boolean - | { - [key: string]: unknown - } - | Array - | null - value_type?: string +export type WorkflowConversationVariableResponse = { + description: string + id: string + name: string + value: unknown + value_type: string } -export type TrialSimpleAccount = { - email?: string - id?: string - name?: string +export type SimpleAccount = { + email: string + id: string + name: string } -export type TrialPipelineVariable = { - allow_file_extension?: Array - allow_file_upload_methods?: Array - allowed_file_types?: Array - belong_to_node_id?: string - default_value?: - | string - | number - | number - | boolean - | { - [key: string]: unknown - } - | Array - | null - label?: string - max_length?: number - options?: Array - placeholder?: string - required?: boolean - tooltips?: string - type?: string - unit?: string - variable?: string +export type WorkflowEnvironmentVariableResponse = { + description: string + id: string + name: string + value: unknown + value_type: string } -export type GeneratedAppResponseWritable = JsonValue +export type PipelineVariableResponse = { + allowed_file_extensions?: Array | null + allowed_file_types?: Array | null + allowed_file_upload_methods?: Array | null + belong_to_node_id: string + default_value?: unknown + label: string + max_length?: number | null + options?: Array | null + placeholder?: string | null + required: boolean + tooltips?: string | null + type: string + unit?: string | null + variable: string +} + +export type DatasetDocMetadataResponse = { + id: string + name: string + type: string +} + +export type DatasetExternalKnowledgeInfoResponse = { + external_knowledge_api_endpoint?: string | null + external_knowledge_api_id?: string | null + 
external_knowledge_api_name?: string | null + external_knowledge_id?: string | null +} + +export type DatasetExternalRetrievalModelResponse = { + score_threshold?: number | null + score_threshold_enabled?: boolean | null + top_k: number +} + +export type DatasetIconInfoResponse = { + icon?: string | null + icon_background?: string | null + icon_type?: string | null + icon_url?: string | null +} + +export type DatasetRetrievalModelResponse = { + reranking_enable: boolean + reranking_mode?: string | null + reranking_model?: DatasetRerankingModelResponse + score_threshold?: number | null + score_threshold_enabled: boolean + search_method: string + top_k: number + weights?: DatasetWeightedScoreResponse | null +} + +export type DatasetSummaryIndexSettingResponse = { + enable?: boolean | null + model_name?: string | null + model_provider_name?: string | null + summary_prompt?: string | null +} + +export type DatasetTagResponse = { + id: string + name: string + type: string +} + +export type DatasetRerankingModelResponse = { + reranking_model_name?: string | null + reranking_provider_name?: string | null +} + +export type DatasetWeightedScoreResponse = { + keyword_setting?: DatasetKeywordSettingResponse + vector_setting?: DatasetVectorSettingResponse + weight_type?: string | null +} + +export type DatasetKeywordSettingResponse = { + keyword_weight?: number | null +} + +export type DatasetVectorSettingResponse = { + embedding_model_name?: string | null + embedding_provider_name?: string | null + vector_weight?: number | null +} + +export type AppDetailWithSiteWritable = { + access_mode?: string | null + api_base_url?: string | null + app_id?: string | null + bound_agent_id?: string | null + created_at?: number | null + created_by?: string | null + deleted_tools?: Array + description?: string | null + enable_api: boolean + enable_site: boolean + icon?: string | null + icon_background?: string | null + icon_type?: string | null + id: string + maintainer?: string | null + 
max_active_requests?: number | null + mode: string + model_config?: ModelConfig | null + name: string + permission_keys?: Array + site?: SiteWritable | null + tags?: Array + tracing?: JsonValue | null + updated_at?: number | null + updated_by?: string | null + use_icon_as_answer_icon?: boolean | null + workflow?: WorkflowPartial | null +} export type SiteWritable = { chat_color_theme?: string | null @@ -362,7 +394,7 @@ export type GetTrialAppsByAppIdData = { } export type GetTrialAppsByAppIdResponses = { - 200: TrialAppDetailWithSite + 200: AppDetailWithSite } export type GetTrialAppsByAppIdResponse @@ -384,38 +416,6 @@ export type PostTrialAppsByAppIdAudioToTextResponses = { export type PostTrialAppsByAppIdAudioToTextResponse = PostTrialAppsByAppIdAudioToTextResponses[keyof PostTrialAppsByAppIdAudioToTextResponses] -export type PostTrialAppsByAppIdChatMessagesData = { - body: ChatRequest - path: { - app_id: string - } - query?: never - url: '/trial-apps/{app_id}/chat-messages' -} - -export type PostTrialAppsByAppIdChatMessagesResponses = { - 200: GeneratedAppResponse -} - -export type PostTrialAppsByAppIdChatMessagesResponse - = PostTrialAppsByAppIdChatMessagesResponses[keyof PostTrialAppsByAppIdChatMessagesResponses] - -export type PostTrialAppsByAppIdCompletionMessagesData = { - body: CompletionRequest - path: { - app_id: string - } - query?: never - url: '/trial-apps/{app_id}/completion-messages' -} - -export type PostTrialAppsByAppIdCompletionMessagesResponses = { - 200: GeneratedAppResponse -} - -export type PostTrialAppsByAppIdCompletionMessagesResponse - = PostTrialAppsByAppIdCompletionMessagesResponses[keyof PostTrialAppsByAppIdCompletionMessagesResponses] - export type GetTrialAppsByAppIdDatasetsData = { body?: never path: { @@ -430,7 +430,7 @@ export type GetTrialAppsByAppIdDatasetsData = { } export type GetTrialAppsByAppIdDatasetsResponses = { - 200: TrialDatasetList + 200: TrialDatasetListResponse } export type GetTrialAppsByAppIdDatasetsResponse @@ 
-511,28 +511,12 @@ export type GetTrialAppsByAppIdWorkflowsData = { } export type GetTrialAppsByAppIdWorkflowsResponses = { - 200: TrialWorkflow + 200: WorkflowResponse } export type GetTrialAppsByAppIdWorkflowsResponse = GetTrialAppsByAppIdWorkflowsResponses[keyof GetTrialAppsByAppIdWorkflowsResponses] -export type PostTrialAppsByAppIdWorkflowsRunData = { - body: WorkflowRunRequest - path: { - app_id: string - } - query?: never - url: '/trial-apps/{app_id}/workflows/run' -} - -export type PostTrialAppsByAppIdWorkflowsRunResponses = { - 200: GeneratedAppResponse -} - -export type PostTrialAppsByAppIdWorkflowsRunResponse - = PostTrialAppsByAppIdWorkflowsRunResponses[keyof PostTrialAppsByAppIdWorkflowsRunResponses] - export type PostTrialAppsByAppIdWorkflowsTasksByTaskIdStopData = { body?: never path: { diff --git a/packages/contracts/generated/api/console/trial-apps/zod.gen.ts b/packages/contracts/generated/api/console/trial-apps/zod.gen.ts index b8768790ef9..070fdcab1d0 100644 --- a/packages/contracts/generated/api/console/trial-apps/zod.gen.ts +++ b/packages/contracts/generated/api/console/trial-apps/zod.gen.ts @@ -9,29 +9,6 @@ export const zAudioTranscriptResponse = z.object({ text: z.string(), }) -/** - * ChatRequest - */ -export const zChatRequest = z.object({ - conversation_id: z.string().nullish(), - files: z.array(z.unknown()).nullish(), - inputs: z.record(z.string(), z.unknown()), - parent_message_id: z.string().nullish(), - query: z.string(), - retriever_from: z.string().optional().default('explore_app'), -}) - -/** - * CompletionRequest - */ -export const zCompletionRequest = z.object({ - files: z.array(z.unknown()).nullish(), - inputs: z.record(z.string(), z.unknown()), - query: z.string().optional().default(''), - response_mode: z.enum(['blocking', 'streaming']).nullish(), - retriever_from: z.string().optional().default('explore_app'), -}) - /** * SuggestedQuestionsResponse */ @@ -74,14 +51,6 @@ export const zTextToSpeechRequest = z.object({ */ export 
const zAudioBinaryResponse = z.custom() -/** - * WorkflowRunRequest - */ -export const zWorkflowRunRequest = z.object({ - files: z.array(z.unknown()).nullish(), - inputs: z.record(z.string(), z.unknown()), -}) - /** * SimpleResultResponse */ @@ -89,169 +58,22 @@ export const zSimpleResultResponse = z.object({ result: z.string(), }) -export const zTrialDeletedTool = z.object({ - provider_id: z.string().optional(), - tool_name: z.string().optional(), - type: z.string().optional(), +/** + * DeletedTool + */ +export const zDeletedTool = z.object({ + provider_id: z.string(), + tool_name: z.string(), + type: z.string(), }) -export const zTrialAppModelConfig = z.object({ - agent_mode: z.record(z.string(), z.unknown()).optional(), - annotation_reply: z.record(z.string(), z.unknown()).optional(), - chat_prompt_config: z.record(z.string(), z.unknown()).optional(), - completion_prompt_config: z.record(z.string(), z.unknown()).optional(), - created_at: z.coerce - .bigint() - .min(BigInt('-9223372036854775808'), { - error: 'Invalid value: Expected int64 to be >= -9223372036854775808', - }) - .max(BigInt('9223372036854775807'), { - error: 'Invalid value: Expected int64 to be <= 9223372036854775807', - }) - .optional(), - created_by: z.string().optional(), - dataset_configs: z.record(z.string(), z.unknown()).optional(), - dataset_query_variable: z.string().optional(), - external_data_tools: z.array(z.record(z.string(), z.unknown())).optional(), - file_upload: z.record(z.string(), z.unknown()).optional(), - model: z.record(z.string(), z.unknown()).optional(), - more_like_this: z.record(z.string(), z.unknown()).optional(), - opening_statement: z.string().optional(), - pre_prompt: z.string().optional(), - prompt_type: z.string().optional(), - retriever_resource: z.record(z.string(), z.unknown()).optional(), - sensitive_word_avoidance: z.record(z.string(), z.unknown()).optional(), - speech_to_text: z.record(z.string(), z.unknown()).optional(), - suggested_questions: 
z.array(z.string()).optional(), - suggested_questions_after_answer: z.record(z.string(), z.unknown()).optional(), - text_to_speech: z.record(z.string(), z.unknown()).optional(), - updated_at: z.coerce - .bigint() - .min(BigInt('-9223372036854775808'), { - error: 'Invalid value: Expected int64 to be >= -9223372036854775808', - }) - .max(BigInt('9223372036854775807'), { - error: 'Invalid value: Expected int64 to be <= 9223372036854775807', - }) - .optional(), - updated_by: z.string().optional(), - user_input_form: z.array(z.record(z.string(), z.unknown())).optional(), -}) - -export const zTrialSite = z.object({ - access_token: z.string().optional(), - app_base_url: z.string().optional(), - chat_color_theme: z.string().optional(), - chat_color_theme_inverted: z.boolean().optional(), - code: z.string().optional(), - copyright: z.string().optional(), - created_at: z.coerce - .bigint() - .min(BigInt('-9223372036854775808'), { - error: 'Invalid value: Expected int64 to be >= -9223372036854775808', - }) - .max(BigInt('9223372036854775807'), { - error: 'Invalid value: Expected int64 to be <= 9223372036854775807', - }) - .optional(), - created_by: z.string().optional(), - custom_disclaimer: z.string().optional(), - customize_domain: z.string().optional(), - customize_token_strategy: z.string().optional(), - default_language: z.string().optional(), - description: z.string().optional(), - icon: z.string().optional(), - icon_background: z.string().optional(), - icon_type: z.string().optional(), - icon_url: z.string().optional(), - privacy_policy: z.string().optional(), - prompt_public: z.boolean().optional(), - show_workflow_steps: z.boolean().optional(), - title: z.string().optional(), - updated_at: z.coerce - .bigint() - .min(BigInt('-9223372036854775808'), { - error: 'Invalid value: Expected int64 to be >= -9223372036854775808', - }) - .max(BigInt('9223372036854775807'), { - error: 'Invalid value: Expected int64 to be <= 9223372036854775807', - }) - .optional(), - 
updated_by: z.string().optional(), - use_icon_as_answer_icon: z.boolean().optional(), -}) - -export const zTrialTag = z.object({ - id: z.string().optional(), - name: z.string().optional(), - type: z.string().optional(), -}) - -export const zTrialWorkflowPartial = z.object({ - created_at: z.coerce - .bigint() - .min(BigInt('-9223372036854775808'), { - error: 'Invalid value: Expected int64 to be >= -9223372036854775808', - }) - .max(BigInt('9223372036854775807'), { - error: 'Invalid value: Expected int64 to be <= 9223372036854775807', - }) - .optional(), - created_by: z.string().optional(), - id: z.string().optional(), - updated_at: z.coerce - .bigint() - .min(BigInt('-9223372036854775808'), { - error: 'Invalid value: Expected int64 to be >= -9223372036854775808', - }) - .max(BigInt('9223372036854775807'), { - error: 'Invalid value: Expected int64 to be <= 9223372036854775807', - }) - .optional(), - updated_by: z.string().optional(), -}) - -export const zTrialAppDetailWithSite = z.object({ - access_mode: z.string().optional(), - api_base_url: z.string().optional(), - created_at: z.coerce - .bigint() - .min(BigInt('-9223372036854775808'), { - error: 'Invalid value: Expected int64 to be >= -9223372036854775808', - }) - .max(BigInt('9223372036854775807'), { - error: 'Invalid value: Expected int64 to be <= 9223372036854775807', - }) - .optional(), - created_by: z.string().optional(), - deleted_tools: z.array(zTrialDeletedTool).optional(), - description: z.string().optional(), - enable_api: z.boolean().optional(), - enable_site: z.boolean().optional(), - icon: z.string().optional(), - icon_background: z.string().optional(), - icon_type: z.string().optional(), - icon_url: z.string().optional(), - id: z.string().optional(), - max_active_requests: z.int().optional(), - mode: z.string().optional(), - model_config: zTrialAppModelConfig.optional(), - name: z.string().optional(), - permission_keys: z.array(z.string()).optional(), - site: zTrialSite.optional(), - tags: 
z.array(zTrialTag).optional(), - updated_at: z.coerce - .bigint() - .min(BigInt('-9223372036854775808'), { - error: 'Invalid value: Expected int64 to be >= -9223372036854775808', - }) - .max(BigInt('9223372036854775807'), { - error: 'Invalid value: Expected int64 to be <= 9223372036854775807', - }) - .optional(), - updated_by: z.string().optional(), - use_icon_as_answer_icon: z.boolean().optional(), - workflow: zTrialWorkflowPartial.optional(), +/** + * Tag + */ +export const zTag = z.object({ + id: z.string(), + name: z.string(), + type: z.string(), }) export const zJsonValue = z @@ -266,36 +88,78 @@ export const zJsonValue = z .nullable() /** - * GeneratedAppResponse + * ModelConfig */ -export const zGeneratedAppResponse = zJsonValue - -export const zTrialDataset = z.object({ - created_at: z.coerce - .bigint() - .min(BigInt('-9223372036854775808'), { - error: 'Invalid value: Expected int64 to be >= -9223372036854775808', - }) - .max(BigInt('9223372036854775807'), { - error: 'Invalid value: Expected int64 to be <= 9223372036854775807', - }) - .optional(), - created_by: z.string().optional(), - data_source_type: z.string().optional(), - description: z.string().optional(), - id: z.string().optional(), - indexing_technique: z.string().optional(), - name: z.string().optional(), - permission: z.string().optional(), - permission_keys: z.array(z.string()).optional(), +export const zModelConfig = z.object({ + agent_mode: zJsonValue.nullish(), + annotation_reply: zJsonValue.nullish(), + chat_prompt_config: zJsonValue.nullish(), + completion_prompt_config: zJsonValue.nullish(), + created_at: z.int().nullish(), + created_by: z.string().nullish(), + dataset_configs: zJsonValue.nullish(), + dataset_query_variable: z.string().nullish(), + external_data_tools: zJsonValue.nullish(), + file_upload: zJsonValue.nullish(), + model: zJsonValue.nullish(), + more_like_this: zJsonValue.nullish(), + opening_statement: z.string().nullish(), + pre_prompt: z.string().nullish(), + 
prompt_type: z.string().nullish(), + retriever_resource: zJsonValue.nullish(), + sensitive_word_avoidance: zJsonValue.nullish(), + speech_to_text: zJsonValue.nullish(), + suggested_questions: zJsonValue.nullish(), + suggested_questions_after_answer: zJsonValue.nullish(), + text_to_speech: zJsonValue.nullish(), + updated_at: z.int().nullish(), + updated_by: z.string().nullish(), + user_input_form: zJsonValue.nullish(), }) -export const zTrialDatasetList = z.object({ - data: z.array(zTrialDataset).optional(), - has_more: z.boolean().optional(), - limit: z.int().optional(), - page: z.int().optional(), - total: z.int().optional(), +/** + * WorkflowPartial + */ +export const zWorkflowPartial = z.object({ + created_at: z.int().nullish(), + created_by: z.string().nullish(), + id: z.string(), + updated_at: z.int().nullish(), + updated_by: z.string().nullish(), +}) + +/** + * AppDetailWithSite + */ +export const zAppDetailWithSite = z.object({ + access_mode: z.string().nullish(), + api_base_url: z.string().nullish(), + app_id: z.string().nullish(), + bound_agent_id: z.string().nullish(), + created_at: z.int().nullish(), + created_by: z.string().nullish(), + deleted_tools: z.array(zDeletedTool).optional(), + description: z.string().nullish(), + enable_api: z.boolean(), + enable_site: z.boolean(), + icon: z.string().nullish(), + icon_background: z.string().nullish(), + icon_type: z.string().nullish(), + icon_url: z.string().nullable(), + id: z.string(), + maintainer: z.string().nullish(), + max_active_requests: z.int().nullish(), + mode: z.string(), + model_config: zModelConfig.nullish(), + name: z.string(), + permission_keys: z.array(z.string()).optional(), + site: zSite.nullish(), + tags: z.array(zTag).optional(), + tracing: zJsonValue.nullish(), + updated_at: z.int().nullish(), + updated_by: z.string().nullish(), + use_icon_as_answer_icon: z.boolean().nullish(), + workflow: zWorkflowPartial.nullish(), }) export const zJsonObject = z.record(z.string(), z.unknown()) @@ 
-329,93 +193,235 @@ export const zParameters = z.object({ user_input_form: z.array(zJsonObject), }) -export const zTrialConversationVariable = z.object({ - description: z.string().optional(), - id: z.string().optional(), - name: z.string().optional(), - value: z - .union([ - z.string(), - z.int(), - z.number(), - z.boolean(), - z.record(z.string(), z.unknown()), - z.array(z.unknown()), - ]) - .nullish(), - value_type: z.string().optional(), -}) - -export const zTrialSimpleAccount = z.object({ - email: z.string().optional(), - id: z.string().optional(), - name: z.string().optional(), -}) - -export const zTrialPipelineVariable = z.object({ - allow_file_extension: z.array(z.string()).optional(), - allow_file_upload_methods: z.array(z.string()).optional(), - allowed_file_types: z.array(z.string()).optional(), - belong_to_node_id: z.string().optional(), - default_value: z - .union([ - z.string(), - z.int(), - z.number(), - z.boolean(), - z.record(z.string(), z.unknown()), - z.array(z.unknown()), - ]) - .nullish(), - label: z.string().optional(), - max_length: z.int().optional(), - options: z.array(z.string()).optional(), - placeholder: z.string().optional(), - required: z.boolean().optional(), - tooltips: z.string().optional(), - type: z.string().optional(), - unit: z.string().optional(), - variable: z.string().optional(), -}) - -export const zTrialWorkflow = z.object({ - conversation_variables: z.array(zTrialConversationVariable).optional(), - created_at: z.coerce - .bigint() - .min(BigInt('-9223372036854775808'), { - error: 'Invalid value: Expected int64 to be >= -9223372036854775808', - }) - .max(BigInt('9223372036854775807'), { - error: 'Invalid value: Expected int64 to be <= 9223372036854775807', - }) - .optional(), - created_by: zTrialSimpleAccount.optional(), - environment_variables: z.array(z.record(z.string(), z.unknown())).optional(), - features: z.record(z.string(), z.unknown()).optional(), - graph: z.record(z.string(), z.unknown()).optional(), - hash: 
z.string().optional(), - id: z.string().optional(), - marked_comment: z.string().optional(), - marked_name: z.string().optional(), - rag_pipeline_variables: z.array(zTrialPipelineVariable).optional(), - tool_published: z.boolean().optional(), - updated_at: z.coerce - .bigint() - .min(BigInt('-9223372036854775808'), { - error: 'Invalid value: Expected int64 to be >= -9223372036854775808', - }) - .max(BigInt('9223372036854775807'), { - error: 'Invalid value: Expected int64 to be <= 9223372036854775807', - }) - .optional(), - updated_by: zTrialSimpleAccount.optional(), - version: z.string().optional(), +/** + * WorkflowConversationVariableResponse + */ +export const zWorkflowConversationVariableResponse = z.object({ + description: z.string(), + id: z.string(), + name: z.string(), + value: z.unknown(), + value_type: z.string(), }) /** - * GeneratedAppResponse + * SimpleAccount */ -export const zGeneratedAppResponseWritable = zJsonValue +export const zSimpleAccount = z.object({ + email: z.string(), + id: z.string(), + name: z.string(), +}) + +/** + * WorkflowEnvironmentVariableResponse + */ +export const zWorkflowEnvironmentVariableResponse = z.object({ + description: z.string(), + id: z.string(), + name: z.string(), + value: z.unknown(), + value_type: z.string(), +}) + +/** + * PipelineVariableResponse + */ +export const zPipelineVariableResponse = z.object({ + allowed_file_extensions: z.array(z.string()).nullish(), + allowed_file_types: z.array(z.string()).nullish(), + allowed_file_upload_methods: z.array(z.string()).nullish(), + belong_to_node_id: z.string(), + default_value: z.unknown().optional(), + label: z.string(), + max_length: z.int().nullish(), + options: z.array(z.string()).nullish(), + placeholder: z.string().nullish(), + required: z.boolean(), + tooltips: z.string().nullish(), + type: z.string(), + unit: z.string().nullish(), + variable: z.string(), +}) + +/** + * WorkflowResponse + */ +export const zWorkflowResponse = z.object({ + conversation_variables: 
z.array(zWorkflowConversationVariableResponse), + created_at: z.int(), + created_by: zSimpleAccount.nullish(), + environment_variables: z.array(zWorkflowEnvironmentVariableResponse), + features: z.record(z.string(), z.unknown()), + graph: z.record(z.string(), z.unknown()), + hash: z.string(), + id: z.string(), + marked_comment: z.string(), + marked_name: z.string(), + rag_pipeline_variables: z.array(zPipelineVariableResponse), + tool_published: z.boolean(), + updated_at: z.int(), + updated_by: zSimpleAccount.nullish(), + version: z.string(), +}) + +/** + * DatasetDocMetadataResponse + */ +export const zDatasetDocMetadataResponse = z.object({ + id: z.string(), + name: z.string(), + type: z.string(), +}) + +/** + * DatasetExternalKnowledgeInfoResponse + */ +export const zDatasetExternalKnowledgeInfoResponse = z.object({ + external_knowledge_api_endpoint: z.string().nullish(), + external_knowledge_api_id: z.string().nullish(), + external_knowledge_api_name: z.string().nullish(), + external_knowledge_id: z.string().nullish(), +}) + +/** + * DatasetExternalRetrievalModelResponse + */ +export const zDatasetExternalRetrievalModelResponse = z.object({ + score_threshold: z.number().nullish(), + score_threshold_enabled: z.boolean().nullish(), + top_k: z.int(), +}) + +/** + * DatasetIconInfoResponse + */ +export const zDatasetIconInfoResponse = z.object({ + icon: z.string().nullish(), + icon_background: z.string().nullish(), + icon_type: z.string().nullish(), + icon_url: z.string().nullish(), +}) + +/** + * DatasetSummaryIndexSettingResponse + */ +export const zDatasetSummaryIndexSettingResponse = z.object({ + enable: z.boolean().nullish(), + model_name: z.string().nullish(), + model_provider_name: z.string().nullish(), + summary_prompt: z.string().nullish(), +}) + +/** + * DatasetTagResponse + */ +export const zDatasetTagResponse = z.object({ + id: z.string(), + name: z.string(), + type: z.string(), +}) + +/** + * DatasetRerankingModelResponse + */ +export const 
zDatasetRerankingModelResponse = z.object({ + reranking_model_name: z.string().nullish(), + reranking_provider_name: z.string().nullish(), +}) + +/** + * DatasetKeywordSettingResponse + */ +export const zDatasetKeywordSettingResponse = z.object({ + keyword_weight: z.number().nullish(), +}) + +/** + * DatasetVectorSettingResponse + */ +export const zDatasetVectorSettingResponse = z.object({ + embedding_model_name: z.string().nullish(), + embedding_provider_name: z.string().nullish(), + vector_weight: z.number().nullish(), +}) + +/** + * DatasetWeightedScoreResponse + */ +export const zDatasetWeightedScoreResponse = z.object({ + keyword_setting: zDatasetKeywordSettingResponse.optional(), + vector_setting: zDatasetVectorSettingResponse.optional(), + weight_type: z.string().nullish(), +}) + +/** + * DatasetRetrievalModelResponse + */ +export const zDatasetRetrievalModelResponse = z.object({ + reranking_enable: z.boolean(), + reranking_mode: z.string().nullish(), + reranking_model: zDatasetRerankingModelResponse.optional(), + score_threshold: z.number().nullish(), + score_threshold_enabled: z.boolean(), + search_method: z.string(), + top_k: z.int(), + weights: zDatasetWeightedScoreResponse.nullish(), +}) + +/** + * TrialDatasetListItemResponse + */ +export const zTrialDatasetListItemResponse = z.object({ + app_count: z.int(), + author_name: z.string().nullable(), + built_in_field_enabled: z.boolean(), + chunk_structure: z.string().nullable(), + created_at: z.int(), + created_by: z.string(), + data_source_type: z.string().nullable(), + description: z.string().nullable(), + doc_form: z.string().nullable(), + doc_metadata: z.array(zDatasetDocMetadataResponse), + document_count: z.int(), + embedding_available: z.boolean().nullish(), + embedding_model: z.string().nullable(), + embedding_model_provider: z.string().nullable(), + enable_api: z.boolean(), + external_knowledge_info: zDatasetExternalKnowledgeInfoResponse.optional(), + external_retrieval_model: 
zDatasetExternalRetrievalModelResponse.nullable(), + icon_info: zDatasetIconInfoResponse.optional(), + id: z.string(), + indexing_technique: z.string().nullable(), + is_multimodal: z.boolean(), + is_published: z.boolean(), + maintainer: z.string().nullish(), + name: z.string(), + permission: z.string(), + permission_keys: z.array(z.string()).optional(), + pipeline_id: z.string().nullable(), + provider: z.string(), + retrieval_model_dict: zDatasetRetrievalModelResponse, + runtime_mode: z.string().nullable(), + summary_index_setting: zDatasetSummaryIndexSettingResponse.optional(), + tags: z.array(zDatasetTagResponse), + total_available_documents: z.int(), + total_documents: z.int(), + updated_at: z.int(), + updated_by: z.string().nullable(), + word_count: z.int(), +}) + +/** + * TrialDatasetListResponse + */ +export const zTrialDatasetListResponse = z.object({ + data: z.array(zTrialDatasetListItemResponse), + has_more: z.boolean(), + limit: z.int(), + page: z.int(), + total: z.int(), +}) /** * Site @@ -436,14 +442,47 @@ export const zSiteWritable = z.object({ use_icon_as_answer_icon: z.boolean(), }) +/** + * AppDetailWithSite + */ +export const zAppDetailWithSiteWritable = z.object({ + access_mode: z.string().nullish(), + api_base_url: z.string().nullish(), + app_id: z.string().nullish(), + bound_agent_id: z.string().nullish(), + created_at: z.int().nullish(), + created_by: z.string().nullish(), + deleted_tools: z.array(zDeletedTool).optional(), + description: z.string().nullish(), + enable_api: z.boolean(), + enable_site: z.boolean(), + icon: z.string().nullish(), + icon_background: z.string().nullish(), + icon_type: z.string().nullish(), + id: z.string(), + maintainer: z.string().nullish(), + max_active_requests: z.int().nullish(), + mode: z.string(), + model_config: zModelConfig.nullish(), + name: z.string(), + permission_keys: z.array(z.string()).optional(), + site: zSiteWritable.nullish(), + tags: z.array(zTag).optional(), + tracing: zJsonValue.nullish(), + 
updated_at: z.int().nullish(), + updated_by: z.string().nullish(), + use_icon_as_answer_icon: z.boolean().nullish(), + workflow: zWorkflowPartial.nullish(), +}) + export const zGetTrialAppsByAppIdPath = z.object({ app_id: z.uuid(), }) /** - * Success + * App detail retrieved successfully */ -export const zGetTrialAppsByAppIdResponse = zTrialAppDetailWithSite +export const zGetTrialAppsByAppIdResponse = zAppDetailWithSite export const zPostTrialAppsByAppIdAudioToTextPath = z.object({ app_id: z.uuid(), @@ -454,28 +493,6 @@ export const zPostTrialAppsByAppIdAudioToTextPath = z.object({ */ export const zPostTrialAppsByAppIdAudioToTextResponse = zAudioTranscriptResponse -export const zPostTrialAppsByAppIdChatMessagesBody = zChatRequest - -export const zPostTrialAppsByAppIdChatMessagesPath = z.object({ - app_id: z.uuid(), -}) - -/** - * Success - */ -export const zPostTrialAppsByAppIdChatMessagesResponse = zGeneratedAppResponse - -export const zPostTrialAppsByAppIdCompletionMessagesBody = zCompletionRequest - -export const zPostTrialAppsByAppIdCompletionMessagesPath = z.object({ - app_id: z.uuid(), -}) - -/** - * Success - */ -export const zPostTrialAppsByAppIdCompletionMessagesResponse = zGeneratedAppResponse - export const zGetTrialAppsByAppIdDatasetsPath = z.object({ app_id: z.uuid(), }) @@ -489,7 +506,7 @@ export const zGetTrialAppsByAppIdDatasetsQuery = z.object({ /** * Success */ -export const zGetTrialAppsByAppIdDatasetsResponse = zTrialDatasetList +export const zGetTrialAppsByAppIdDatasetsResponse = zTrialDatasetListResponse export const zGetTrialAppsByAppIdMessagesByMessageIdSuggestedQuestionsPath = z.object({ app_id: z.uuid(), @@ -536,20 +553,9 @@ export const zGetTrialAppsByAppIdWorkflowsPath = z.object({ }) /** - * Success + * Workflow detail retrieved successfully */ -export const zGetTrialAppsByAppIdWorkflowsResponse = zTrialWorkflow - -export const zPostTrialAppsByAppIdWorkflowsRunBody = zWorkflowRunRequest - -export const 
zPostTrialAppsByAppIdWorkflowsRunPath = z.object({ - app_id: z.uuid(), -}) - -/** - * Success - */ -export const zPostTrialAppsByAppIdWorkflowsRunResponse = zGeneratedAppResponse +export const zGetTrialAppsByAppIdWorkflowsResponse = zWorkflowResponse export const zPostTrialAppsByAppIdWorkflowsTasksByTaskIdStopPath = z.object({ app_id: z.uuid(), diff --git a/packages/contracts/generated/api/console/workflow-generate/types.gen.ts b/packages/contracts/generated/api/console/workflow-generate/types.gen.ts index 7f67a572cb5..7fdaec5f4f0 100644 --- a/packages/contracts/generated/api/console/workflow-generate/types.gen.ts +++ b/packages/contracts/generated/api/console/workflow-generate/types.gen.ts @@ -17,15 +17,42 @@ export type WorkflowGeneratePayload = { export type GeneratorResponse = unknown export type ModelConfig = { - completion_params?: { - [key: string]: unknown - } - mode: LlmMode - name: string - provider: string + agent_mode?: JsonValue | null + annotation_reply?: JsonValue | null + chat_prompt_config?: JsonValue | null + completion_prompt_config?: JsonValue | null + created_at?: number | null + created_by?: string | null + dataset_configs?: JsonValue | null + dataset_query_variable?: string | null + external_data_tools?: JsonValue | null + file_upload?: JsonValue | null + model?: JsonValue | null + more_like_this?: JsonValue | null + opening_statement?: string | null + pre_prompt?: string | null + prompt_type?: string | null + retriever_resource?: JsonValue | null + sensitive_word_avoidance?: JsonValue | null + speech_to_text?: JsonValue | null + suggested_questions?: JsonValue | null + suggested_questions_after_answer?: JsonValue | null + text_to_speech?: JsonValue | null + updated_at?: number | null + updated_by?: string | null + user_input_form?: JsonValue | null } -export type LlmMode = 'chat' | 'completion' +export type JsonValue + = | string + | number + | number + | boolean + | { + [key: string]: unknown + } + | Array + | null export type 
PostWorkflowGenerateData = { body: WorkflowGeneratePayload diff --git a/packages/contracts/generated/api/console/workflow-generate/zod.gen.ts b/packages/contracts/generated/api/console/workflow-generate/zod.gen.ts index c57f0e31412..3076374073e 100644 --- a/packages/contracts/generated/api/console/workflow-generate/zod.gen.ts +++ b/packages/contracts/generated/api/console/workflow-generate/zod.gen.ts @@ -7,21 +7,45 @@ import * as z from 'zod' */ export const zGeneratorResponse = z.unknown() -/** - * LLMMode - * - * Enum class for large language model mode. - */ -export const zLlmMode = z.enum(['chat', 'completion']) +export const zJsonValue = z + .union([ + z.string(), + z.int(), + z.number(), + z.boolean(), + z.record(z.string(), z.unknown()), + z.array(z.unknown()), + ]) + .nullable() /** * ModelConfig */ export const zModelConfig = z.object({ - completion_params: z.record(z.string(), z.unknown()).optional(), - mode: zLlmMode, - name: z.string(), - provider: z.string(), + agent_mode: zJsonValue.nullish(), + annotation_reply: zJsonValue.nullish(), + chat_prompt_config: zJsonValue.nullish(), + completion_prompt_config: zJsonValue.nullish(), + created_at: z.int().nullish(), + created_by: z.string().nullish(), + dataset_configs: zJsonValue.nullish(), + dataset_query_variable: z.string().nullish(), + external_data_tools: zJsonValue.nullish(), + file_upload: zJsonValue.nullish(), + model: zJsonValue.nullish(), + more_like_this: zJsonValue.nullish(), + opening_statement: z.string().nullish(), + pre_prompt: z.string().nullish(), + prompt_type: z.string().nullish(), + retriever_resource: zJsonValue.nullish(), + sensitive_word_avoidance: zJsonValue.nullish(), + speech_to_text: zJsonValue.nullish(), + suggested_questions: zJsonValue.nullish(), + suggested_questions_after_answer: zJsonValue.nullish(), + text_to_speech: zJsonValue.nullish(), + updated_at: z.int().nullish(), + updated_by: z.string().nullish(), + user_input_form: zJsonValue.nullish(), }) /** diff --git 
a/packages/contracts/generated/api/service/orpc.gen.ts b/packages/contracts/generated/api/service/orpc.gen.ts index b15cc78d07d..7ed6b163dd9 100644 --- a/packages/contracts/generated/api/service/orpc.gen.ts +++ b/packages/contracts/generated/api/service/orpc.gen.ts @@ -1474,16 +1474,20 @@ export const get13 = oc .output(zGetDatasetsByDatasetIdDocumentsByDocumentIdResponse) /** - * Update an existing document by uploading a file + * Update Document by File + * + * Update an existing document by uploading a new file. Re-triggers indexing — use the returned `batch` ID with [Get Document Indexing Status](/api-reference/documents/get-document-indexing-status) to track progress. */ export const patch4 = oc .route({ - description: 'Update an existing document by uploading a file', + description: + 'Update an existing document by uploading a new file. Re-triggers indexing — use the returned `batch` ID with [Get Document Indexing Status](/api-reference/documents/get-document-indexing-status) to track progress.', inputStructure: 'detailed', method: 'PATCH', operationId: 'patchDatasetsByDatasetIdDocumentsByDocumentId', path: '/datasets/{dataset_id}/documents/{document_id}', - tags: ['service_api'], + summary: 'Update Document by File', + tags: ['Documents'], }) .input( z.object({ diff --git a/packages/contracts/generated/api/service/types.gen.ts b/packages/contracts/generated/api/service/types.gen.ts index 97921643514..7fd65aadd9b 100644 --- a/packages/contracts/generated/api/service/types.gen.ts +++ b/packages/contracts/generated/api/service/types.gen.ts @@ -592,40 +592,45 @@ export type DocumentBatchDownloadZipPayload = { } export type DocumentDetailResponse = { - archived?: boolean | null - average_segment_length?: number | null + archived?: boolean + average_segment_length?: number | number completed_at?: number | null - created_at?: number | null - created_by?: string | null - created_from?: string | null + created_at?: number + created_by?: string + created_from?: 
string data_source_info?: { [key: string]: unknown - } | null - data_source_type?: string | null + } + data_source_type?: string dataset_process_rule?: { [key: string]: unknown - } | null + } dataset_process_rule_id?: string | null disabled_at?: number | null disabled_by?: string | null display_status?: string | null - doc_form?: string | null + doc_form?: string doc_language?: string | null - doc_metadata?: Array | null + doc_metadata?: + | Array + | { + [key: string]: unknown + } + | null doc_type?: string | null document_process_rule?: { [key: string]: unknown - } | null - enabled?: boolean | null + } + enabled?: boolean error?: string | null - hit_count?: number | null + hit_count?: number id: string indexing_latency?: number | null - indexing_status?: string | null - name?: string | null - need_summary?: boolean | null - position?: number | null - segment_count?: number | null + indexing_status?: string + name?: string + need_summary?: boolean + position?: number + segment_count?: number summary_index_status?: string | null tokens?: number | null updated_at?: number | null @@ -2624,6 +2629,7 @@ export type PatchDatasetsByDatasetIdDocumentsByDocumentIdData = { } export type PatchDatasetsByDatasetIdDocumentsByDocumentIdErrors = { + 400: unknown 401: unknown 403: unknown 404: unknown diff --git a/packages/contracts/generated/api/service/zod.gen.ts b/packages/contracts/generated/api/service/zod.gen.ts index 6ccc5671cb2..e475038cf26 100644 --- a/packages/contracts/generated/api/service/zod.gen.ts +++ b/packages/contracts/generated/api/service/zod.gen.ts @@ -761,34 +761,36 @@ export const zDocumentMetadataResponse = z.object({ * DocumentDetailResponse */ export const zDocumentDetailResponse = z.object({ - archived: z.boolean().nullish(), - average_segment_length: z.number().nullish(), + archived: z.boolean().optional(), + average_segment_length: z.union([z.int(), z.number()]).optional(), completed_at: z.int().nullish(), - created_at: z.int().nullish(), - created_by: 
z.string().nullish(), - created_from: z.string().nullish(), - data_source_info: z.record(z.string(), z.unknown()).nullish(), - data_source_type: z.string().nullish(), - dataset_process_rule: z.record(z.string(), z.unknown()).nullish(), + created_at: z.int().optional(), + created_by: z.string().optional(), + created_from: z.string().optional(), + data_source_info: z.record(z.string(), z.unknown()).optional(), + data_source_type: z.string().optional(), + dataset_process_rule: z.record(z.string(), z.unknown()).optional(), dataset_process_rule_id: z.string().nullish(), disabled_at: z.int().nullish(), disabled_by: z.string().nullish(), display_status: z.string().nullish(), - doc_form: z.string().nullish(), + doc_form: z.string().optional(), doc_language: z.string().nullish(), - doc_metadata: z.array(zDocumentMetadataResponse).nullish(), + doc_metadata: z + .union([z.array(zDocumentMetadataResponse), z.record(z.string(), z.unknown())]) + .nullish(), doc_type: z.string().nullish(), - document_process_rule: z.record(z.string(), z.unknown()).nullish(), - enabled: z.boolean().nullish(), + document_process_rule: z.record(z.string(), z.unknown()).optional(), + enabled: z.boolean().optional(), error: z.string().nullish(), - hit_count: z.int().nullish(), + hit_count: z.int().optional(), id: z.string(), indexing_latency: z.number().nullish(), - indexing_status: z.string().nullish(), - name: z.string().nullish(), - need_summary: z.boolean().nullish(), - position: z.int().nullish(), - segment_count: z.int().nullish(), + indexing_status: z.string().optional(), + name: z.string().optional(), + need_summary: z.boolean().optional(), + position: z.int().optional(), + segment_count: z.int().optional(), summary_index_status: z.string().nullish(), tokens: z.int().nullish(), updated_at: z.int().nullish(), @@ -2756,7 +2758,7 @@ export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdPath = z.object({ }) /** - * Document updated successfully + * Document updated successfully. 
*/ export const zPatchDatasetsByDatasetIdDocumentsByDocumentIdResponse = zDocumentAndBatchResponse diff --git a/packages/contracts/openapi-ts.api.config.ts b/packages/contracts/openapi-ts.api.config.ts index 8fce8a25bd3..1adbf4fda8e 100644 --- a/packages/contracts/openapi-ts.api.config.ts +++ b/packages/contracts/openapi-ts.api.config.ts @@ -10,13 +10,21 @@ type SwaggerSchema = JsonObject & { $ref?: string } +type OpenApiMediaType = JsonObject & { + schema?: SwaggerSchema +} + +type OpenApiResponse = JsonObject & { + content?: Record +} + type OpenApiComponents = JsonObject & { schemas?: Record } type SwaggerOperation = JsonObject & { operationId?: string - responses?: Record + responses?: Record } type SwaggerDocument = JsonObject & { @@ -52,6 +60,17 @@ const currentDir = path.dirname(fileURLToPath(import.meta.url)) const apiOpenApiDir = path.resolve(currentDir, 'openapi') const operationMethods = new Set(['delete', 'get', 'patch', 'post', 'put']) +const pydanticDecimalStringPattern = '^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$' +const codegenSafeDecimalStringPattern = '^(?![-+.]*$)[+-]?0*\\d*\\.?\\d*$' + +const opaqueJsonContent = (): Record => ({ + 'application/json': { + schema: { + additionalProperties: true, + type: 'object', + }, + }, +}) const apiSpecs: ApiSpec[] = [ { filename: 'console-openapi.json', name: 'console' }, @@ -182,6 +201,46 @@ const addOperationIds = (document: SwaggerDocument) => { } } +const isOpaqueContractResponse = (response: OpenApiResponse) => { + const content = response.content + if (!isObject(content)) + return false + + return Object.entries(content).some(([mediaType, media]) => { + if (!isObject(media)) + return false + + return (mediaType === 'application/json' || mediaType === 'text/event-stream') && !('schema' in media) + }) +} + +const hasOpaqueContractSuccessResponse = (operation: SwaggerOperation) => { + return Object.entries(operation.responses ?? 
{}).some(([status, response]) => { + return /^2\d\d$/.test(status) && isObject(response) && isOpaqueContractResponse(response) + }) +} + +const normalizeOpaqueContractResponses = (document: SwaggerDocument) => { + // Some backend endpoints have no response schema (e.g. external) and will trap heyapi here + // So we forge an opaque schema for them here + for (const pathItem of Object.values(document.paths ?? {})) { + for (const [method, operation] of Object.entries(pathItem)) { + if (!operationMethods.has(method) || !isObject(operation)) + continue + + const swaggerOperation = operation as SwaggerOperation + if (!hasOpaqueContractSuccessResponse(swaggerOperation)) + continue + + Object.values(swaggerOperation.responses ?? {}) + .filter(response => isObject(response) && isOpaqueContractResponse(response)) + .forEach((response) => { + response.content = opaqueJsonContent() + }) + } + } +} + const hasSuccessResponse = (operation: SwaggerOperation) => { return Object.entries(operation.responses ?? {}).some(([status, response]) => { if (!/^2\d\d$/.test(status)) @@ -215,6 +274,7 @@ const filterContractOperations = (document: SwaggerDocument) => { } const normalizeApiSwagger = (document: SwaggerDocument) => { + normalizeOpaqueContractResponses(document) filterContractOperations(document) addOperationIds(document) @@ -380,10 +440,20 @@ const createApiConfig = (job: ApiJob): UserConfig => ({ 'name': 'zod', '~resolvers': { string: (ctx) => { - if (ctx.schema.format !== 'binary') - return undefined + if (ctx.schema.format === 'binary') + return $(ctx.symbols.z).attr('custom').call().generic($.type.or($.type('Blob'), $.type('File'))) - return $(ctx.symbols.z).attr('custom').call().generic($.type.or($.type('Blob'), $.type('File'))) + if (ctx.schema.pattern === pydanticDecimalStringPattern) { + // the pydantic-generated regex will emit errors like + // regexp/no-useless-assertions, so patch the regex here + return $(ctx.symbols.z) + .attr('string') + .call() + .attr('regex') +
.call($.regexp(codegenSafeDecimalStringPattern)) + } + + return undefined }, }, },