From c52eafe2cac7f36c72804ad5d05b57ed6377b59a Mon Sep 17 00:00:00 2001 From: Stephen Zhou Date: Thu, 18 Jun 2026 16:43:39 +0800 Subject: [PATCH] docs: enrich generated service API descriptions (#37615) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- api/controllers/common/controller_schemas.py | 121 ++- api/controllers/common/human_input.py | 7 +- .../console/datasets/hit_testing_base.py | 19 +- api/controllers/service_api/app/annotation.py | 37 +- api/controllers/service_api/app/audio.py | 11 +- api/controllers/service_api/app/completion.py | 106 ++- .../service_api/app/conversation.py | 32 +- .../service_api/app/file_preview.py | 14 +- api/controllers/service_api/app/message.py | 6 +- api/controllers/service_api/app/workflow.py | 62 +- .../service_api/app/workflow_events.py | 24 +- .../service_api/dataset/dataset.py | 188 ++++- .../service_api/dataset/document.py | 150 +++- .../service_api/dataset/hit_testing.py | 2 +- .../service_api/dataset/metadata.py | 15 +- .../rag_pipeline/rag_pipeline_workflow.py | 42 +- .../service_api/dataset/segment.py | 37 +- api/controllers/service_api/schema.py | 70 +- .../service_api/workspace/models.py | 8 +- api/core/rag/entities/metadata_entities.py | 48 +- api/core/rag/entities/processing_entities.py | 42 +- api/libs/flask_restx_compat.py | 31 + api/openapi/markdown/console-openapi.md | 162 ++-- api/openapi/markdown/openapi-openapi.md | 2 +- api/openapi/markdown/service-openapi.md | 782 +++++++++--------- api/openapi/markdown/web-openapi.md | 32 +- .../knowledge_entities/knowledge_entities.py | 257 ++++-- .../entity/pipeline_service_api_entities.py | 138 +++- .../unit_tests/controllers/test_swagger.py | 68 +- .../api/console/datasets/types.gen.ts | 17 +- .../generated/api/console/datasets/zod.gen.ts | 28 +- .../api/console/installed-apps/types.gen.ts | 5 +- .../api/console/installed-apps/zod.gen.ts | 13 +- .../generated/api/service/types.gen.ts | 123 ++- 
.../generated/api/service/zod.gen.ts | 193 ++++- .../contracts/generated/api/web/types.gen.ts | 5 +- .../contracts/generated/api/web/zod.gen.ts | 13 +- 37 files changed, 1992 insertions(+), 918 deletions(-) diff --git a/api/controllers/common/controller_schemas.py b/api/controllers/common/controller_schemas.py index b47a1a013a0..d35fd9fe1b3 100644 --- a/api/controllers/common/controller_schemas.py +++ b/api/controllers/common/controller_schemas.py @@ -1,8 +1,8 @@ from copy import deepcopy -from typing import Any, Literal, override +from typing import Annotated, Any, Literal, override from uuid import UUID -from pydantic import BaseModel, Field, GetJsonSchemaHandler, model_validator +from pydantic import BaseModel, Field, GetJsonSchemaHandler, WithJsonSchema, model_validator from libs.helper import UUIDStrOrEmpty @@ -10,8 +10,14 @@ from libs.helper import UUIDStrOrEmpty class ConversationRenamePayload(BaseModel): - name: str | None = None - auto_generate: bool = False + name: str | None = Field( + default=None, + description="Conversation name. Required when `auto_generate` is `false`.", + ) + auto_generate: bool = Field( + default=False, + description="Automatically generate the conversation name. When `true`, the `name` field is ignored.", + ) @classmethod @override @@ -64,14 +70,28 @@ class ConversationRenamePayload(BaseModel): class MessageListQuery(BaseModel): - conversation_id: UUIDStrOrEmpty = Field(description="Conversation UUID") - first_id: UUIDStrOrEmpty | None = Field(default=None, description="First message ID for pagination") - limit: int = Field(default=20, ge=1, le=100, description="Number of messages to return (1-100)") + conversation_id: UUIDStrOrEmpty = Field(description="Conversation ID.") + first_id: UUIDStrOrEmpty | None = Field( + default=None, + description=( + "The ID of the first chat record on the current page. 
Omit this value to fetch the latest messages; " + "for subsequent pages, use the first message ID from the current list to fetch older messages." + ), + ) + limit: int = Field( + default=20, + ge=1, + le=100, + description="Number of chat history messages to return per request.", + ) class MessageFeedbackPayload(BaseModel): - rating: Literal["like", "dislike"] | None = None - content: str | None = None + rating: Literal["like", "dislike"] | None = Field( + default=None, + description="Feedback rating. Set to `null` to revoke previously submitted feedback.", + ) + content: str | None = Field(default=None, description="Optional text feedback providing additional detail.") # --- Saved message schemas --- @@ -88,6 +108,39 @@ class SavedMessageCreatePayload(BaseModel): # --- Workflow schemas --- +WORKFLOW_INPUT_FILE_ITEM_SCHEMA: dict[str, object] = { + "type": "object", + "required": ["type", "transfer_method"], + "properties": { + "type": { + "description": "File type.", + "enum": ["document", "image", "audio", "video", "custom"], + "type": "string", + }, + "transfer_method": { + "description": "Transfer method: `remote_url` for file URL, `local_file` for uploaded file.", + "enum": ["remote_url", "local_file"], + "type": "string", + }, + "url": { + "description": "File URL when `transfer_method` is `remote_url`.", + "format": "url", + "type": "string", + }, + "upload_file_id": { + "description": ( + "Uploaded file ID obtained from the [Upload File](/api-reference/files/upload-file) API when " + "`transfer_method` is `local_file`." 
+ ), + "type": "string", + }, + }, +} +WORKFLOW_INPUT_FILE_LIST_SCHEMA: dict[str, object] = { + "anyOf": [{"items": WORKFLOW_INPUT_FILE_ITEM_SCHEMA, "type": "array"}, {"type": "null"}] +} +WorkflowInputFileList = Annotated[list[dict[str, Any]] | None, WithJsonSchema(WORKFLOW_INPUT_FILE_LIST_SCHEMA)] + class DefaultBlockConfigQuery(BaseModel): q: str | None = None @@ -101,8 +154,22 @@ class WorkflowListQuery(BaseModel): class WorkflowRunPayload(BaseModel): - inputs: dict[str, Any] - files: list[dict[str, Any]] | None = Field(default=None) + inputs: dict[str, Any] = Field( + description=( + "Key-value pairs for workflow input variables. Values for file-type variables should be arrays of " + "file objects with `type`, `transfer_method`, and either `url` or `upload_file_id`. Refer to the " + "`user_input_form` field in the [Get App Parameters](/api-reference/applications/get-app-parameters) " + "response to discover the variable names and types expected by your app." + ) + ) + files: WorkflowInputFileList = Field( + default=None, + description=( + "File list for workflow system file inputs. Available when file upload is enabled for the workflow. " + "To attach a local file, first upload it via [Upload File](/api-reference/files/upload-file) and use " + "the returned `id` as `upload_file_id` with `transfer_method: local_file`." 
+ ), + ) class WorkflowUpdatePayload(BaseModel): @@ -117,30 +184,48 @@ DOCUMENT_BATCH_DOWNLOAD_ZIP_MAX_DOCS = 100 class ChildChunkCreatePayload(BaseModel): - content: str + content: str = Field(description="Child chunk text content.") class ChildChunkUpdatePayload(BaseModel): - content: str + content: str = Field(description="Child chunk text content.") class DocumentBatchDownloadZipPayload(BaseModel): """Request payload for bulk downloading documents as a zip archive.""" - document_ids: list[UUID] = Field(..., min_length=1, max_length=DOCUMENT_BATCH_DOWNLOAD_ZIP_MAX_DOCS) + document_ids: list[UUID] = Field( + ..., + min_length=1, + max_length=DOCUMENT_BATCH_DOWNLOAD_ZIP_MAX_DOCS, + description="List of document IDs to include in the ZIP download.", + ) class MetadataUpdatePayload(BaseModel): - name: str + name: str = Field(description="New metadata field name.") # --- Audio schemas --- +UUIDString = Annotated[str, WithJsonSchema({"format": "uuid", "type": "string"})] + + class TextToAudioPayload(BaseModel): - message_id: str | None = Field(default=None, description="Message ID") - voice: str | None = Field(default=None, description="Voice to use for TTS") - text: str | None = Field(default=None, description="Text to convert to audio") + message_id: UUIDString | None = Field( + default=None, + description="Message ID. Takes priority over `text` when both are provided.", + ) + voice: str | None = Field( + default=None, + description=( + "Voice to use for text-to-speech. Available voices depend on the TTS provider configured for this app. " + "Omit to use the app's configured voice when available; that value is exposed by " + "[Get App Parameters](/api-reference/applications/get-app-parameters) as `text_to_speech.voice`." 
+ ), + ) + text: str | None = Field(default=None, description="Speech content to convert.") streaming: bool | None = Field( default=None, description="Reserved for compatibility; TTS response streaming is determined by the provider output.", diff --git a/api/controllers/common/human_input.py b/api/controllers/common/human_input.py index 4b2e70e2d03..3bb2684df43 100644 --- a/api/controllers/common/human_input.py +++ b/api/controllers/common/human_input.py @@ -35,7 +35,12 @@ class HumanInputFormSubmitPayload(BaseModel): ), examples=[HUMAN_INPUT_FORM_INPUT_EXAMPLE], ) - action: str + action: str = Field( + description=( + "ID of the action button the recipient selected. Must match one of the `id` values from the form's " + "`user_actions` list." + ) + ) def stringify_form_default_values(values: dict[str, object]) -> dict[str, str]: diff --git a/api/controllers/console/datasets/hit_testing_base.py b/api/controllers/console/datasets/hit_testing_base.py index e3efe804872..4e90e66eb25 100644 --- a/api/controllers/console/datasets/hit_testing_base.py +++ b/api/controllers/console/datasets/hit_testing_base.py @@ -23,17 +23,26 @@ from libs.login import resolve_account_fallback from models.account import Account from models.dataset import Dataset from services.dataset_service import DatasetService -from services.entities.knowledge_entities.knowledge_entities import RetrievalModel +from services.entities.knowledge_entities.knowledge_entities import ExternalRetrievalModel, RetrievalModel from services.hit_testing_service import HitTestingService logger = logging.getLogger(__name__) class HitTestingPayload(BaseModel): - query: str = Field(max_length=250) - retrieval_model: RetrievalModel | None = None - external_retrieval_model: dict[str, Any] | None = Field(default=None) - attachment_ids: list[str] | None = None + query: str = Field(description="Search query text.", max_length=250) + retrieval_model: RetrievalModel | None = Field( + default=None, + description="Retrieval model 
configuration. Controls how chunks are searched and ranked.", + ) + external_retrieval_model: ExternalRetrievalModel = Field( + default=None, + description="Retrieval settings for external knowledge bases.", + ) + attachment_ids: list[str] | None = Field( + default=None, + description="List of attachment IDs to include in the retrieval context.", + ) class DatasetsHitTestingBase: diff --git a/api/controllers/service_api/app/annotation.py b/api/controllers/service_api/app/annotation.py index d2522c4a41b..627545d7168 100644 --- a/api/controllers/service_api/app/annotation.py +++ b/api/controllers/service_api/app/annotation.py @@ -23,20 +23,25 @@ from services.annotation_service import ( class AnnotationCreatePayload(BaseModel): - question: str = Field(description="Annotation question") - answer: str = Field(description="Annotation answer") + question: str = Field(description="Annotation question.") + answer: str = Field(description="Annotation answer.") class AnnotationReplyActionPayload(BaseModel): - score_threshold: float = Field(description="Score threshold for annotation matching") - embedding_provider_name: str = Field(description="Embedding provider name") - embedding_model_name: str = Field(description="Embedding model name") + score_threshold: float = Field( + description=( + "Minimum similarity score for an annotation to be considered a match. Higher values require closer matches." 
+ ), + json_schema_extra={"format": "float"}, + ) + embedding_provider_name: str = Field(description="Name of the embedding model provider.") + embedding_model_name: str = Field(description="Name of the embedding model to use for annotation matching.") class AnnotationListQuery(BaseModel): - page: int = Field(default=1, ge=1, description="Page number") - limit: int = Field(default=20, ge=1, description="Number of annotations per page") - keyword: str = Field(default="", description="Keyword to search annotations") + page: int = Field(default=1, ge=1, description="Page number for pagination.") + limit: int = Field(default=20, ge=1, description="Number of items per page.") + keyword: str = Field(default="", description="Keyword to filter annotations by question or answer content.") class AnnotationJobStatusResponse(ResponseModel): @@ -46,7 +51,7 @@ class AnnotationJobStatusResponse(ResponseModel): ANNOTATION_REPLY_ACTION_PARAM = { - "description": "Action to perform: 'enable' or 'disable'", + "description": "Action to perform: `enable` or `disable`.", "enum": ["enable", "disable"], "type": "string", } @@ -125,7 +130,15 @@ class AnnotationReplyActionStatusApi(Resource): ) @service_api_ns.doc("get_annotation_reply_action_status") @service_api_ns.doc(description="Get the status of an annotation reply action job") - @service_api_ns.doc(params={"action": "Action type", "job_id": "Job ID"}) + @service_api_ns.doc( + params={ + "action": ANNOTATION_REPLY_ACTION_PARAM, + "job_id": ( + "Job ID returned by " + "[Configure Annotation Reply](/api-reference/annotations/configure-annotation-reply)." 
+ ), + } + ) @service_api_ns.doc( responses={ 200: "Job status retrieved successfully", @@ -248,7 +261,7 @@ class AnnotationUpdateDeleteApi(Resource): @service_api_ns.expect(service_api_ns.models[AnnotationCreatePayload.__name__]) @service_api_ns.doc("update_annotation") @service_api_ns.doc(description="Update an existing annotation") - @service_api_ns.doc(params={"annotation_id": "Annotation ID"}) + @service_api_ns.doc(params={"annotation_id": "The unique identifier of the annotation to update."}) @service_api_ns.doc( responses={ 200: "Annotation updated successfully", @@ -284,7 +297,7 @@ class AnnotationUpdateDeleteApi(Resource): ) @service_api_ns.doc("delete_annotation") @service_api_ns.doc(description="Delete an annotation") - @service_api_ns.doc(params={"annotation_id": "Annotation ID"}) + @service_api_ns.doc(params={"annotation_id": "The unique identifier of the annotation to delete."}) @service_api_ns.doc( responses={ 204: "Annotation deleted successfully", diff --git a/api/controllers/service_api/app/audio.py b/api/controllers/service_api/app/audio.py index 4bff8e66150..2b5a9ba83a1 100644 --- a/api/controllers/service_api/app/audio.py +++ b/api/controllers/service_api/app/audio.py @@ -64,7 +64,16 @@ class AudioApi(Resource): ) @service_api_ns.doc("audio_to_text") @service_api_ns.doc(description="Convert audio to text using speech-to-text") - @service_api_ns.doc(consumes=["multipart/form-data"], params=multipart_file_params(include_user=True)) + @service_api_ns.doc( + consumes=["multipart/form-data"], + params=multipart_file_params( + include_user=True, + file_description=( + "Audio file to transcribe. Supported MIME types: `audio/mp3`, `audio/mpga`, `audio/m4a`, " + "`audio/wav`, and `audio/amr`. File size limit is `30 MB`." 
+ ), + ), + ) @service_api_ns.doc( responses={ 200: "Audio successfully transcribed", diff --git a/api/controllers/service_api/app/completion.py b/api/controllers/service_api/app/completion.py index 99e7aaecd8c..1468f3d776f 100644 --- a/api/controllers/service_api/app/completion.py +++ b/api/controllers/service_api/app/completion.py @@ -5,6 +5,7 @@ from uuid import UUID from flask import request from flask_restx import Resource from pydantic import BaseModel, Field, field_validator +from pydantic.json_schema import SkipJsonSchema from werkzeug.exceptions import BadRequest, InternalServerError, NotFound import services @@ -20,7 +21,12 @@ from controllers.service_api.app.error import ( ProviderNotInitializeError, ProviderQuotaExceededError, ) -from controllers.service_api.schema import expect_user_json, expect_with_user, json_or_event_stream_response +from controllers.service_api.schema import ( + InputFileList, + expect_user_json, + expect_with_user, + json_or_event_stream_response, +) from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError from core.app.entities.app_invoke_entities import InvokeFrom @@ -52,24 +58,84 @@ def _resolve_agent_app_streaming(*, app_mode: AppMode, response_mode: str | None class CompletionRequestPayload(BaseModel): - inputs: dict[str, Any] - query: str = Field(default="") - files: list[dict[str, Any]] | None = Field(default=None) - response_mode: Literal["blocking", "streaming"] | None = None - retriever_from: str = Field(default="dev") - trace_session_id: str | None = Field(default=None, description="Trace session ID for observability grouping") + inputs: dict[str, Any] = Field( + description=( + "Values for app-defined variables. Refer to the `user_input_form` field in the " + "[Get App Parameters](/api-reference/applications/get-app-parameters) response to discover expected " + "variable names and types." 
+ ) + ) + query: str = Field(default="", description="User input or prompt content.") + files: InputFileList = Field( + default=None, + description=( + "File list for multimodal understanding, including images, documents, audio, and video. To attach a " + "local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned " + "`id` as `upload_file_id` with `transfer_method: local_file`." + ), + ) + response_mode: Literal["blocking", "streaming"] | None = Field( + default=None, + description=( + "Response mode. `streaming` uses Server-Sent Events; `blocking` returns after completion. When omitted, " + "the request runs in blocking mode." + ), + ) + retriever_from: SkipJsonSchema[str] = Field(default="dev") + trace_session_id: SkipJsonSchema[str | None] = Field( + default=None, description="Trace session ID for observability grouping" + ) class ChatRequestPayload(BaseModel): - inputs: dict[str, Any] - query: str - files: list[dict[str, Any]] | None = Field(default=None) - response_mode: Literal["blocking", "streaming"] | None = None - conversation_id: UUIDStrOrEmpty | None = Field(default=None, description="Conversation UUID") - retriever_from: str = Field(default="dev") - auto_generate_name: bool = Field(default=True, description="Auto generate conversation name") - workflow_id: str | None = Field(default=None, description="Workflow ID for advanced chat") - trace_session_id: str | None = Field(default=None, description="Trace session ID for observability grouping") + inputs: dict[str, Any] = Field( + description=( + "Values for app-defined variables. Refer to the `user_input_form` field in the " + "[Get App Parameters](/api-reference/applications/get-app-parameters) response to discover expected " + "variable names and types." 
+ ) + ) + query: str = Field(description="User input or question content.") + files: InputFileList = Field( + default=None, + description=( + "File list for multimodal understanding, including images, documents, audio, and video. To attach a " + "local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned " + "`id` as `upload_file_id` with `transfer_method: local_file`." + ), + ) + response_mode: Literal["blocking", "streaming"] | None = Field( + default=None, + description=( + "Response mode. `streaming` uses Server-Sent Events; `blocking` returns after completion. New Agent app " + "mode supports streaming only. When omitted, non-Agent apps run in blocking mode and new Agent apps stream." + ), + ) + conversation_id: UUIDStrOrEmpty | None = Field( + default=None, + description=( + "Conversation ID to continue a conversation. Omit this field or pass an empty string to start a new " + "conversation, then pass the returned `conversation_id` in subsequent requests." + ), + ) + retriever_from: SkipJsonSchema[str] = Field(default="dev") + auto_generate_name: bool = Field( + default=True, + description=( + "Auto-generate the conversation title. If `false`, use the Rename Conversation API with " + "`auto_generate: true` to generate the title asynchronously." + ), + ) + workflow_id: str | None = Field( + default=None, + description=( + "Published workflow version ID to execute for advanced chat. If omitted, the app's current published " + "workflow is used." 
+ ), + ) + trace_session_id: SkipJsonSchema[str | None] = Field( + default=None, description="Trace session ID for observability grouping" + ) @field_validator("conversation_id", mode="before") @classmethod @@ -206,7 +272,9 @@ class CompletionStopApi(Resource): @expect_user_json(service_api_ns) @service_api_ns.doc("stop_completion") @service_api_ns.doc(description="Stop a running completion task") - @service_api_ns.doc(params={"task_id": "The ID of the task to stop"}) + @service_api_ns.doc( + params={"task_id": ("Task ID, obtained from a streaming chunk returned by the Send Completion Message API.")} + ) @service_api_ns.doc( responses={ 200: "Task stopped successfully", @@ -355,7 +423,9 @@ class ChatStopApi(Resource): @expect_user_json(service_api_ns) @service_api_ns.doc("stop_chat_message") @service_api_ns.doc(description="Stop a running chat message generation") - @service_api_ns.doc(params={"task_id": "The ID of the task to stop"}) + @service_api_ns.doc( + params={"task_id": "Task ID, obtained from a streaming chunk returned by the Send Chat Message API."} + ) @service_api_ns.doc( responses={ 200: "Task stopped successfully", diff --git a/api/controllers/service_api/app/conversation.py b/api/controllers/service_api/app/conversation.py index a208c8fee49..9b5533ea07a 100644 --- a/api/controllers/service_api/app/conversation.py +++ b/api/controllers/service_api/app/conversation.py @@ -30,18 +30,28 @@ from services.conversation_service import ConversationService class ConversationListQuery(BaseModel): - last_id: UUIDStrOrEmpty | None = Field(default=None, description="Last conversation ID for pagination") - limit: int = Field(default=20, ge=1, le=100, description="Number of conversations to return") + last_id: UUIDStrOrEmpty | None = Field( + default=None, + description="The ID of the last record on the current page. 
Used to fetch the next page.", + ) + limit: int = Field(default=20, ge=1, le=100, description="Number of records to return.") sort_by: Literal["created_at", "-created_at", "updated_at", "-updated_at"] = Field( - default="-updated_at", description="Sort order for conversations" + default="-updated_at", + description="Sorting field. Use the `-` prefix for descending order.", ) class ConversationVariablesQuery(BaseModel): - last_id: UUIDStrOrEmpty | None = Field(default=None, description="Last variable ID for pagination") - limit: int = Field(default=20, ge=1, le=100, description="Number of variables to return") + last_id: UUIDStrOrEmpty | None = Field( + default=None, + description="The ID of the last record on the current page. Used to fetch the next page.", + ) + limit: int = Field(default=20, ge=1, le=100, description="Number of records to return.") variable_name: str | None = Field( - default=None, description="Filter variables by name", min_length=1, max_length=255 + default=None, + description="Filter variables by a specific name.", + min_length=1, + max_length=255, ) @field_validator("variable_name", mode="before") @@ -69,7 +79,7 @@ class ConversationVariablesQuery(BaseModel): class ConversationVariableUpdatePayload(BaseModel): - value: Any + value: Any = Field(description="The new value for the variable. 
Must match the variable's expected type.") class ConversationVariableResponse(ResponseModel): @@ -221,7 +231,7 @@ class ConversationDetailApi(Resource): @expect_user_json(service_api_ns) @service_api_ns.doc("delete_conversation") @service_api_ns.doc(description="Delete a specific conversation") - @service_api_ns.doc(params={"c_id": "Conversation ID"}) + @service_api_ns.doc(params={"c_id": "Conversation ID."}) @service_api_ns.doc( responses={ 204: "Conversation deleted successfully", @@ -263,7 +273,7 @@ class ConversationRenameApi(Resource): @expect_with_user(service_api_ns, ConversationRenamePayload) @service_api_ns.doc("rename_conversation") @service_api_ns.doc(description="Rename a conversation or auto-generate a name") - @service_api_ns.doc(params={"c_id": "Conversation ID"}) + @service_api_ns.doc(params={"c_id": "Conversation ID."}) @service_api_ns.doc( responses={ 200: "Conversation renamed successfully", @@ -315,7 +325,7 @@ class ConversationVariablesApi(Resource): @service_api_ns.doc(params=query_params_from_model(ConversationVariablesQuery)) @service_api_ns.doc("list_conversation_variables") @service_api_ns.doc(description="List all variables for a conversation") - @service_api_ns.doc(params={"c_id": "Conversation ID"}) + @service_api_ns.doc(params={"c_id": "Conversation ID."}) @service_api_ns.doc( responses={ 200: "Variables retrieved successfully", @@ -375,7 +385,7 @@ class ConversationVariableDetailApi(Resource): @expect_with_user(service_api_ns, ConversationVariableUpdatePayload) @service_api_ns.doc("update_conversation_variable") @service_api_ns.doc(description="Update a conversation variable's value") - @service_api_ns.doc(params={"c_id": "Conversation ID", "variable_id": "Variable ID"}) + @service_api_ns.doc(params={"c_id": "Conversation ID.", "variable_id": "Variable ID."}) @service_api_ns.doc( responses={ 200: "Variable updated successfully", diff --git a/api/controllers/service_api/app/file_preview.py 
b/api/controllers/service_api/app/file_preview.py index 0b7e057152b..3bd0db6eb54 100644 --- a/api/controllers/service_api/app/file_preview.py +++ b/api/controllers/service_api/app/file_preview.py @@ -25,7 +25,10 @@ logger = logging.getLogger(__name__) class FilePreviewQuery(BaseModel): - as_attachment: bool = Field(default=False, description="Download as attachment") + as_attachment: bool = Field( + default=False, + description="If `true`, forces the file to download as an attachment instead of previewing in browser.", + ) register_schema_model(service_api_ns, FilePreviewQuery) @@ -83,7 +86,14 @@ class FilePreviewApi(Resource): @binary_response(service_api_ns, FILE_PREVIEW_RESPONSE_MEDIA_TYPES) @service_api_ns.doc("preview_file") @service_api_ns.doc(description="Preview or download a file uploaded via Service API") - @service_api_ns.doc(params={"file_id": "UUID of the file to preview"}) + @service_api_ns.doc( + params={ + "file_id": ( + "The unique identifier of the file to preview, obtained from the " + "[Upload File](/api-reference/files/upload-file) API response." 
+ ) + } + ) @service_api_ns.doc( responses={ 200: "File retrieved successfully", diff --git a/api/controllers/service_api/app/message.py b/api/controllers/service_api/app/message.py index a51daf7973d..18d1c5d3254 100644 --- a/api/controllers/service_api/app/message.py +++ b/api/controllers/service_api/app/message.py @@ -31,8 +31,8 @@ logger = logging.getLogger(__name__) class FeedbackListQuery(BaseModel): - page: int = Field(default=1, ge=1, description="Page number") - limit: int = Field(default=20, ge=1, le=101, description="Number of feedbacks per page") + page: int = Field(default=1, ge=1, description="Page number for pagination.") + limit: int = Field(default=20, ge=1, le=101, description="Number of records per page.") class AppFeedbackResponse(ResponseModel): @@ -142,7 +142,7 @@ class MessageFeedbackApi(Resource): @service_api_ns.response(200, "Feedback submitted successfully", service_api_ns.models[ResultResponse.__name__]) @service_api_ns.doc("create_message_feedback") @service_api_ns.doc(description="Submit feedback for a message") - @service_api_ns.doc(params={"message_id": "Message ID"}) + @service_api_ns.doc(params={"message_id": "Message ID."}) @service_api_ns.doc( responses={ 200: "Feedback submitted successfully", diff --git a/api/controllers/service_api/app/workflow.py b/api/controllers/service_api/app/workflow.py index 234488885de..091b79fefbd 100644 --- a/api/controllers/service_api/app/workflow.py +++ b/api/controllers/service_api/app/workflow.py @@ -7,6 +7,7 @@ from dateutil.parser import isoparse from flask import request from flask_restx import Resource, fields from pydantic import BaseModel, Field, field_validator +from pydantic.json_schema import SkipJsonSchema from sqlalchemy.orm import sessionmaker from werkzeug.exceptions import BadRequest, InternalServerError, NotFound @@ -58,19 +59,41 @@ logger = logging.getLogger(__name__) class WorkflowRunPayload(WorkflowRunPayloadBase): - response_mode: Literal["blocking", "streaming"] | None = None 
- trace_session_id: str | None = Field(default=None, description="Trace session ID for observability grouping") + response_mode: Literal["blocking", "streaming"] | None = Field( + default=None, + description=( + "Response mode. Use `blocking` for synchronous responses or `streaming` for Server-Sent Events. " + "When omitted, the request runs in blocking mode." + ), + ) + trace_session_id: SkipJsonSchema[str | None] = Field( + default=None, description="Trace session ID for observability grouping" + ) class WorkflowLogQuery(BaseModel): - keyword: str | None = None - status: Literal["succeeded", "failed", "stopped"] | None = None - created_at__before: str | None = None - created_at__after: str | None = None - created_by_end_user_session_id: str | None = None - created_by_account: str | None = None - page: int = Field(default=1, ge=1, le=99999) - limit: int = Field(default=20, ge=1, le=100) + keyword: str | None = Field(default=None, description="Keyword to search in logs.") + status: Literal["succeeded", "failed", "stopped"] | None = Field( + default=None, + description="Filter by execution status.", + ) + created_at__before: str | None = Field( + default=None, + description="Filter logs created before this ISO 8601 timestamp.", + json_schema_extra={"format": "date-time"}, + ) + created_at__after: str | None = Field( + default=None, + description="Filter logs created after this ISO 8601 timestamp.", + json_schema_extra={"format": "date-time"}, + ) + created_by_end_user_session_id: str | None = Field( + default=None, + description="Filter by end user session ID.", + ) + created_by_account: str | None = Field(default=None, description="Filter by account ID.") + page: int = Field(default=1, ge=1, le=99999, description="Page number for pagination.") + limit: int = Field(default=20, ge=1, le=100, description="Number of items per page.") register_schema_models(service_api_ns, WorkflowRunPayload, WorkflowLogQuery) @@ -226,7 +249,11 @@ class WorkflowRunDetailApi(Resource): ) 
@service_api_ns.doc("get_workflow_run_detail") @service_api_ns.doc(description="Get workflow run details") - @service_api_ns.doc(params={"workflow_run_id": "Workflow run ID"}) + @service_api_ns.doc( + params={ + "workflow_run_id": "Workflow run ID, obtained from the workflow execution response or streaming events." + } + ) @service_api_ns.doc( responses={ 200: "Workflow run details retrieved successfully", @@ -397,7 +424,14 @@ class WorkflowRunByIdApi(Resource): @json_or_event_stream_response(service_api_ns) @service_api_ns.doc("run_workflow_by_id") @service_api_ns.doc(description="Execute a specific workflow by ID") - @service_api_ns.doc(params={"workflow_id": "Workflow ID to execute"}) + @service_api_ns.doc( + params={ + "workflow_id": ( + "Workflow ID of the specific version to execute. This value is returned in the `workflow_id` field " + "of workflow run responses." + ) + } + ) @service_api_ns.doc( responses={ 200: "Workflow executed successfully", @@ -482,7 +516,9 @@ class WorkflowTaskStopApi(Resource): @expect_user_json(service_api_ns) @service_api_ns.doc("stop_workflow_task") @service_api_ns.doc(description="Stop a running workflow task") - @service_api_ns.doc(params={"task_id": "Task ID to stop"}) + @service_api_ns.doc( + params={"task_id": "Task ID, obtained from the streaming chunk returned by the Run Workflow API."} + ) @service_api_ns.doc( responses={ 200: "Task stopped successfully", diff --git a/api/controllers/service_api/app/workflow_events.py b/api/controllers/service_api/app/workflow_events.py index 1bace170f53..83a7c8227f5 100644 --- a/api/controllers/service_api/app/workflow_events.py +++ b/api/controllers/service_api/app/workflow_events.py @@ -32,9 +32,25 @@ from services.workflow_event_snapshot_service import build_workflow_event_stream class WorkflowEventsQuery(BaseModel): - user: str = Field(..., description="End user identifier") - include_state_snapshot: bool = Field(default=False, description="Replay from persisted state snapshot") - 
continue_on_pause: bool = Field(default=False, description="Keep the stream open across workflow_paused events") + user: str = Field( + ..., + description="End-user identifier that originally triggered the run. Must match the creator of the run.", + ) + include_state_snapshot: bool = Field( + default=False, + description=( + "When `true`, replay from the persisted state snapshot to include a status summary of already-executed " + "nodes before streaming new events." + ), + ) + continue_on_pause: bool = Field( + default=False, + description=( + "Set to `true` to keep the stream open across multiple `workflow_paused` events, which is useful when " + "the workflow has more than one Human Input node in sequence. By default, the stream closes after the " + "first pause." + ), + ) register_schema_models(service_api_ns, WorkflowEventsQuery) @@ -65,7 +81,7 @@ class WorkflowEventsApi(Resource): @event_stream_response(service_api_ns) @service_api_ns.doc("get_workflow_events") @service_api_ns.doc(description="Get workflow execution events stream after resume") - @service_api_ns.doc(params={"task_id": "Workflow run ID"}) + @service_api_ns.doc(params={"task_id": "Workflow run ID returned by the original workflow run request."}) @service_api_ns.doc(params=query_params_from_model(WorkflowEventsQuery)) @service_api_ns.doc( responses={ diff --git a/api/controllers/service_api/dataset/dataset.py b/api/controllers/service_api/dataset/dataset.py index 0ca5c5bbf6b..c8f95f3acd4 100644 --- a/api/controllers/service_api/dataset/dataset.py +++ b/api/controllers/service_api/dataset/dataset.py @@ -1,8 +1,17 @@ -from typing import Any, Literal, override +from typing import Annotated, Literal, override from uuid import UUID from flask import request -from pydantic import BaseModel, ConfigDict, Field, GetJsonSchemaHandler, RootModel, field_validator, model_validator +from pydantic import ( + BaseModel, + ConfigDict, + Field, + GetJsonSchemaHandler, + RootModel, + WithJsonSchema, + 
field_validator, + model_validator, +) from werkzeug.exceptions import Forbidden, NotFound import services @@ -33,7 +42,12 @@ from models.dataset import DatasetPermissionEnum from models.enums import TagType from models.provider_ids import ModelProviderID from services.dataset_service import DatasetPermissionService, DatasetService, DocumentService -from services.entities.knowledge_entities.knowledge_entities import RetrievalModel +from services.entities.knowledge_entities.knowledge_entities import ( + ExternalRetrievalModel, + KnowledgeProvider, + RetrievalModel, + SummaryIndexSetting, +) from services.tag_service import ( SaveTagPayload, TagBindingCreatePayload, @@ -46,37 +60,122 @@ from services.tag_service import ( register_enum_models(service_api_ns, DatasetPermissionEnum) +PartialMemberList = Annotated[ + list[dict[str, str]] | None, + WithJsonSchema( + { + "anyOf": [ + { + "items": { + "properties": { + "user_id": { + "description": "ID of the team member to grant access.", + "type": "string", + } + }, + "type": "object", + }, + "type": "array", + }, + {"type": "null"}, + ] + } + ), +] + class DatasetCreatePayload(BaseModel): - name: str = Field(..., min_length=1, max_length=40) - description: str = Field(default="", description="Dataset description (max 400 chars)", max_length=400) - indexing_technique: Literal["high_quality", "economy"] | None = None - permission: DatasetPermissionEnum | None = DatasetPermissionEnum.ONLY_ME - external_knowledge_api_id: str | None = None - provider: str = "vendor" - external_knowledge_id: str | None = None - retrieval_model: RetrievalModel | None = None - embedding_model: str | None = None - embedding_model_provider: str | None = None - summary_index_setting: dict | None = Field(default=None) + name: str = Field(..., min_length=1, max_length=40, description="Name of the knowledge base.") + description: str = Field(default="", description="Description of the knowledge base.", max_length=400) + indexing_technique: 
Literal["high_quality", "economy"] | None = Field( + default=None, + description="`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing.", + ) + permission: DatasetPermissionEnum | None = Field( + default=DatasetPermissionEnum.ONLY_ME, + description=( + "Controls who can access this knowledge base. `only_me` restricts access to the creator, " + "`all_team_members` grants workspace-wide access, and `partial_members` grants access to specified " + "members." + ), + ) + external_knowledge_api_id: str | None = Field(default=None, description="ID of the external knowledge API.") + provider: KnowledgeProvider = Field( + default="vendor", + description="Knowledge base provider: `vendor` for internal knowledge bases, `external` for external ones.", + ) + external_knowledge_id: str | None = Field(default=None, description="ID of the external knowledge base.") + retrieval_model: RetrievalModel | None = Field( + default=None, + description="Retrieval model configuration. Controls how chunks are searched and ranked.", + ) + embedding_model: str | None = Field( + default=None, + description=( + "Embedding model name. Use the `model` field from " + "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`." + ), + ) + embedding_model_provider: str | None = Field( + default=None, + description=( + "Embedding model provider. Use the `provider` field from " + "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`." 
+ ), + ) + summary_index_setting: SummaryIndexSetting = Field( + default=None, + description="Summary index configuration.", + ) class DatasetUpdatePayload(BaseModel): - name: str | None = Field(default=None, min_length=1, max_length=40) - description: str | None = Field(default=None, description="Dataset description (max 400 chars)", max_length=400) - indexing_technique: Literal["high_quality", "economy"] | None = None - permission: DatasetPermissionEnum | None = None - embedding_model: str | None = None - embedding_model_provider: str | None = None - retrieval_model: RetrievalModel | None = None - partial_member_list: list[dict[str, str]] | None = None - external_retrieval_model: dict[str, Any] | None = Field(default=None) - external_knowledge_id: str | None = None - external_knowledge_api_id: str | None = None + name: str | None = Field(default=None, min_length=1, max_length=40, description="Name of the knowledge base.") + description: str | None = Field(default=None, description="Description of the knowledge base.", max_length=400) + indexing_technique: Literal["high_quality", "economy"] | None = Field( + default=None, + description="`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing.", + ) + permission: DatasetPermissionEnum | None = Field( + default=None, + description=( + "Controls who can access this knowledge base. `only_me` restricts access to the creator, " + "`all_team_members` grants workspace-wide access, and `partial_members` grants access to specified " + "members." + ), + ) + embedding_model: str | None = Field( + default=None, + description=( + "Embedding model name. Use the `model` field from " + "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`." + ), + ) + embedding_model_provider: str | None = Field( + default=None, + description=( + "Embedding model provider. 
Use the `provider` field from " + "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`." + ), + ) + retrieval_model: RetrievalModel | None = Field( + default=None, + description="Retrieval model configuration. Controls how chunks are searched and ranked.", + ) + partial_member_list: PartialMemberList = Field( + default=None, + description="List of team members with access when `permission` is `partial_members`.", + ) + external_retrieval_model: ExternalRetrievalModel = Field( + default=None, + description="Retrieval settings for external knowledge bases.", + ) + external_knowledge_id: str | None = Field(default=None, description="ID of the external knowledge base.") + external_knowledge_api_id: str | None = Field(default=None, description="ID of the external knowledge API.") class DocumentStatusPayload(BaseModel): - document_ids: list[str] = Field(default_factory=list, description="Document IDs to update") + document_ids: list[str] = Field(default_factory=list, description="List of document IDs to update.") DOCUMENT_STATUS_ACTION_PARAM = { @@ -87,7 +186,7 @@ DOCUMENT_STATUS_ACTION_PARAM = { class TagNamePayload(BaseModel): - name: str = Field(..., min_length=1, max_length=50) + name: str = Field(..., min_length=1, max_length=50, description="Tag name.") class TagCreatePayload(TagNamePayload): @@ -95,16 +194,16 @@ class TagCreatePayload(TagNamePayload): class TagUpdatePayload(TagNamePayload): - tag_id: str + tag_id: str = Field(description="Tag ID to update.") class TagDeletePayload(BaseModel): - tag_id: str + tag_id: str = Field(description="Tag ID to delete.") class TagBindingPayload(BaseModel): - tag_ids: list[str] - target_id: str + tag_ids: list[str] = Field(description="Tag IDs to bind.") + target_id: str = Field(description="Knowledge base ID to bind the tags to.") @field_validator("tag_ids") @classmethod @@ -119,7 +218,7 @@ class TagUnbindingPayload(BaseModel): tag_ids: list[str] = Field(default_factory=list) 
tag_id: str | None = None - target_id: str + target_id: str = Field(description="Knowledge base ID.") @classmethod @override @@ -134,7 +233,7 @@ class TagUnbindingPayload(BaseModel): "minItems": 1, "type": "array", } - target_id_property = {"title": "Target Id", "type": "string"} + target_id_property = {"description": "Knowledge base ID.", "title": "Target Id", "type": "string"} return { "anyOf": [ { @@ -192,11 +291,14 @@ class KnowledgeTagListResponse(RootModel[list[KnowledgeTagResponse]]): class DatasetListQuery(BaseModel): - page: int = Field(default=1, description="Page number") - limit: int = Field(default=20, description="Number of items per page") - keyword: str | None = Field(default=None, description="Search keyword") - include_all: bool = Field(default=False, description="Include all datasets") - tag_ids: list[str] = Field(default_factory=list, description="Filter by tag IDs") + page: int = Field(default=1, description="Page number to retrieve.") + limit: int = Field(default=20, description="Number of items per page. 
Server caps at `100`.") + keyword: str | None = Field(default=None, description="Search keyword to filter by name.") + include_all: bool = Field( + default=False, + description="Whether to include all knowledge bases regardless of permissions.", + ) + tag_ids: list[str] = Field(default_factory=list, description="Tag IDs to filter by.") class DatasetDetailWithPartialMembersResponse(DatasetDetailResponse): @@ -409,7 +511,7 @@ class DatasetApi(DatasetApiResource): ) @service_api_ns.doc("get_dataset") @service_api_ns.doc(description="Get a specific dataset by ID") - @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."}) @service_api_ns.doc( responses={ 200: "Dataset retrieved successfully", @@ -488,7 +590,7 @@ class DatasetApi(DatasetApiResource): @service_api_ns.expect(service_api_ns.models[DatasetUpdatePayload.__name__]) @service_api_ns.doc("update_dataset") @service_api_ns.doc(description="Update an existing dataset") - @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."}) @service_api_ns.doc( responses={ 200: "Dataset updated successfully", @@ -585,7 +687,7 @@ class DatasetApi(DatasetApiResource): ) @service_api_ns.doc("delete_dataset") @service_api_ns.doc(description="Delete a dataset") - @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."}) @service_api_ns.doc( responses={ 204: "Dataset deleted successfully", @@ -648,7 +750,7 @@ class DocumentStatusApi(DatasetApiResource): @service_api_ns.doc(description="Batch update document status") @service_api_ns.doc( params={ - "dataset_id": "Dataset ID", + "dataset_id": "Knowledge base ID.", "action": DOCUMENT_STATUS_ACTION_PARAM, } ) @@ -927,7 +1029,7 @@ class DatasetTagsBindingStatusApi(DatasetApiResource): ) @service_api_ns.doc("get_dataset_tags_binding_status") @service_api_ns.doc(description="Get tags bound to a 
specific dataset") - @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."}) @service_api_ns.doc( responses={ 200: "Tags retrieved successfully", diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index 871c5e888b1..9bae862814a 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -9,11 +9,11 @@ import json from collections.abc import Mapping from contextlib import ExitStack from copy import deepcopy -from typing import Any, Literal, Self, override +from typing import Annotated, Any, Literal, Self, override from uuid import UUID from flask import request, send_file -from pydantic import BaseModel, Field, GetJsonSchemaHandler, field_validator, model_validator +from pydantic import BaseModel, Field, GetJsonSchemaHandler, WithJsonSchema, field_validator, model_validator from sqlalchemy import desc, func, select from werkzeug.exceptions import Forbidden, NotFound @@ -63,6 +63,8 @@ from models.dataset import Dataset, Document, DocumentSegment from models.enums import SegmentStatus from services.dataset_service import DatasetService, DocumentService from services.entities.knowledge_entities.knowledge_entities import ( + DocForm, + IndexingTechnique, KnowledgeConfig, ProcessRule, RetrievalModel, @@ -72,16 +74,44 @@ from services.summary_index_service import SummaryIndexService class DocumentTextCreatePayload(BaseModel): - name: str - text: str - process_rule: ProcessRule | None = None - original_document_id: str | None = None - doc_form: str = Field(default="text_model") - doc_language: str = Field(default="English") - indexing_technique: str | None = None - retrieval_model: RetrievalModel | None = None - embedding_model: str | None = None - embedding_model_provider: str | None = None + name: str = Field(description="Document name.") + text: str = Field(description="Document text 
content.") + process_rule: ProcessRule | None = Field(default=None, description="Processing rules for chunking.") + original_document_id: str | None = Field(default=None, description="Original document ID for replacement.") + doc_form: DocForm = Field( + default="text_model", + description=( + "`text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, " + "`qa_model` for question-answer pair extraction." + ), + ) + doc_language: str = Field(default="English", description="Language of the document for processing optimization.") + indexing_technique: IndexingTechnique = Field( + default=None, + description=( + "`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing. " + "Required when adding the first document to a knowledge base; subsequent documents inherit the " + "knowledge base's indexing technique if omitted." + ), + ) + retrieval_model: RetrievalModel | None = Field( + default=None, + description="Retrieval model configuration. Controls how chunks are searched and ranked.", + ) + embedding_model: str | None = Field( + default=None, + description=( + "Embedding model name. Use the `model` field from " + "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`." + ), + ) + embedding_model_provider: str | None = Field( + default=None, + description=( + "Embedding model provider. Use the `provider` field from " + "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`." + ), + ) @field_validator("doc_form") @classmethod @@ -92,12 +122,21 @@ class DocumentTextCreatePayload(BaseModel): class DocumentTextUpdate(BaseModel): - name: str | None = None - text: str | None = None - process_rule: ProcessRule | None = None - doc_form: str = "text_model" - doc_language: str = "English" - retrieval_model: RetrievalModel | None = None + name: str | None = Field(default=None, description="Document name. 
Required when `text` is provided.") + text: str | None = Field(default=None, description="Document text content.") + process_rule: ProcessRule | None = Field(default=None, description="Processing rules for chunking.") + doc_form: DocForm = Field( + default="text_model", + description=( + "`text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, " + "`qa_model` for question-answer pair extraction." + ), + ) + doc_language: str = Field(default="English", description="Language of the document for processing optimization.") + retrieval_model: RetrievalModel | None = Field( + default=None, + description="Retrieval model configuration. Controls how chunks are searched and ranked.", + ) @field_validator("doc_form") @classmethod @@ -119,7 +158,7 @@ class DocumentTextUpdate(BaseModel): text_branch_properties["name"] = _non_null_property_schema(properties.get("name")) no_text_branch_properties = deepcopy(properties) - no_text_branch_properties["text"] = {"type": "null"} + no_text_branch_properties["text"] = {"description": "Document text content.", "type": "null"} return { **schema, @@ -161,19 +200,41 @@ def _non_null_property_schema(property_schema: object) -> dict[str, Any]: return deepcopy(property_schema) +DocumentDisplayStatus = Annotated[ + str | None, + WithJsonSchema( + { + "anyOf": [ + { + "enum": ["queuing", "indexing", "paused", "error", "available", "disabled", "archived"], + "type": "string", + }, + {"type": "null"}, + ] + } + ), +] + + class DocumentListQuery(BaseModel): - page: int = Field(default=1, description="Page number") - limit: int = Field(default=20, description="Number of items per page") - keyword: str | None = Field(default=None, description="Search keyword") - status: str | None = Field(default=None, description="Document status filter") + page: int = Field(default=1, description="Page number to retrieve.") + limit: int = Field(default=20, description="Number of items per page. 
Server caps at `100`.") + keyword: str | None = Field(default=None, description="Search keyword to filter by document name.") + status: DocumentDisplayStatus = Field(default=None, description="Filter by display status.") class DocumentGetQuery(BaseModel): - metadata: Literal["all", "only", "without"] = Field(default="all", description="Metadata response mode") + metadata: Literal["all", "only", "without"] = Field( + default="all", + description=( + "`all` returns all fields including metadata. `only` returns only `id`, `doc_type`, and " + "`doc_metadata`. `without` returns all fields except `doc_metadata`." + ), + ) DOCUMENT_CREATE_BY_FILE_PARAMS = { - "dataset_id": "Dataset ID", + "dataset_id": "Knowledge base ID.", "file": { "in": "formData", "type": "file", @@ -184,23 +245,32 @@ DOCUMENT_CREATE_BY_FILE_PARAMS = { "in": "formData", "type": "string", "required": False, - "description": "Optional JSON string with document creation settings.", + "description": ( + "JSON string containing configuration. Accepts the same fields as " + "[Create Document by Text](/api-reference/documents/create-document-by-text) (`indexing_technique`, " + "`doc_form`, `doc_language`, `process_rule`, `retrieval_model`, `embedding_model`, " + "`embedding_model_provider`) except `name` and `text`." + ), }, } DOCUMENT_UPDATE_BY_FILE_PARAMS = { - "dataset_id": "Dataset ID", - "document_id": "Document ID", + "dataset_id": "Knowledge base ID.", + "document_id": "Document ID.", "file": { "in": "formData", "type": "file", "required": False, - "description": "Replacement document file.", + "description": "Replacement document file to upload.", }, "data": { "in": "formData", "type": "string", "required": False, - "description": "Optional JSON string with document update settings.", + "description": ( + "JSON string containing document update settings such as `doc_form`, `doc_language`, `process_rule`, " + "`retrieval_model`, `embedding_model`, and `embedding_model_provider`. 
`name` and `text` are not used " + "for file updates." + ), }, } @@ -422,7 +492,7 @@ class DocumentAddByTextApi(DatasetApiResource): @service_api_ns.expect(service_api_ns.models[DocumentTextCreatePayload.__name__]) @service_api_ns.doc("create_document_by_text") @service_api_ns.doc(description="Create a new document by providing text content") - @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."}) @service_api_ns.doc( responses={ 200: "Document created successfully", @@ -454,7 +524,7 @@ class DeprecatedDocumentAddByTextApi(DatasetApiResource): "Use /datasets/{dataset_id}/document/create-by-text instead." ) ) - @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."}) @service_api_ns.doc( responses={ 200: "Document created successfully", @@ -499,7 +569,7 @@ class DocumentUpdateByTextApi(DatasetApiResource): @service_api_ns.expect(service_api_ns.models[DocumentTextUpdate.__name__]) @service_api_ns.doc("update_document_by_text") @service_api_ns.doc(description="Update an existing document by providing text content") - @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."}) @service_api_ns.doc( responses={ 200: "Document updated successfully", @@ -530,7 +600,7 @@ class DeprecatedDocumentUpdateByTextApi(DatasetApiResource): "Use /datasets/{dataset_id}/documents/{document_id}/update-by-text instead." 
) ) - @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."}) @service_api_ns.doc( responses={ 200: "Document updated successfully", @@ -839,7 +909,7 @@ class DocumentListApi(DatasetApiResource): ) @service_api_ns.doc("list_documents") @service_api_ns.doc(description="List all documents in a dataset") - @service_api_ns.doc(params={"dataset_id": "Dataset ID", **query_params_from_model(DocumentListQuery)}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", **query_params_from_model(DocumentListQuery)}) @service_api_ns.doc( responses={ 200: "Documents retrieved successfully", @@ -913,7 +983,7 @@ class DocumentBatchDownloadZipApi(DatasetApiResource): @service_api_ns.expect(service_api_ns.models[DocumentBatchDownloadZipPayload.__name__]) @service_api_ns.doc("download_documents_as_zip") @service_api_ns.doc(description="Download selected uploaded documents as a single ZIP archive") - @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."}) @service_api_ns.doc( responses={ 200: "ZIP archive generated successfully", @@ -965,7 +1035,7 @@ class DocumentIndexingStatusApi(DatasetApiResource): ) @service_api_ns.doc("get_document_indexing_status") @service_api_ns.doc(description="Get indexing status for documents in a batch") - @service_api_ns.doc(params={"dataset_id": "Dataset ID", "batch": "Batch ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "batch": "Batch ID."}) @service_api_ns.doc( responses={ 200: "Indexing status retrieved successfully", @@ -1047,7 +1117,7 @@ class DocumentDownloadApi(DatasetApiResource): ) @service_api_ns.doc("get_document_download_url") @service_api_ns.doc(description="Get a signed download URL for a document's original uploaded file") - @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document 
ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."}) @service_api_ns.doc( responses={ 200: "Download URL generated successfully", @@ -1099,7 +1169,7 @@ class DocumentApi(DatasetApiResource): ) @service_api_ns.doc("get_document") @service_api_ns.doc(description="Get a specific document by ID") - @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."}) @service_api_ns.doc(params=query_params_from_model(DocumentGetQuery)) @service_api_ns.doc( responses={ @@ -1251,7 +1321,7 @@ class DocumentApi(DatasetApiResource): ) @service_api_ns.doc("delete_document") @service_api_ns.doc(description="Delete a document") - @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."}) @service_api_ns.doc( responses={ 204: "Document deleted successfully", diff --git a/api/controllers/service_api/dataset/hit_testing.py b/api/controllers/service_api/dataset/hit_testing.py index fb55b18059d..31881e86322 100644 --- a/api/controllers/service_api/dataset/hit_testing.py +++ b/api/controllers/service_api/dataset/hit_testing.py @@ -41,7 +41,7 @@ class HitTestingApi(DatasetApiResource, DatasetsHitTestingBase): ) @service_api_ns.doc("dataset_hit_testing") @service_api_ns.doc(description="Perform hit testing on a dataset") - @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."}) @service_api_ns.response( 200, "Hit testing results", diff --git a/api/controllers/service_api/dataset/metadata.py b/api/controllers/service_api/dataset/metadata.py index 82f571ab0f4..426d008c412 100644 --- a/api/controllers/service_api/dataset/metadata.py +++ b/api/controllers/service_api/dataset/metadata.py @@ -25,7 +25,7 @@ from 
services.entities.knowledge_entities.knowledge_entities import ( from services.metadata_service import MetadataService BUILT_IN_METADATA_ACTION_PARAM = { - "description": "Action to perform: 'enable' or 'disable'", + "description": "`enable` to activate built-in metadata fields, `disable` to deactivate them.", "enum": ["enable", "disable"], "type": "string", } @@ -63,7 +63,7 @@ class DatasetMetadataCreateServiceApi(DatasetApiResource): @service_api_ns.expect(service_api_ns.models[MetadataArgs.__name__]) @service_api_ns.doc("create_dataset_metadata") @service_api_ns.doc(description="Create metadata for a dataset") - @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."}) @service_api_ns.doc( responses={ 201: "Metadata created successfully", @@ -101,7 +101,7 @@ class DatasetMetadataCreateServiceApi(DatasetApiResource): ) @service_api_ns.doc("get_dataset_metadata") @service_api_ns.doc(description="Get all metadata for a dataset") - @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."}) @service_api_ns.doc( responses={ 200: "Metadata retrieved successfully", @@ -135,7 +135,7 @@ class DatasetMetadataServiceApi(DatasetApiResource): @service_api_ns.expect(service_api_ns.models[MetadataUpdatePayload.__name__]) @service_api_ns.doc("update_dataset_metadata") @service_api_ns.doc(description="Update metadata name") - @service_api_ns.doc(params={"dataset_id": "Dataset ID", "metadata_id": "Metadata ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "metadata_id": "Metadata field ID."}) @service_api_ns.doc( responses={ 200: "Metadata updated successfully", @@ -174,7 +174,7 @@ class DatasetMetadataServiceApi(DatasetApiResource): ) @service_api_ns.doc("delete_dataset_metadata") @service_api_ns.doc(description="Delete metadata") - @service_api_ns.doc(params={"dataset_id": "Dataset ID", "metadata_id": "Metadata ID"}) + 
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "metadata_id": "Metadata field ID."}) @service_api_ns.doc( responses={ 204: "Metadata deleted successfully", @@ -211,6 +211,7 @@ class DatasetMetadataBuiltInFieldServiceApi(DatasetApiResource): ) @service_api_ns.doc("get_built_in_fields") @service_api_ns.doc(description="Get all built-in metadata fields") + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."}) @service_api_ns.doc( responses={ 200: "Built-in fields retrieved successfully", @@ -240,7 +241,7 @@ class DatasetMetadataBuiltInFieldActionServiceApi(DatasetApiResource): ) @service_api_ns.doc("toggle_built_in_field") @service_api_ns.doc(description="Enable or disable built-in metadata field") - @service_api_ns.doc(params={"dataset_id": "Dataset ID", "action": BUILT_IN_METADATA_ACTION_PARAM}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "action": BUILT_IN_METADATA_ACTION_PARAM}) @service_api_ns.doc( responses={ 200: "Action completed successfully", @@ -284,7 +285,7 @@ class DocumentMetadataEditServiceApi(DatasetApiResource): @service_api_ns.expect(service_api_ns.models[MetadataOperationData.__name__]) @service_api_ns.doc("update_documents_metadata") @service_api_ns.doc(description="Update metadata for multiple documents") - @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."}) @service_api_ns.doc( responses={ 200: "Documents metadata updated successfully", diff --git a/api/controllers/service_api/dataset/rag_pipeline/rag_pipeline_workflow.py b/api/controllers/service_api/dataset/rag_pipeline/rag_pipeline_workflow.py index 57b360370f4..a6a61262cdc 100644 --- a/api/controllers/service_api/dataset/rag_pipeline/rag_pipeline_workflow.py +++ b/api/controllers/service_api/dataset/rag_pipeline/rag_pipeline_workflow.py @@ -37,6 +37,7 @@ from services.errors.file import FileTooLargeError, UnsupportedFileTypeError from services.file_service import 
FileService from services.rag_pipeline.entity.pipeline_service_api_entities import ( DatasourceNodeRunApiEntity, + DatasourceType, PipelineRunApiEntity, ) from services.rag_pipeline.pipeline_generate_service import PipelineGenerateService @@ -44,14 +45,27 @@ from services.rag_pipeline.rag_pipeline import RagPipelineService class DatasourceNodeRunPayload(BaseModel): - inputs: dict[str, Any] - datasource_type: str - credential_id: str | None = None - is_published: bool + inputs: dict[str, Any] = Field(description="Input variables for the datasource node.") + datasource_type: DatasourceType = Field(description="Type of the datasource.") + credential_id: str | None = Field( + default=None, description="Datasource credential ID. Uses the default if omitted." + ) + is_published: bool = Field( + description=( + "Whether to run the published or draft version of the node. `true` runs the published version, " + "`false` runs the draft." + ) + ) class DatasourcePluginsQuery(BaseModel): - is_published: bool = True + is_published: bool = Field( + default=True, + description=( + "Whether to retrieve nodes from the published or draft pipeline. `true` returns nodes from the published " + "version, `false` returns nodes from the draft." 
+ ), + ) class DatasourceCredentialInfoResponse(ResponseModel): @@ -114,11 +128,7 @@ class DatasourcePluginsApi(DatasetApiResource): ) @service_api_ns.doc(shortcut="list_rag_pipeline_datasource_plugins") @service_api_ns.doc(description="List all datasource plugins for a rag pipeline") - @service_api_ns.doc( - path={ - "dataset_id": "Dataset ID", - } - ) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."}) @service_api_ns.doc(params=query_params_from_model(DatasourcePluginsQuery)) @service_api_ns.doc( responses={ @@ -169,11 +179,7 @@ class DatasourceNodeRunApi(DatasetApiResource): @event_stream_response(service_api_ns) @service_api_ns.doc(shortcut="pipeline_datasource_node_run") @service_api_ns.doc(description="Run a datasource node for a rag pipeline") - @service_api_ns.doc( - path={ - "dataset_id": "Dataset ID", - } - ) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "node_id": "ID of the datasource node to execute."}) @service_api_ns.doc( responses={ 200: "Datasource node run successfully", @@ -245,11 +251,7 @@ class PipelineRunApi(DatasetApiResource): @json_or_event_stream_response(service_api_ns) @service_api_ns.doc(shortcut="pipeline_datasource_node_run") @service_api_ns.doc(description="Run a datasource node for a rag pipeline") - @service_api_ns.doc( - path={ - "dataset_id": "Dataset ID", - } - ) + @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."}) @service_api_ns.doc( responses={ 200: "Pipeline run successfully", diff --git a/api/controllers/service_api/dataset/segment.py b/api/controllers/service_api/dataset/segment.py index c334649c602..dd8d7c76632 100644 --- a/api/controllers/service_api/dataset/segment.py +++ b/api/controllers/service_api/dataset/segment.py @@ -47,10 +47,10 @@ from services.summary_index_service import SummaryIndexService class SegmentCreateItemPayload(BaseModel): - content: str = Field(min_length=1) - answer: str | None = None - keywords: list[str] | None = None - attachment_ids: list[str] 
| None = None + content: str = Field(min_length=1, description="Chunk text content.") + answer: str | None = Field(default=None, description="Answer content for QA mode.") + keywords: list[str] | None = Field(default=None, description="Keywords for the chunk.") + attachment_ids: list[str] | None = Field(default=None, description="Attachment file IDs.") @field_validator("content") @classmethod @@ -61,31 +61,34 @@ class SegmentCreateItemPayload(BaseModel): class SegmentCreatePayload(BaseModel): - segments: list[SegmentCreateItemPayload] = Field(min_length=1) + segments: list[SegmentCreateItemPayload] = Field(min_length=1, description="Array of chunk objects to create.") class SegmentListQuery(BaseModel): - limit: int = Field(default=20, ge=1) - page: int = Field(default=1, ge=1) - status: list[str] = Field(default_factory=list) - keyword: str | None = None + limit: int = Field(default=20, ge=1, description="Number of items per page. Server caps at `100`.") + page: int = Field(default=1, ge=1, description="Page number to retrieve.") + status: list[str] = Field( + default_factory=list, + description="Filter chunks by indexing status, such as `completed`, `indexing`, or `error`.", + ) + keyword: str | None = Field(default=None, description="Search keyword.") class SegmentUpdatePayload(BaseModel): - segment: SegmentUpdateArgs + segment: SegmentUpdateArgs = Field(description="Chunk update payload.") class ChildChunkListQuery(BaseModel): - limit: int = Field(default=20, ge=1) - keyword: str | None = None - page: int = Field(default=1, ge=1) + limit: int = Field(default=20, ge=1, description="Number of items per page. 
Server caps at `100`.") + keyword: str | None = Field(default=None, description="Search keyword.") + page: int = Field(default=1, ge=1, description="Page number to retrieve.") class SegmentDocParams: - DATASET_DOCUMENT = {"dataset_id": "Dataset ID", "document_id": "Document ID"} - DATASET_DOCUMENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Segment ID"} - DATASET_DOCUMENT_PARENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Parent segment ID"} - DATASET_DOCUMENT_CHILD_CHUNK = {**DATASET_DOCUMENT_PARENT_SEGMENT, "child_chunk_id": "Child chunk ID"} + DATASET_DOCUMENT = {"dataset_id": "Knowledge base ID.", "document_id": "Document ID."} + DATASET_DOCUMENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Chunk ID."} + DATASET_DOCUMENT_PARENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Chunk ID."} + DATASET_DOCUMENT_CHILD_CHUNK = {**DATASET_DOCUMENT_PARENT_SEGMENT, "child_chunk_id": "Child chunk ID."} class SegmentCreateListResponse(ResponseModel): diff --git a/api/controllers/service_api/schema.py b/api/controllers/service_api/schema.py index ed528e4fc9d..87f8aa54287 100644 --- a/api/controllers/service_api/schema.py +++ b/api/controllers/service_api/schema.py @@ -8,19 +8,69 @@ from __future__ import annotations from collections.abc import Sequence from copy import deepcopy -from typing import cast +from typing import Annotated, Any, cast from flask_restx import Namespace -from pydantic import BaseModel +from pydantic import BaseModel, WithJsonSchema -USER_PROPERTY_SCHEMA: dict[str, object] = {"description": "End user identifier", "type": "string"} -USER_QUERY_PARAM: dict[str, object] = {"description": "End user identifier", "in": "query", "type": "string"} -USER_FORM_PARAM: dict[str, object] = {"description": "End user identifier", "in": "formData", "type": "string"} -FILE_FORM_PARAM: dict[str, object] = {"in": "formData", "required": True, "type": "file"} +USER_DESCRIPTION = ( + "User identifier, unique within the application. 
This identifier scopes data access; resources created with " + "one `user` value are only visible when queried with the same `user` value." +) +USER_PROPERTY_SCHEMA: dict[str, object] = {"description": USER_DESCRIPTION, "type": "string"} +USER_QUERY_PARAM: dict[str, object] = { + "description": "User identifier, used for end-user context.", + "in": "query", + "type": "string", +} +USER_FORM_PARAM: dict[str, object] = { + "description": USER_DESCRIPTION, + "in": "formData", + "type": "string", +} +FILE_FORM_PARAM: dict[str, object] = { + "description": "The file to upload.", + "in": "formData", + "required": True, + "type": "file", +} USER_FETCH_FROM_ATTR = "_dify_service_api_user_fetch_from" USER_REQUIRED_ATTR = "_dify_service_api_user_required" JSON_USER_FETCH_FROM = "JSON" +INPUT_FILE_ITEM_SCHEMA: dict[str, object] = { + "type": "object", + "required": ["type", "transfer_method"], + "properties": { + "type": { + "description": "File type.", + "enum": ["document", "image", "audio", "video", "custom"], + "type": "string", + }, + "transfer_method": { + "description": "Transfer method: `remote_url` for file URL, `local_file` for uploaded file.", + "enum": ["remote_url", "local_file"], + "type": "string", + }, + "url": { + "description": "File URL when `transfer_method` is `remote_url`.", + "format": "url", + "type": "string", + }, + "upload_file_id": { + "description": ( + "Uploaded file ID obtained from the [Upload File](/api-reference/files/upload-file) API when " + "`transfer_method` is `local_file`." 
+ ), + "type": "string", + }, + }, +} +INPUT_FILE_LIST_SCHEMA: dict[str, object] = { + "anyOf": [{"items": INPUT_FILE_ITEM_SCHEMA, "type": "array"}, {"type": "null"}] +} +InputFileList = Annotated[list[dict[str, Any]] | None, WithJsonSchema(INPUT_FILE_LIST_SCHEMA)] + def expect_with_user(namespace: Namespace, model: type[BaseModel]): """Document a JSON request body as ``model`` plus Service API ``user``.""" @@ -54,8 +104,12 @@ def expect_user_json(namespace: Namespace): return decorator -def multipart_file_params(*, include_user: bool) -> dict[str, dict[str, object]]: - params: dict[str, dict[str, object]] = {"file": FILE_FORM_PARAM} +def multipart_file_params(*, include_user: bool, file_description: str | None = None) -> dict[str, dict[str, object]]: + file_param = deepcopy(FILE_FORM_PARAM) + if file_description is not None: + file_param["description"] = file_description + + params: dict[str, dict[str, object]] = {"file": file_param} if include_user: params["user"] = USER_FORM_PARAM return deepcopy(params) diff --git a/api/controllers/service_api/workspace/models.py b/api/controllers/service_api/workspace/models.py index 9d49866e87a..c2ea1a72cf5 100644 --- a/api/controllers/service_api/workspace/models.py +++ b/api/controllers/service_api/workspace/models.py @@ -9,6 +9,12 @@ from graphon.model_runtime.utils.encoders import jsonable_encoder from services.entities.model_provider_entities import ProviderWithModelsResponse from services.model_provider_service import ModelProviderService +MODEL_TYPE_PARAM = { + "description": "Type of model to retrieve.", + "enum": ["text-embedding", "rerank", "llm", "tts", "speech2text", "moderation"], + "type": "string", +} + class ProviderWithModelsListResponse(ResponseModel): data: list[ProviderWithModelsResponse] @@ -32,7 +38,7 @@ class ModelProviderAvailableModelApi(Resource): ) @service_api_ns.doc("get_available_models") @service_api_ns.doc(description="Get available models by model type") - 
@service_api_ns.doc(params={"model_type": "Type of model to retrieve"}) + @service_api_ns.doc(params={"model_type": MODEL_TYPE_PARAM}) @service_api_ns.doc( responses={ 200: "Models retrieved successfully", diff --git a/api/core/rag/entities/metadata_entities.py b/api/core/rag/entities/metadata_entities.py index a2ac44807f8..347d5df8237 100644 --- a/api/core/rag/entities/metadata_entities.py +++ b/api/core/rag/entities/metadata_entities.py @@ -1,7 +1,7 @@ from collections.abc import Sequence -from typing import Literal +from typing import Annotated, Literal -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, WithJsonSchema SupportedComparisonOperator = Literal[ # for string or array @@ -26,6 +26,19 @@ SupportedComparisonOperator = Literal[ "before", "after", ] +ConditionValue = Annotated[ + str | Sequence[str] | None | int | float, + WithJsonSchema( + { + "anyOf": [ + {"type": "string"}, + {"items": {"type": "string"}, "type": "array"}, + {"type": "number"}, + {"type": "null"}, + ] + } + ), +] class Condition(BaseModel): @@ -33,9 +46,23 @@ class Condition(BaseModel): Condition detail """ - name: str - comparison_operator: SupportedComparisonOperator - value: str | Sequence[str] | None | int | float = None + name: str = Field(description="Metadata field name to compare against.") + comparison_operator: SupportedComparisonOperator = Field( + description=( + "Comparison to apply. String operators (`contains`, `not contains`, `start with`, `end with`, `is`, " + "`is not`, `empty`, `not empty`, `in`, `not in`) act on string or array metadata; numeric operators " + "(`=`, `≠`, `>`, `<`, `≥`, `≤`) act on numeric metadata; time operators (`before`, `after`) act on " + "time metadata." + ) + ) + value: ConditionValue = Field( + default=None, + description=( + "Value to compare against. 
Type depends on `comparison_operator`: string for most string operators, " + "array of strings for `in` and `not in`, number for numeric operators, and omit or use `null` for " + "`empty` and `not empty`." + ), + ) class MetadataFilteringCondition(BaseModel): @@ -43,5 +70,12 @@ class MetadataFilteringCondition(BaseModel): Metadata Filtering Condition. """ - logical_operator: Literal["and", "or"] | None = "and" - conditions: list[Condition] | None = Field(default=None, deprecated=True) + logical_operator: Literal["and", "or"] | None = Field( + default="and", + description="How to combine multiple conditions.", + ) + conditions: list[Condition] | None = Field( + default=None, + deprecated=True, + description="List of metadata conditions to evaluate.", + ) diff --git a/api/core/rag/entities/processing_entities.py b/api/core/rag/entities/processing_entities.py index 1b54444a198..46360ec086f 100644 --- a/api/core/rag/entities/processing_entities.py +++ b/api/core/rag/entities/processing_entities.py @@ -1,7 +1,7 @@ from enum import StrEnum -from typing import Literal +from typing import Annotated, Literal -from pydantic import BaseModel +from pydantic import BaseModel, Field, WithJsonSchema class ParentMode(StrEnum): @@ -9,19 +9,39 @@ class ParentMode(StrEnum): PARAGRAPH = "paragraph" +PreProcessingRuleID = Annotated[ + str, + WithJsonSchema( + { + "enum": ["remove_stopwords", "remove_extra_spaces", "remove_urls_emails"], + "type": "string", + } + ), +] + + class PreProcessingRule(BaseModel): - id: str - enabled: bool + id: PreProcessingRuleID = Field(description="Rule identifier.") + enabled: bool = Field(description="Whether this preprocessing rule is enabled.") class Segmentation(BaseModel): - separator: str = "\n" - max_tokens: int - chunk_overlap: int = 0 + separator: str = Field(default="\n", description="Custom separator for splitting text.") + max_tokens: int = Field(description="Maximum token count per chunk.") + chunk_overlap: int = Field(default=0, 
description="Token overlap between chunks.") class Rule(BaseModel): - pre_processing_rules: list[PreProcessingRule] | None = None - segmentation: Segmentation | None = None - parent_mode: Literal["full-doc", "paragraph"] | None = None - subchunk_segmentation: Segmentation | None = None + pre_processing_rules: list[PreProcessingRule] | None = Field( + default=None, + description="Pre-processing rules to apply before segmentation.", + ) + segmentation: Segmentation | None = Field(default=None, description="Parent chunk segmentation settings.") + parent_mode: Literal["full-doc", "paragraph"] | None = Field( + default=None, + description="Parent-child segmentation mode.", + ) + subchunk_segmentation: Segmentation | None = Field( + default=None, + description="Child chunk segmentation settings.", + ) diff --git a/api/libs/flask_restx_compat.py b/api/libs/flask_restx_compat.py index a442be01d84..75f1121af00 100644 --- a/api/libs/flask_restx_compat.py +++ b/api/libs/flask_restx_compat.py @@ -122,6 +122,7 @@ def install_swagger_compatibility() -> None: original_description_for = Swagger.description_for original_serialize_operation = Swagger.serialize_operation original_parameters_and_request_body_for = Swagger.parameters_and_request_body_for + original_request_body_from_form_params = Swagger.request_body_from_form_params original_as_dict = Swagger.as_dict def get_or_create_inline_model(self: Swagger, nested_fields: dict[object, object]) -> object: @@ -203,6 +204,35 @@ def install_swagger_compatibility() -> None: path[method] = operation return not_none(path) + def request_body_from_form_params_with_file_description(self: Swagger, params: list[dict[str, object]]): + request_body = original_request_body_from_form_params(self, params) + for param in params: + if param.get("type") != "file": + continue + + name = param.get("name") + description = param.get("description") + if not isinstance(name, str) or not isinstance(description, str): + continue + + content = 
request_body.get("content") + if not isinstance(content, dict): + continue + multipart = content.get("multipart/form-data") + if not isinstance(multipart, dict): + continue + schema = multipart.get("schema") + if not isinstance(schema, dict): + continue + properties = schema.get("properties") + if not isinstance(properties, dict): + continue + file_schema = properties.get(name) + if isinstance(file_schema, dict): + file_schema["description"] = description + + return request_body + def as_dict_with_inline_dict_support(self: Swagger): # Temporary set RESTX_INCLUDE_ALL_MODELS = false to prevent "length changed while iterating" error include_all_models = current_app.config.get("RESTX_INCLUDE_ALL_MODELS", False) @@ -219,5 +249,6 @@ def install_swagger_compatibility() -> None: Swagger.description_for = description_for_with_explicit_summary Swagger.serialize_operation = serialize_operation_with_explicit_summary_tags Swagger.serialize_resource = serialize_resource_with_explicit_operation_tags + Swagger.request_body_from_form_params = request_body_from_form_params_with_file_description Swagger.as_dict = as_dict_with_inline_dict_support Swagger._dify_swagger_compatibility_installed = True diff --git a/api/openapi/markdown/console-openapi.md b/api/openapi/markdown/console-openapi.md index 70c7a1aa9f1..22177033426 100644 --- a/api/openapi/markdown/console-openapi.md +++ b/api/openapi/markdown/console-openapi.md @@ -6390,9 +6390,9 @@ Request body: | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| conversation_id | query | Conversation UUID | Yes | string | -| first_id | query | First message ID for pagination | No | string | -| limit | query | Number of messages to return (1-100) | No | integer,
**Default:** 20 | +| conversation_id | query | Conversation ID. | Yes | string | +| first_id | query | The ID of the first chat record on the current page. Omit this value to fetch the latest messages; for subsequent pages, use the first message ID from the current list to fetch older messages. | No | string | +| limit | query | Number of chat history messages to return per request. | No | integer,
**Default:** 20 | | installed_app_id | path | | Yes | string (uuid) | #### Responses @@ -13352,7 +13352,7 @@ Button styles for user actions. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| content | string | | Yes | +| content | string | Child chunk text content. | Yes | #### ChildChunkDetailResponse @@ -13395,14 +13395,14 @@ Button styles for user actions. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| content | string | | Yes | -| id | string | | No | +| content | string | Child chunk text content. | Yes | +| id | string | Existing child chunk ID. Omit to create a new child chunk. | No | #### ChildChunkUpdatePayload | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| content | string | | Yes | +| content | string | Child chunk text content. | Yes | #### CliToolSuggestion @@ -13560,9 +13560,9 @@ Condition detail | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| comparison_operator | string,
**Available values:** "<", "=", ">", "after", "before", "contains", "empty", "end with", "in", "is", "is not", "not contains", "not empty", "not in", "start with", "≠", "≤", "≥" | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes | -| name | string | | Yes | -| value | string
[ string ]
integer
number | | No | +| comparison_operator | string,
**Available values:** "<", "=", ">", "after", "before", "contains", "empty", "end with", "in", "is", "is not", "not contains", "not empty", "not in", "start with", "≠", "≤", "≥" | Comparison to apply. String operators (`contains`, `not contains`, `start with`, `end with`, `is`, `is not`, `empty`, `not empty`, `in`, `not in`) act on string or array metadata; numeric operators (`=`, `≠`, `>`, `<`, `≥`, `≤`) act on numeric metadata; time operators (`before`, `after`) act on time metadata.
*Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes | +| name | string | Metadata field name to compare against. | Yes | +| value | string
[ string ]
number | Value to compare against. Type depends on `comparison_operator`: string for most string operators, array of strings for `in` and `not in`, number for numeric operators, and omit or use `null` for `empty` and `not empty`. | No | #### ConfigurateMethod @@ -13704,8 +13704,8 @@ Enum class for configurate method of provider model. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| auto_generate | boolean | | No | -| name | string | | No | +| auto_generate | boolean | Automatically generate the conversation name. When `true`, the `name` field is ignored. | No | +| name | string | Conversation name. Required when `auto_generate` is `false`. | No | #### ConversationVariableResponse @@ -14682,15 +14682,15 @@ Request payload for bulk downloading documents as a zip archive. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| document_ids | [ string (uuid) ] | | Yes | +| document_ids | [ string (uuid) ] | List of document IDs to include in the ZIP download. | Yes | #### DocumentMetadataOperation | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| document_id | string | | Yes | -| metadata_list | [ [MetadataDetail](#metadatadetail) ] | | Yes | -| partial_update | boolean | | No | +| document_id | string | Document ID whose metadata should be updated. | Yes | +| metadata_list | [ [MetadataDetail](#metadatadetail) ] | Metadata fields to update. | Yes | +| partial_update | boolean | Whether to partially update metadata, keeping existing values for unspecified fields. | No | #### DocumentMetadataResponse @@ -15472,10 +15472,10 @@ Enum class for form type. 
| Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| attachment_ids | [ string ] | | No | -| external_retrieval_model | object | | No | -| query | string | | Yes | -| retrieval_model | [RetrievalModel](#retrievalmodel) | | No | +| attachment_ids | [ string ] | List of attachment IDs to include in the retrieval context. | No | +| external_retrieval_model | object | Retrieval settings for external knowledge bases. | No | +| query | string | Search query text. | Yes | +| retrieval_model | [RetrievalModel](#retrievalmodel) | Retrieval model configuration. Controls how chunks are searched and ranked. | No | #### HitTestingQuery @@ -15857,19 +15857,19 @@ Input field definition for snippet parameters. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| data_source | [DataSource](#datasource) | | No | -| doc_form | string,
**Default:** text_model | | No | -| doc_language | string,
**Default:** English | | No | -| duplicate | boolean,
**Default:** true | | No | -| embedding_model | string | | No | -| embedding_model_provider | string | | No | -| indexing_technique | string,
**Available values:** "economy", "high_quality" | *Enum:* `"economy"`, `"high_quality"` | Yes | -| is_multimodal | boolean | | No | -| name | string | | No | -| original_document_id | string | | No | -| process_rule | [ProcessRule](#processrule) | | No | -| retrieval_model | [RetrievalModel](#retrievalmodel) | | No | -| summary_index_setting | object | | No | +| data_source | [DataSource](#datasource) | Document data source configuration. | No | +| doc_form | string,
**Available values:** "hierarchical_model", "qa_model", "text_model",
**Default:** text_model | `text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, `qa_model` for question-answer pair extraction.
*Enum:* `"hierarchical_model"`, `"qa_model"`, `"text_model"` | No | +| doc_language | string,
**Default:** English | Language of the document for processing optimization. | No | +| duplicate | boolean,
**Default:** true | Whether duplicate document content is allowed. | No | +| embedding_model | string | Embedding model name. Use the `model` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`. | No | +| embedding_model_provider | string | Embedding model provider. Use the `provider` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`. | No | +| indexing_technique | string,
**Available values:** "economy", "high_quality" | `high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing. Required when adding the first document to a knowledge base; subsequent documents inherit the knowledge base's indexing technique if omitted.
*Enum:* `"economy"`, `"high_quality"` | Yes | +| is_multimodal | boolean | Whether the document uses multimodal indexing. | No | +| name | string | Document name. | No | +| original_document_id | string | Original document ID for replacement updates. | No | +| process_rule | [ProcessRule](#processrule) | Processing rules for chunking. | No | +| retrieval_model | [RetrievalModel](#retrievalmodel) | Retrieval model configuration. Controls how chunks are searched and ranked in this knowledge base. | No | +| summary_index_setting | object | Summary index configuration. | No | #### KnowledgePipeline @@ -16142,9 +16142,9 @@ Enum class for large language model mode. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| content | string | | No | +| content | string | Optional text feedback providing additional detail. | No | | message_id | string | Message ID | Yes | -| rating | string | | No | +| rating | string | Feedback rating. Set to `null` to revoke previously submitted feedback. | No | #### MessageFile @@ -16199,24 +16199,24 @@ Enum class for large language model mode. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| conversation_id | string | Conversation UUID | Yes | -| first_id | string | First message ID for pagination | No | -| limit | integer,
**Default:** 20 | Number of messages to return (1-100) | No | +| conversation_id | string | Conversation ID. | Yes | +| first_id | string | The ID of the first chat record on the current page. Omit this value to fetch the latest messages; for subsequent pages, use the first message ID from the current list to fetch older messages. | No | +| limit | integer,
**Default:** 20 | Number of chat history messages to return per request. | No | #### MetadataArgs | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| name | string | | Yes | -| type | string,
**Available values:** "number", "string", "time" | *Enum:* `"number"`, `"string"`, `"time"` | Yes | +| name | string | Metadata field name. | Yes | +| type | string,
**Available values:** "number", "string", "time" | `string` for text values, `number` for numeric values, `time` for date/time values.
*Enum:* `"number"`, `"string"`, `"time"` | Yes | #### MetadataDetail | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| id | string | | Yes | -| name | string | | Yes | -| value | string
integer
number | | No | +| id | string | Metadata field ID. | Yes | +| name | string | Metadata field name. | Yes | +| value | string
integer
number | Metadata value. Can be a string, number, or `null`. | No | #### MetadataFilteringCondition @@ -16224,8 +16224,8 @@ Metadata Filtering Condition. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| conditions | [ [Condition](#condition) ] | | No | -| logical_operator | string | | No | +| conditions | [ [Condition](#condition) ] | List of metadata conditions to evaluate. | No | +| logical_operator | string | How to combine multiple conditions. | No | #### MetadataOperationData @@ -16233,13 +16233,13 @@ Metadata operation data | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| operation_data | [ [DocumentMetadataOperation](#documentmetadataoperation) ] | | Yes | +| operation_data | [ [DocumentMetadataOperation](#documentmetadataoperation) ] | Array of document metadata update operations. Each entry maps a document ID to its metadata values. | Yes | #### MetadataUpdatePayload | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| name | string | | Yes | +| name | string | New metadata field name. | Yes | #### ModelConfig @@ -17415,8 +17415,8 @@ Shared permission levels for resources (datasets, credentials, etc.) | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| enabled | boolean | | Yes | -| id | string | | Yes | +| enabled | boolean | Whether this preprocessing rule is enabled. | Yes | +| id | string,
**Available values:** "remove_extra_spaces", "remove_stopwords", "remove_urls_emails" | Rule identifier.
*Enum:* `"remove_extra_spaces"`, `"remove_stopwords"`, `"remove_urls_emails"` | Yes | #### PreviewDetail @@ -17441,8 +17441,8 @@ Serialized pricing info with codegen-safe decimal string patterns. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| mode | [ProcessRuleMode](#processrulemode) | | Yes | -| rules | [Rule](#rule) | | No | +| mode | [ProcessRuleMode](#processrulemode) | Processing mode. `automatic` uses built-in rules, `custom` allows manual configuration, and `hierarchical` enables parent-child chunk structure for `doc_form: hierarchical_model`. | Yes | +| rules | [Rule](#rule) | Custom processing rules. | No | #### ProcessRuleMode @@ -17799,8 +17799,8 @@ Model class for provider quota configuration. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| reranking_model_name | string | | No | -| reranking_provider_name | string | | No | +| reranking_model_name | string | Name of the reranking model. | No | +| reranking_provider_name | string | Provider name of the reranking model. | No | #### RestrictModel @@ -17826,15 +17826,15 @@ Model class for provider quota configuration. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| metadata_filtering_conditions | [MetadataFilteringCondition](#metadatafilteringcondition) | | No | -| reranking_enable | boolean | | Yes | -| reranking_mode | string | | No | -| reranking_model | [RerankingModel](#rerankingmodel) | | No | -| score_threshold | number | | No | -| score_threshold_enabled | boolean | | Yes | -| search_method | [RetrievalMethod](#retrievalmethod) | | Yes | -| top_k | integer | | Yes | -| weights | [WeightModel](#weightmodel) | | No | +| metadata_filtering_conditions | [MetadataFilteringCondition](#metadatafilteringcondition) | Restrict retrieval to chunks whose document metadata matches the given conditions. Conditions are evaluated server-side against document metadata fields. 
| No | +| reranking_enable | boolean | Whether reranking is enabled. | Yes | +| reranking_mode | string | Reranking mode. Required when `reranking_enable` is `true`. | No | +| reranking_model | [RerankingModel](#rerankingmodel) | Reranking model configuration. | No | +| score_threshold | number | Minimum similarity score for results. Only effective when score threshold filtering is enabled. | No | +| score_threshold_enabled | boolean | Whether score threshold filtering is enabled. | Yes | +| search_method | [RetrievalMethod](#retrievalmethod) | Search method used for retrieval. | Yes | +| top_k | integer | Maximum number of results to return. | Yes | +| weights | [WeightModel](#weightmodel) | Weight configuration for hybrid search. | No | #### RetrievalSettingResponse @@ -17876,10 +17876,10 @@ Model class for provider quota configuration. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| parent_mode | string | | No | -| pre_processing_rules | [ [PreProcessingRule](#preprocessingrule) ] | | No | -| segmentation | [Segmentation](#segmentation) | | No | -| subchunk_segmentation | [Segmentation](#segmentation) | | No | +| parent_mode | string | Parent-child segmentation mode. | No | +| pre_processing_rules | [ [PreProcessingRule](#preprocessingrule) ] | Pre-processing rules to apply before segmentation. | No | +| segmentation | [Segmentation](#segmentation) | Parent chunk segmentation settings. | No | +| subchunk_segmentation | [Segmentation](#segmentation) | Child chunk segmentation settings. | No | #### RuleCodeGeneratePayload @@ -18083,10 +18083,10 @@ Model class for provider quota configuration. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| chunk_overlap | integer | | No | -| max_tokens | integer | | Yes | +| chunk_overlap | integer | Token overlap between chunks. | No | +| max_tokens | integer | Maximum token count per chunk. | Yes | | separator | string,
**Default:** - | | No | + | Custom separator for splitting text. | No | #### SelectInputConfig @@ -18690,10 +18690,10 @@ Tag type | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| message_id | string | Message ID | No | +| message_id | string | Message ID. Takes priority over `text` when both are provided. | No | | streaming | boolean | Reserved for compatibility; TTS response streaming is determined by the provider output. | No | -| text | string | Text to convert to audio | No | -| voice | string | Voice to use for TTS | No | +| text | string | Speech content to convert. | No | +| voice | string | Voice to use for text-to-speech. Available voices depend on the TTS provider configured for this app. Omit to use the app's configured voice when available; that value is exposed by [Get App Parameters](/api-reference/applications/get-app-parameters) as `text_to_speech.voice`. | No | #### TextToSpeechPayload @@ -19263,23 +19263,23 @@ in form definiton, or a variable while the workflow is running. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| keyword_weight | number | | Yes | +| keyword_weight | number | Weight assigned to keyword search results. | Yes | #### WeightModel | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| keyword_setting | [WeightKeywordSetting](#weightkeywordsetting) | | No | -| vector_setting | [WeightVectorSetting](#weightvectorsetting) | | No | -| weight_type | string | | No | +| keyword_setting | [WeightKeywordSetting](#weightkeywordsetting) | Keyword search weight settings. | No | +| vector_setting | [WeightVectorSetting](#weightvectorsetting) | Semantic search weight settings. | No | +| weight_type | string | Strategy for balancing semantic and keyword search weights. 
| No | #### WeightVectorSetting | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| embedding_model_name | string | | Yes | -| embedding_provider_name | string | | Yes | -| vector_weight | number | | Yes | +| embedding_model_name | string | Name of the embedding model used for vector search. | Yes | +| embedding_provider_name | string | Provider of the embedding model used for vector search. | Yes | +| vector_weight | number | Weight assigned to semantic vector search results. | Yes | #### WorkflowAgentBindingType @@ -19975,8 +19975,8 @@ can reuse its existing handler. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| files | [ object ] | | No | -| inputs | object | | Yes | +| files | [ object ] | File list for workflow system file inputs. Available when file upload is enabled for the workflow. To attach a local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned `id` as `upload_file_id` with `transfer_method: local_file`. | No | +| inputs | object | Key-value pairs for workflow input variables. Values for file-type variables should be arrays of file objects with `type`, `transfer_method`, and either `url` or `upload_file_id`. Refer to the `user_input_form` field in the [Get App Parameters](/api-reference/applications/get-app-parameters) response to discover the variable names and types expected by your app. | Yes | #### WorkflowRunQuery diff --git a/api/openapi/markdown/openapi-openapi.md b/api/openapi/markdown/openapi-openapi.md index 337be4f74ec..4b1da3f1c59 100644 --- a/api/openapi/markdown/openapi-openapi.md +++ b/api/openapi/markdown/openapi-openapi.md @@ -792,7 +792,7 @@ Liveness payload for `GET /openapi/v1/_health` — no auth required. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| action | string | | Yes | +| action | string | ID of the action button the recipient selected. 
Must match one of the `id` values from the form's `user_actions` list. | Yes | | inputs | object | Submitted human input values keyed by output variable name. Use a string for paragraph or select input values, a file mapping for file inputs, and a list of file mappings for file-list inputs. Local file mappings use `transfer_method=local_file` with `upload_file_id`; remote file mappings use `transfer_method=remote_url` with `url` or `remote_url`. | Yes | #### Import diff --git a/api/openapi/markdown/service-openapi.md b/api/openapi/markdown/service-openapi.md index fdce4d1a2c2..a58ca2766d7 100644 --- a/api/openapi/markdown/service-openapi.md +++ b/api/openapi/markdown/service-openapi.md @@ -29,7 +29,7 @@ Deprecated legacy alias for creating a new document by providing text content. U | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Request Body @@ -55,8 +55,8 @@ Permanently delete a document and all its chunks from the knowledge base. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | #### Responses @@ -77,9 +77,9 @@ Retrieve detailed information about a specific document, including its indexing | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | -| metadata | query | Metadata response mode | No | string,
**Available values:** "all", "only", "without",
**Default:** all | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | +| metadata | query | `all` returns all fields including metadata. `only` returns only `id`, `doc_type`, and `doc_metadata`. `without` returns all fields except `doc_metadata`. | No | string,
**Available values:** "all", "only", "without",
**Default:** all | #### Responses @@ -98,8 +98,8 @@ Update an existing document by uploading a file | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | #### Request Body @@ -126,8 +126,8 @@ Deprecated legacy alias for updating an existing document by providing text cont | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | #### Request Body @@ -156,8 +156,8 @@ Retrieve a paginated list of all feedback submitted for messages in this applica | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| limit | query | Number of feedbacks per page | No | integer,
**Default:** 20 | -| page | query | Page number | No | integer,
**Default:** 1 | +| limit | query | Number of records per page. | No | integer,
**Default:** 20 | +| page | query | Page number for pagination. | No | integer,
**Default:** 1 | #### Responses @@ -176,7 +176,7 @@ Submit feedback for a message. End users can rate messages as `like` or `dislike | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| message_id | path | Message ID | Yes | string (uuid) | +| message_id | path | Message ID. | Yes | string (uuid) | #### Request Body @@ -205,7 +205,7 @@ Enables or disables the annotation reply feature. Requires embedding model confi | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| action | path | Action to perform: 'enable' or 'disable' | Yes | string,
**Available values:** "disable", "enable" | +| action | path | Action to perform: `enable` or `disable`. | Yes | string,
**Available values:** "disable", "enable" | #### Request Body @@ -230,8 +230,8 @@ Retrieves the status of an asynchronous annotation reply configuration job start | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| action | path | Action type | Yes | string | -| job_id | path | Job ID | Yes | string (uuid) | +| action | path | Action to perform: `enable` or `disable`. | Yes | string,
**Available values:** "disable", "enable" | +| job_id | path | Job ID returned by [Configure Annotation Reply](/api-reference/annotations/configure-annotation-reply). | Yes | string (uuid) | #### Responses @@ -252,9 +252,9 @@ Retrieves a paginated list of annotations for the application. Supports keyword | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| keyword | query | Keyword to search annotations | No | string | -| limit | query | Number of annotations per page | No | integer,
**Default:** 20 | -| page | query | Page number | No | integer,
**Default:** 1 | +| keyword | query | Keyword to filter annotations by question or answer content. | No | string | +| limit | query | Number of items per page. | No | integer,
**Default:** 20 | +| page | query | Page number for pagination. | No | integer,
**Default:** 1 | #### Responses @@ -292,7 +292,7 @@ Deletes an annotation and its associated hit history. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| annotation_id | path | Annotation ID | Yes | string (uuid) | +| annotation_id | path | The unique identifier of the annotation to delete. | Yes | string (uuid) | #### Responses @@ -312,7 +312,7 @@ Updates the question and answer of an existing annotation. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| annotation_id | path | Annotation ID | Yes | string (uuid) | +| annotation_id | path | The unique identifier of the annotation to update. | Yes | string (uuid) | #### Request Body @@ -411,7 +411,7 @@ Stops a chat message generation task. Only supported in `streaming` mode. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| task_id | path | The ID of the task to stop | Yes | string | +| task_id | path | Task ID, obtained from a streaming chunk returned by the Send Chat Message API. | Yes | string | #### Request Body @@ -439,7 +439,7 @@ Get next questions suggestions for the current message. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | | message_id | path | Message ID | Yes | string (uuid) | -| user | query | End user identifier | Yes | string | +| user | query | User identifier, used for end-user context. 
| Yes | string | #### Responses @@ -461,10 +461,10 @@ Resume the Server-Sent Events stream for a workflow run after a pause or a dropp | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| task_id | path | Workflow run ID | Yes | string | -| continue_on_pause | query | Keep the stream open across workflow_paused events | No | boolean | -| include_state_snapshot | query | Replay from persisted state snapshot | No | boolean | -| user | query | End user identifier | Yes | string | +| task_id | path | Workflow run ID returned by the original workflow run request. | Yes | string | +| continue_on_pause | query | Set to `true` to keep the stream open across multiple `workflow_paused` events, which is useful when the workflow has more than one Human Input node in sequence. By default, the stream closes after the first pause. | No | boolean | +| include_state_snapshot | query | When `true`, replay from the persisted state snapshot to include a status summary of already-executed nodes before streaming new events. | No | boolean | +| user | query | End-user identifier that originally triggered the run. Must match the creator of the run. | Yes | string | #### Responses @@ -485,14 +485,14 @@ Retrieve paginated workflow execution logs with filtering options. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| created_at__after | query | | No | string | -| created_at__before | query | | No | string | -| created_by_account | query | | No | string | -| created_by_end_user_session_id | query | | No | string | -| keyword | query | | No | string | -| limit | query | | No | integer,
**Default:** 20 | -| page | query | | No | integer,
**Default:** 1 | -| status | query | | No | string,
**Available values:** "failed", "stopped", "succeeded" | +| created_at__after | query | Filter logs created after this ISO 8601 timestamp. | No | dateTime | +| created_at__before | query | Filter logs created before this ISO 8601 timestamp. | No | dateTime | +| created_by_account | query | Filter by account ID. | No | string | +| created_by_end_user_session_id | query | Filter by end user session ID. | No | string | +| keyword | query | Keyword to search in logs. | No | string | +| limit | query | Number of items per page. | No | integer,
**Default:** 20 | +| page | query | Page number for pagination. | No | integer,
**Default:** 1 | +| status | query | Filter by execution status. | No | string,
**Available values:** "failed", "stopped", "succeeded" | #### Responses @@ -511,7 +511,7 @@ Retrieve the current execution results of a workflow task based on the workflow | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| workflow_run_id | path | Workflow run ID | Yes | string | +| workflow_run_id | path | Workflow run ID, obtained from the workflow execution response or streaming events. | Yes | string | #### Responses @@ -558,7 +558,7 @@ Stops a chat message generation task. Only supported in `streaming` mode. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| task_id | path | The ID of the task to stop | Yes | string | +| task_id | path | Task ID, obtained from a streaming chunk returned by the Send Chat Message API. | Yes | string | #### Request Body @@ -586,7 +586,7 @@ Get next questions suggestions for the current message. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | | message_id | path | Message ID | Yes | string (uuid) | -| user | query | End user identifier | Yes | string | +| user | query | User identifier, used for end-user context. | Yes | string | #### Responses @@ -634,7 +634,7 @@ Stops a completion message generation task. Only supported in `streaming` mode. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| task_id | path | The ID of the task to stop | Yes | string | +| task_id | path | Task ID, obtained from a streaming chunk returned by the Send Completion Message API. 
| Yes | string | #### Request Body @@ -664,10 +664,10 @@ Retrieve the conversation list for the current user, ordered by most recently ac | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| last_id | query | Last conversation ID for pagination | No | string | -| limit | query | Number of conversations to return | No | integer,
**Default:** 20 | -| sort_by | query | Sort order for conversations | No | string,
**Available values:** "-created_at", "-updated_at", "created_at", "updated_at",
**Default:** -updated_at | -| user | query | End user identifier | No | string | +| last_id | query | The ID of the last record on the current page. Used to fetch the next page. | No | string | +| limit | query | Number of records to return. | No | integer,
**Default:** 20 | +| sort_by | query | Sorting field. Use the `-` prefix for descending order. | No | string,
**Available values:** "-created_at", "-updated_at", "created_at", "updated_at",
**Default:** -updated_at | +| user | query | User identifier, used for end-user context. | No | string | #### Responses @@ -688,7 +688,7 @@ Delete a conversation. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| c_id | path | Conversation ID | Yes | string (uuid) | +| c_id | path | Conversation ID. | Yes | string (uuid) | #### Request Body @@ -715,7 +715,7 @@ Rename a conversation or auto-generate a name. The conversation name is used for | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| c_id | path | Conversation ID | Yes | string (uuid) | +| c_id | path | Conversation ID. | Yes | string (uuid) | #### Request Body @@ -742,11 +742,11 @@ Retrieve variables from a specific conversation. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| c_id | path | Conversation ID | Yes | string (uuid) | -| last_id | query | Last variable ID for pagination | No | string | -| limit | query | Number of variables to return | No | integer,
**Default:** 20 | -| user | query | End user identifier | No | string | -| variable_name | query | Filter variables by name | No | string | +| c_id | path | Conversation ID. | Yes | string (uuid) | +| last_id | query | The ID of the last record on the current page. Used to fetch the next page. | No | string | +| limit | query | Number of records to return. | No | integer,
**Default:** 20 | +| user | query | User identifier, used for end-user context. | No | string | +| variable_name | query | Filter variables by a specific name. | No | string | #### Responses @@ -767,8 +767,8 @@ Update the value of a specific conversation variable. The value must match the e | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| c_id | path | Conversation ID | Yes | string (uuid) | -| variable_id | path | Variable ID | Yes | string (uuid) | +| c_id | path | Conversation ID. | Yes | string (uuid) | +| variable_id | path | Variable ID. | Yes | string (uuid) | #### Request Body @@ -795,10 +795,10 @@ Returns historical chat records in a scrolling load format, with the first page | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| conversation_id | query | Conversation UUID | Yes | string | -| first_id | query | First message ID for pagination | No | string | -| limit | query | Number of messages to return (1-100) | No | integer,
**Default:** 20 | -| user | query | End user identifier | No | string | +| conversation_id | query | Conversation ID. | Yes | string | +| first_id | query | The ID of the first chat record on the current page. Omit this value to fetch the latest messages; for subsequent pages, use the first message ID from the current list to fetch older messages. | No | string | +| limit | query | Number of chat history messages to return per request. | No | integer,
**Default:** 20 | +| user | query | User identifier, used for end-user context. | No | string | #### Responses @@ -822,11 +822,11 @@ Returns a paginated list of knowledge bases. Supports filtering by keyword and t | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| include_all | query | Include all datasets | No | boolean | -| keyword | query | Search keyword | No | string | -| limit | query | Number of items per page | No | integer,
**Default:** 20 | -| page | query | Page number | No | integer,
**Default:** 1 | -| tag_ids | query | Filter by tag IDs | No | [ string ] | +| include_all | query | Whether to include all knowledge bases regardless of permissions. | No | boolean | +| keyword | query | Search keyword to filter by name. | No | string | +| limit | query | Number of items per page. Server caps at `100`. | No | integer,
**Default:** 20 | +| page | query | Page number to retrieve. | No | integer,
**Default:** 1 | +| tag_ids | query | Tag IDs to filter by. | No | [ string ] | #### Responses @@ -866,7 +866,7 @@ Permanently delete a knowledge base and all its documents. The knowledge base mu | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Responses @@ -887,7 +887,7 @@ Retrieve detailed information about a specific knowledge base, including its emb | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Responses @@ -907,7 +907,7 @@ Update the name, description, permissions, or retrieval settings of an existing | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Request Body @@ -933,7 +933,7 @@ Performs a search query against a knowledge base to retrieve the most relevant c | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Request Body @@ -961,7 +961,7 @@ Performs a search query against a knowledge base to retrieve the most relevant c | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Request Body @@ -1014,8 +1014,8 @@ List the datasource nodes configured in the knowledge pipeline. 
Each node includ | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| is_published | query | | No | boolean,
**Default:** true | -| dataset_id | path | | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| is_published | query | Whether to retrieve nodes from the published or draft pipeline. `true` returns nodes from the published version, `false` returns nodes from the draft. | No | boolean,
**Default:** true | #### Responses @@ -1035,8 +1035,8 @@ Execute a single datasource node within the knowledge pipeline. Returns a stream | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | | Yes | string (uuid) | -| node_id | path | | Yes | string | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| node_id | path | ID of the datasource node to execute. | Yes | string | #### Request Body @@ -1062,7 +1062,7 @@ Execute the full knowledge pipeline for a knowledge base. Supports both streamin | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Request Body @@ -1200,7 +1200,7 @@ Returns the list of tags bound to a specific knowledge base. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Responses @@ -1222,7 +1222,7 @@ Create a document by uploading a file. Supports common document formats (PDF, TX | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Request Body @@ -1248,7 +1248,7 @@ Create a document from raw text content. The document is processed asynchronousl | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Request Body @@ -1277,7 +1277,7 @@ Create a document by uploading a file. 
Supports common document formats (PDF, TX | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Request Body @@ -1303,11 +1303,11 @@ Returns a paginated list of documents in the knowledge base. Supports filtering | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| keyword | query | Search keyword | No | string | -| limit | query | Number of items per page | No | integer,
**Default:** 20 | -| page | query | Page number | No | integer,
**Default:** 1 | -| status | query | Document status filter | No | string | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| keyword | query | Search keyword to filter by document name. | No | string | +| limit | query | Number of items per page. Server caps at `100`. | No | integer,
**Default:** 20 | +| page | query | Page number to retrieve. | No | integer,
**Default:** 1 | +| status | query | Filter by display status. | No | string,
**Available values:** "archived", "available", "disabled", "error", "indexing", "paused", "queuing" | #### Responses @@ -1327,7 +1327,7 @@ Download multiple uploaded-file documents as a single ZIP archive. Accepts up to | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Request Body @@ -1354,7 +1354,7 @@ Enable, disable, archive, or unarchive multiple documents at once. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | | action | path | Action to perform: 'enable', 'disable', 'archive', or 'un_archive' | Yes | string,
**Available values:** "archive", "disable", "enable", "un_archive" | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Request Body @@ -1381,8 +1381,8 @@ Check the indexing progress of documents in a batch. Returns the current process | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| batch | path | Batch ID | Yes | string | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| batch | path | Batch ID. | Yes | string | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Responses @@ -1402,8 +1402,8 @@ Permanently delete a document and all its chunks from the knowledge base. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | #### Responses @@ -1424,9 +1424,9 @@ Retrieve detailed information about a specific document, including its indexing | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | -| metadata | query | Metadata response mode | No | string,
**Available values:** "all", "only", "without",
**Default:** all | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | +| metadata | query | `all` returns all fields including metadata. `only` returns only `id`, `doc_type`, and `doc_metadata`. `without` returns all fields except `doc_metadata`. | No | string,
**Available values:** "all", "only", "without",
**Default:** all | #### Responses @@ -1445,8 +1445,8 @@ Update an existing document by uploading a file | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | #### Request Body @@ -1472,8 +1472,8 @@ Get a signed download URL for a document's original uploaded file. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | #### Responses @@ -1496,8 +1496,8 @@ Update an existing document by uploading a new file. Re-triggers indexing — us | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | #### Request Body @@ -1524,8 +1524,8 @@ Update an existing document's text content, name, or processing configuration. R | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | #### Request Body @@ -1555,8 +1555,8 @@ Update an existing document by uploading a new file. 
Re-triggers indexing — us | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | #### Request Body @@ -1586,7 +1586,7 @@ Update metadata values for multiple documents at once. Each document in the requ | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Request Body @@ -1612,7 +1612,7 @@ Returns the list of all metadata fields (both custom and built-in) for the knowl | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Responses @@ -1632,7 +1632,7 @@ Create a custom metadata field for the knowledge base. Metadata fields can be us | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Request Body @@ -1658,7 +1658,7 @@ Returns the list of built-in metadata fields provided by the system (e.g., docum | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Responses @@ -1677,8 +1677,8 @@ Enable or disable built-in metadata fields for the knowledge base. 
| Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| action | path | Action to perform: 'enable' or 'disable' | Yes | string,
**Available values:** "disable", "enable" | -| dataset_id | path | Dataset ID | Yes | string (uuid) | +| action | path | `enable` to activate built-in metadata fields, `disable` to deactivate them. | Yes | string,
**Available values:** "disable", "enable" | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | #### Responses @@ -1698,8 +1698,8 @@ Permanently delete a custom metadata field. Documents using this field will lose | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| metadata_id | path | Metadata ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| metadata_id | path | Metadata field ID. | Yes | string (uuid) | #### Responses @@ -1719,8 +1719,8 @@ Rename a custom metadata field. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| metadata_id | path | Metadata ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| metadata_id | path | Metadata field ID. | Yes | string (uuid) | #### Request Body @@ -1749,12 +1749,12 @@ Returns a paginated list of chunks within a document. Supports filtering by keyw | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | -| keyword | query | | No | string | -| limit | query | | No | integer,
**Default:** 20 | -| page | query | | No | integer,
**Default:** 1 | -| status | query | | No | [ string ] | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | +| keyword | query | Search keyword. | No | string | +| limit | query | Number of items per page. Server caps at `100`. | No | integer,
**Default:** 20 | +| page | query | Page number to retrieve. | No | integer,
**Default:** 1 | +| status | query | Filter chunks by indexing status, such as `completed`, `indexing`, or `error`. | No | [ string ] | #### Responses @@ -1774,8 +1774,8 @@ Create one or more chunks within a document. Each chunk can include optional key | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | #### Request Body @@ -1802,9 +1802,9 @@ Permanently delete a chunk from the document. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | -| segment_id | path | Segment ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | +| segment_id | path | Chunk ID. | Yes | string (uuid) | #### Responses @@ -1824,9 +1824,9 @@ Retrieve detailed information about a specific chunk, including its content, key | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | -| segment_id | path | Segment ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | +| segment_id | path | Chunk ID. | Yes | string (uuid) | #### Responses @@ -1846,9 +1846,9 @@ Update a chunk's content, keywords, or answer. 
Re-triggers indexing for the modi | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | -| segment_id | path | Segment ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | +| segment_id | path | Chunk ID. | Yes | string (uuid) | #### Request Body @@ -1874,12 +1874,12 @@ Returns a paginated list of child chunks under a specific parent chunk. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | -| segment_id | path | Parent segment ID | Yes | string (uuid) | -| keyword | query | | No | string | -| limit | query | | No | integer,
**Default:** 20 | -| page | query | | No | integer,
**Default:** 1 | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | +| segment_id | path | Chunk ID. | Yes | string (uuid) | +| keyword | query | Search keyword. | No | string | +| limit | query | Number of items per page. Server caps at `100`. | No | integer,
**Default:** 20 | +| page | query | Page number to retrieve. | No | integer,
**Default:** 1 | #### Responses @@ -1899,9 +1899,9 @@ Create a child chunk under the specified segment. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | -| segment_id | path | Parent segment ID | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | +| segment_id | path | Chunk ID. | Yes | string (uuid) | #### Request Body @@ -1928,10 +1928,10 @@ Permanently delete a child chunk from its parent chunk. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| child_chunk_id | path | Child chunk ID | Yes | string (uuid) | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | -| segment_id | path | Parent segment ID | Yes | string (uuid) | +| child_chunk_id | path | Child chunk ID. | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | +| segment_id | path | Chunk ID. | Yes | string (uuid) | #### Responses @@ -1952,10 +1952,10 @@ Update the content of an existing child chunk. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| child_chunk_id | path | Child chunk ID | Yes | string (uuid) | -| dataset_id | path | Dataset ID | Yes | string (uuid) | -| document_id | path | Document ID | Yes | string (uuid) | -| segment_id | path | Parent segment ID | Yes | string (uuid) | +| child_chunk_id | path | Child chunk ID. | Yes | string (uuid) | +| dataset_id | path | Knowledge base ID. | Yes | string (uuid) | +| document_id | path | Document ID. | Yes | string (uuid) | +| segment_id | path | Chunk ID. 
| Yes | string (uuid) | #### Request Body @@ -2030,9 +2030,9 @@ Preview or download uploaded files previously uploaded via the [Upload File](/ap | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| file_id | path | UUID of the file to preview | Yes | string (uuid) | -| as_attachment | query | Download as attachment | No | boolean | -| user | query | End user identifier | No | string | +| file_id | path | The unique identifier of the file to preview, obtained from the [Upload File](/api-reference/files/upload-file) API response. | Yes | string (uuid) | +| as_attachment | query | If `true`, forces the file to download as an attachment instead of previewing in browser. | No | boolean | +| user | query | User identifier, used for end-user context. | No | string | #### Responses @@ -2166,10 +2166,10 @@ Resume the Server-Sent Events stream for a workflow run after a pause or a dropp | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| task_id | path | Workflow run ID | Yes | string | -| continue_on_pause | query | Keep the stream open across workflow_paused events | No | boolean | -| include_state_snapshot | query | Replay from persisted state snapshot | No | boolean | -| user | query | End user identifier | Yes | string | +| task_id | path | Workflow run ID returned by the original workflow run request. | Yes | string | +| continue_on_pause | query | Set to `true` to keep the stream open across multiple `workflow_paused` events, which is useful when the workflow has more than one Human Input node in sequence. By default, the stream closes after the first pause. | No | boolean | +| include_state_snapshot | query | When `true`, replay from the persisted state snapshot to include a status summary of already-executed nodes before streaming new events. | No | boolean | +| user | query | End-user identifier that originally triggered the run. 
Must match the creator of the run. | Yes | string | #### Responses @@ -2190,14 +2190,14 @@ Retrieve paginated workflow execution logs with filtering options. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| created_at__after | query | | No | string | -| created_at__before | query | | No | string | -| created_by_account | query | | No | string | -| created_by_end_user_session_id | query | | No | string | -| keyword | query | | No | string | -| limit | query | | No | integer,
**Default:** 20 | -| page | query | | No | integer,
**Default:** 1 | -| status | query | | No | string,
**Available values:** "failed", "stopped", "succeeded" | +| created_at__after | query | Filter logs created after this ISO 8601 timestamp. | No | dateTime | +| created_at__before | query | Filter logs created before this ISO 8601 timestamp. | No | dateTime | +| created_by_account | query | Filter by account ID. | No | string | +| created_by_end_user_session_id | query | Filter by end user session ID. | No | string | +| keyword | query | Keyword to search in logs. | No | string | +| limit | query | Number of items per page. | No | integer,
**Default:** 20 | +| page | query | Page number for pagination. | No | integer,
**Default:** 1 | +| status | query | Filter by execution status. | No | string,
**Available values:** "failed", "stopped", "succeeded" | #### Responses @@ -2239,7 +2239,7 @@ Retrieve the current execution results of a workflow task based on the workflow | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| workflow_run_id | path | Workflow run ID | Yes | string | +| workflow_run_id | path | Workflow run ID, obtained from the workflow execution response or streaming events. | Yes | string | #### Responses @@ -2260,7 +2260,7 @@ Stop a running workflow task. Only supported in `streaming` mode. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| task_id | path | Task ID to stop | Yes | string | +| task_id | path | Task ID, obtained from the streaming chunk returned by the Run Workflow API. | Yes | string | #### Request Body @@ -2287,7 +2287,7 @@ Execute a specific workflow version identified by its ID. Useful for running a p | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| workflow_id | path | Workflow ID to execute | Yes | string | +| workflow_id | path | Workflow ID of the specific version to execute. This value is returned in the `workflow_id` field of workflow run responses. | Yes | string | #### Request Body @@ -2319,7 +2319,7 @@ Retrieve the list of available models by type. Primarily used to query `text-emb | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| model_type | path | Type of model to retrieve | Yes | string | +| model_type | path | Type of model to retrieve. | Yes | string,
**Available values:** "llm", "moderation", "rerank", "speech2text", "text-embedding", "tts" | #### Responses @@ -2361,8 +2361,8 @@ Retrieve the list of available models by type. Primarily used to query `text-emb | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| answer | string | Annotation answer | Yes | -| question | string | Annotation question | Yes | +| answer | string | Annotation answer. | Yes | +| question | string | Annotation question. | Yes | #### AnnotationJobStatusResponse @@ -2386,17 +2386,17 @@ Retrieve the list of available models by type. Primarily used to query `text-emb | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| keyword | string | Keyword to search annotations | No | -| limit | integer,
**Default:** 20 | Number of annotations per page | No | -| page | integer,
**Default:** 1 | Page number | No | +| keyword | string | Keyword to filter annotations by question or answer content. | No | +| limit | integer,
**Default:** 20 | Number of items per page. | No | +| page | integer,
**Default:** 1 | Page number for pagination. | No | #### AnnotationReplyActionPayload | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| embedding_model_name | string | Embedding model name | Yes | -| embedding_provider_name | string | Embedding provider name | Yes | -| score_threshold | number | Score threshold for annotation matching | Yes | +| embedding_model_name | string | Name of the embedding model to use for annotation matching. | Yes | +| embedding_provider_name | string | Name of the embedding model provider. | Yes | +| score_threshold | float | Minimum similarity score for an annotation to be considered a match. Higher values require closer matches. | Yes | #### AppFeedbackListResponse @@ -2466,36 +2466,32 @@ Button styles for user actions. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| auto_generate_name | boolean,
**Default:** true | Auto generate conversation name | No | -| conversation_id | string | Conversation UUID | No | -| files | [ object ] | | No | -| inputs | object | | Yes | -| query | string | | Yes | -| response_mode | string | | No | -| retriever_from | string,
**Default:** dev | | No | -| trace_session_id | string | Trace session ID for observability grouping | No | -| workflow_id | string | Workflow ID for advanced chat | No | +| auto_generate_name | boolean,
**Default:** true | Auto-generate the conversation title. If `false`, use the Rename Conversation API with `auto_generate: true` to generate the title asynchronously. | No | +| conversation_id | string | Conversation ID to continue a conversation. Omit this field or pass an empty string to start a new conversation, then pass the returned `conversation_id` in subsequent requests. | No | +| files | [ object ] | File list for multimodal understanding, including images, documents, audio, and video. To attach a local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned `id` as `upload_file_id` with `transfer_method: local_file`. | No | +| inputs | object | Values for app-defined variables. Refer to the `user_input_form` field in the [Get App Parameters](/api-reference/applications/get-app-parameters) response to discover expected variable names and types. | Yes | +| query | string | User input or question content. | Yes | +| response_mode | string | Response mode. `streaming` uses Server-Sent Events; `blocking` returns after completion. New Agent app mode supports streaming only. When omitted, non-Agent apps run in blocking mode and new Agent apps stream. | No | +| workflow_id | string | Published workflow version ID to execute for advanced chat. If omitted, the app's current published workflow is used. | No | #### ChatRequestPayloadWithUser | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| auto_generate_name | boolean,
**Default:** true | Auto generate conversation name | No | -| conversation_id | string | Conversation UUID | No | -| files | [ object ] | | No | -| inputs | object | | Yes | -| query | string | | Yes | -| response_mode | string | | No | -| retriever_from | string,
**Default:** dev | | No | -| trace_session_id | string | Trace session ID for observability grouping | No | -| user | string | End user identifier | Yes | -| workflow_id | string | Workflow ID for advanced chat | No | +| auto_generate_name | boolean,
**Default:** true | Auto-generate the conversation title. If `false`, use the Rename Conversation API with `auto_generate: true` to generate the title asynchronously. | No | +| conversation_id | string | Conversation ID to continue a conversation. Omit this field or pass an empty string to start a new conversation, then pass the returned `conversation_id` in subsequent requests. | No | +| files | [ object ] | File list for multimodal understanding, including images, documents, audio, and video. To attach a local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned `id` as `upload_file_id` with `transfer_method: local_file`. | No | +| inputs | object | Values for app-defined variables. Refer to the `user_input_form` field in the [Get App Parameters](/api-reference/applications/get-app-parameters) response to discover expected variable names and types. | Yes | +| query | string | User input or question content. | Yes | +| response_mode | string | Response mode. `streaming` uses Server-Sent Events; `blocking` returns after completion. New Agent app mode supports streaming only. When omitted, non-Agent apps run in blocking mode and new Agent apps stream. | No | +| user | string | User identifier, unique within the application. This identifier scopes data access; resources created with one `user` value are only visible when queried with the same `user` value. | Yes | +| workflow_id | string | Published workflow version ID to execute for advanced chat. If omitted, the app's current published workflow is used. | No | #### ChildChunkCreatePayload | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| content | string | | Yes | +| content | string | Child chunk text content. | Yes | #### ChildChunkDetailResponse @@ -2507,9 +2503,9 @@ Button styles for user actions. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| keyword | string | | No | -| limit | integer,
**Default:** 20 | | No | -| page | integer,
**Default:** 1 | | No | +| keyword | string | Search keyword. | No | +| limit | integer,
**Default:** 20 | Number of items per page. Server caps at `100`. | No | +| page | integer,
**Default:** 1 | Page number to retrieve. | No | #### ChildChunkListResponse @@ -2538,30 +2534,26 @@ Button styles for user actions. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| content | string | | Yes | +| content | string | Child chunk text content. | Yes | #### CompletionRequestPayload | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| files | [ object ] | | No | -| inputs | object | | Yes | -| query | string | | No | -| response_mode | string | | No | -| retriever_from | string,
**Default:** dev | | No | -| trace_session_id | string | Trace session ID for observability grouping | No | +| files | [ object ] | File list for multimodal understanding, including images, documents, audio, and video. To attach a local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned `id` as `upload_file_id` with `transfer_method: local_file`. | No | +| inputs | object | Values for app-defined variables. Refer to the `user_input_form` field in the [Get App Parameters](/api-reference/applications/get-app-parameters) response to discover expected variable names and types. | Yes | +| query | string | User input or prompt content. | No | +| response_mode | string | Response mode. `streaming` uses Server-Sent Events; `blocking` returns after completion. When omitted, the request runs in blocking mode. | No | #### CompletionRequestPayloadWithUser | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| files | [ object ] | | No | -| inputs | object | | Yes | -| query | string | | No | -| response_mode | string | | No | -| retriever_from | string,
**Default:** dev | | No | -| trace_session_id | string | Trace session ID for observability grouping | No | -| user | string | End user identifier | Yes | +| files | [ object ] | File list for multimodal understanding, including images, documents, audio, and video. To attach a local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned `id` as `upload_file_id` with `transfer_method: local_file`. | No | +| inputs | object | Values for app-defined variables. Refer to the `user_input_form` field in the [Get App Parameters](/api-reference/applications/get-app-parameters) response to discover expected variable names and types. | Yes | +| query | string | User input or prompt content. | No | +| response_mode | string | Response mode. `streaming` uses Server-Sent Events; `blocking` returns after completion. When omitted, the request runs in blocking mode. | No | +| user | string | User identifier, unique within the application. This identifier scopes data access; resources created with one `user` value are only visible when queried with the same `user` value. | Yes | #### Condition @@ -2569,9 +2561,9 @@ Condition detail | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| comparison_operator | string,
**Available values:** "<", "=", ">", "after", "before", "contains", "empty", "end with", "in", "is", "is not", "not contains", "not empty", "not in", "start with", "≠", "≤", "≥" | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes | -| name | string | | Yes | -| value | string
[ string ]
integer
number | | No | +| comparison_operator | string,
**Available values:** "<", "=", ">", "after", "before", "contains", "empty", "end with", "in", "is", "is not", "not contains", "not empty", "not in", "start with", "≠", "≤", "≥" | Comparison to apply. String operators (`contains`, `not contains`, `start with`, `end with`, `is`, `is not`, `empty`, `not empty`, `in`, `not in`) act on string or array metadata; numeric operators (`=`, `≠`, `>`, `<`, `≥`, `≤`) act on numeric metadata; time operators (`before`, `after`) act on time metadata.
*Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes | +| name | string | Metadata field name to compare against. | Yes | +| value | string
[ string ]
number | Value to compare against. Type depends on `comparison_operator`: string for most string operators, array of strings for `in` and `not in`, number for numeric operators, and omit or use `null` for `empty` and `not empty`. | No | #### ConversationInfiniteScrollPagination @@ -2585,24 +2577,24 @@ Condition detail | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| last_id | string | Last conversation ID for pagination | No | -| limit | integer,
**Default:** 20 | Number of conversations to return | No | -| sort_by | string,
**Available values:** "-created_at", "-updated_at", "created_at", "updated_at",
**Default:** -updated_at | Sort order for conversations
*Enum:* `"-created_at"`, `"-updated_at"`, `"created_at"`, `"updated_at"` | No | +| last_id | string | The ID of the last record on the current page. Used to fetch the next page. | No | +| limit | integer,
**Default:** 20 | Number of records to return. | No | +| sort_by | string,
**Available values:** "-created_at", "-updated_at", "created_at", "updated_at",
**Default:** -updated_at | Sorting field. Use the `-` prefix for descending order.
*Enum:* `"-created_at"`, `"-updated_at"`, `"created_at"`, `"updated_at"` | No | #### ConversationRenamePayload | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| auto_generate | boolean | | No | -| name | string | | No | +| auto_generate | boolean | Automatically generate the conversation name. When `true`, the `name` field is ignored. | No | +| name | string | Conversation name. Required when `auto_generate` is `false`. | No | #### ConversationRenamePayloadWithUser | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| auto_generate | boolean | | No | -| name | string | | No | -| user | string | End user identifier | No | +| auto_generate | boolean | Automatically generate the conversation name. When `true`, the `name` field is ignored. | No | +| name | string | Conversation name. Required when `auto_generate` is `false`. | No | +| user | string | User identifier, unique within the application. This identifier scopes data access; resources created with one `user` value are only visible when queried with the same `user` value. | No | #### ConversationVariableInfiniteScrollPaginationResponse @@ -2628,22 +2620,22 @@ Condition detail | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| value | | | Yes | +| value | | The new value for the variable. Must match the variable's expected type. | Yes | #### ConversationVariableUpdatePayloadWithUser | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| user | string | End user identifier | No | -| value | | | Yes | +| user | string | User identifier, unique within the application. This identifier scopes data access; resources created with one `user` value are only visible when queried with the same `user` value. | No | +| value | | The new value for the variable. Must match the variable's expected type. 
| Yes | #### ConversationVariablesQuery | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| last_id | string | Last variable ID for pagination | No | -| limit | integer,
**Default:** 20 | Number of variables to return | No | -| variable_name | string | Filter variables by name | No | +| last_id | string | The ID of the last record on the current page. Used to fetch the next page. | No | +| limit | integer,
**Default:** 20 | Number of records to return. | No | +| variable_name | string | Filter variables by a specific name. | No | #### CustomConfigurationStatus @@ -2671,17 +2663,17 @@ Enum class for custom configuration status. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| description | string | Dataset description (max 400 chars) | No | -| embedding_model | string | | No | -| embedding_model_provider | string | | No | -| external_knowledge_api_id | string | | No | -| external_knowledge_id | string | | No | -| indexing_technique | string | | No | -| name | string | | Yes | -| permission | [PermissionEnum](#permissionenum) | | No | -| provider | string,
**Default:** vendor | | No | -| retrieval_model | [RetrievalModel](#retrievalmodel) | | No | -| summary_index_setting | object | | No | +| description | string | Description of the knowledge base. | No | +| embedding_model | string | Embedding model name. Use the `model` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`. | No | +| embedding_model_provider | string | Embedding model provider. Use the `provider` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`. | No | +| external_knowledge_api_id | string | ID of the external knowledge API. | No | +| external_knowledge_id | string | ID of the external knowledge base. | No | +| indexing_technique | string | `high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing. | No | +| name | string | Name of the knowledge base. | Yes | +| permission | [PermissionEnum](#permissionenum) | Controls who can access this knowledge base. `only_me` restricts access to the creator, `all_team_members` grants workspace-wide access, and `partial_members` grants access to specified members. | No | +| provider | string,
**Available values:** "external", "vendor",
**Default:** vendor | Knowledge base provider: `vendor` for internal knowledge bases, `external` for external ones.
*Enum:* `"external"`, `"vendor"` | No | +| retrieval_model | [RetrievalModel](#retrievalmodel) | Retrieval model configuration. Controls how chunks are searched and ranked. | No | +| summary_index_setting | object | Summary index configuration. | No | #### DatasetDetailResponse @@ -2808,11 +2800,11 @@ Enum class for custom configuration status. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| include_all | boolean | Include all datasets | No | -| keyword | string | Search keyword | No | -| limit | integer,
**Default:** 20 | Number of items per page | No | -| page | integer,
**Default:** 1 | Page number | No | -| tag_ids | [ string ] | Filter by tag IDs | No | +| include_all | boolean | Whether to include all knowledge bases regardless of permissions. | No | +| keyword | string | Search keyword to filter by name. | No | +| limit | integer,
**Default:** 20 | Number of items per page. Server caps at `100`. | No | +| page | integer,
**Default:** 1 | Page number to retrieve. | No | +| tag_ids | [ string ] | Tag IDs to filter by. | No | #### DatasetListResponse @@ -2908,17 +2900,17 @@ Enum class for custom configuration status. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| description | string | Dataset description (max 400 chars) | No | -| embedding_model | string | | No | -| embedding_model_provider | string | | No | -| external_knowledge_api_id | string | | No | -| external_knowledge_id | string | | No | -| external_retrieval_model | object | | No | -| indexing_technique | string | | No | -| name | string | | No | -| partial_member_list | [ object ] | | No | -| permission | [PermissionEnum](#permissionenum) | | No | -| retrieval_model | [RetrievalModel](#retrievalmodel) | | No | +| description | string | Description of the knowledge base. | No | +| embedding_model | string | Embedding model name. Use the `model` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`. | No | +| embedding_model_provider | string | Embedding model provider. Use the `provider` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`. | No | +| external_knowledge_api_id | string | ID of the external knowledge API. | No | +| external_knowledge_id | string | ID of the external knowledge base. | No | +| external_retrieval_model | object | Retrieval settings for external knowledge bases. | No | +| indexing_technique | string | `high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing. | No | +| name | string | Name of the knowledge base. | No | +| partial_member_list | [ object ] | List of team members with access when `permission` is `partial_members`. | No | +| permission | [PermissionEnum](#permissionenum) | Controls who can access this knowledge base. 
`only_me` restricts access to the creator, `all_team_members` grants workspace-wide access, and `partial_members` grants access to specified members. | No | +| retrieval_model | [RetrievalModel](#retrievalmodel) | Retrieval model configuration. Controls how chunks are searched and ranked. | No | #### DatasetVectorSettingResponse @@ -2949,10 +2941,10 @@ Enum class for custom configuration status. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| credential_id | string | | No | -| datasource_type | string | | Yes | -| inputs | object | | Yes | -| is_published | boolean | | Yes | +| credential_id | string | Datasource credential ID. Uses the default if omitted. | No | +| datasource_type | string,
**Available values:** "local_file", "online_document", "online_drive", "website_crawl" | Type of the datasource.
*Enum:* `"local_file"`, `"online_document"`, `"online_drive"`, `"website_crawl"` | Yes | +| inputs | object | Input variables for the datasource node. | Yes | +| is_published | boolean | Whether to run the published or draft version of the node. `true` runs the published version, `false` runs the draft. | Yes | #### DatasourcePluginListResponse @@ -2976,7 +2968,7 @@ Enum class for custom configuration status. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| is_published | boolean,
**Default:** true | | No | +| is_published | boolean,
**Default:** true | Whether to retrieve nodes from the published or draft pipeline. `true` returns nodes from the published version, `false` returns nodes from the draft. | No | #### DocumentAndBatchResponse @@ -2991,7 +2983,7 @@ Request payload for bulk downloading documents as a zip archive. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| document_ids | [ string (uuid) ] | | Yes | +| document_ids | [ string (uuid) ] | List of document IDs to include in the ZIP download. | Yes | #### DocumentDetailResponse @@ -3033,16 +3025,16 @@ Request payload for bulk downloading documents as a zip archive. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| metadata | string,
**Available values:** "all", "only", "without",
**Default:** all | Metadata response mode
*Enum:* `"all"`, `"only"`, `"without"` | No | +| metadata | string,
**Available values:** "all", "only", "without",
**Default:** all | `all` returns all fields including metadata. `only` returns only `id`, `doc_type`, and `doc_metadata`. `without` returns all fields except `doc_metadata`.
*Enum:* `"all"`, `"only"`, `"without"` | No | #### DocumentListQuery | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| keyword | string | Search keyword | No | -| limit | integer,
**Default:** 20 | Number of items per page | No | -| page | integer,
**Default:** 1 | Page number | No | -| status | string | Document status filter | No | +| keyword | string | Search keyword to filter by document name. | No | +| limit | integer,
**Default:** 20 | Number of items per page. Server caps at `100`. | No | +| page | integer,
**Default:** 1 | Page number to retrieve. | No | +| status | string | Filter by display status. | No | #### DocumentListResponse @@ -3058,9 +3050,9 @@ Request payload for bulk downloading documents as a zip archive. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| document_id | string | | Yes | -| metadata_list | [ [MetadataDetail](#metadatadetail) ] | | Yes | -| partial_update | boolean | | No | +| document_id | string | Document ID whose metadata should be updated. | Yes | +| metadata_list | [ [MetadataDetail](#metadatadetail) ] | Metadata fields to update. | Yes | +| partial_update | boolean | Whether to partially update metadata, keeping existing values for unspecified fields. | No | #### DocumentMetadataResponse @@ -3110,7 +3102,7 @@ Request payload for bulk downloading documents as a zip archive. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| document_ids | [ string ] | Document IDs to update | No | +| document_ids | [ string ] | List of document IDs to update. | No | #### DocumentStatusResponse @@ -3133,27 +3125,27 @@ Request payload for bulk downloading documents as a zip archive. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| doc_form | string,
**Default:** text_model | | No | -| doc_language | string,
**Default:** English | | No | -| embedding_model | string | | No | -| embedding_model_provider | string | | No | -| indexing_technique | string | | No | -| name | string | | Yes | -| original_document_id | string | | No | -| process_rule | [ProcessRule](#processrule) | | No | -| retrieval_model | [RetrievalModel](#retrievalmodel) | | No | -| text | string | | Yes | +| doc_form | string,
**Available values:** "hierarchical_model", "qa_model", "text_model",
**Default:** text_model | `text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, `qa_model` for question-answer pair extraction.
*Enum:* `"hierarchical_model"`, `"qa_model"`, `"text_model"` | No | +| doc_language | string,
**Default:** English | Language of the document for processing optimization. | No | +| embedding_model | string | Embedding model name. Use the `model` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`. | No | +| embedding_model_provider | string | Embedding model provider. Use the `provider` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`. | No | +| indexing_technique | string | `high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing. Required when adding the first document to a knowledge base; subsequent documents inherit the knowledge base's indexing technique if omitted. | No | +| name | string | Document name. | Yes | +| original_document_id | string | Original document ID for replacement. | No | +| process_rule | [ProcessRule](#processrule) | Processing rules for chunking. | No | +| retrieval_model | [RetrievalModel](#retrievalmodel) | Retrieval model configuration. Controls how chunks are searched and ranked. | No | +| text | string | Document text content. | Yes | #### DocumentTextUpdate | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| doc_form | string,
**Default:** text_model | | No | -| doc_language | string,
**Default:** English | | No | -| name | string | | No | -| process_rule | [ProcessRule](#processrule) | | No | -| retrieval_model | [RetrievalModel](#retrievalmodel) | | No | -| text | string | | No | +| doc_form | string,
**Available values:** "hierarchical_model", "qa_model", "text_model",
**Default:** text_model | `text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, `qa_model` for question-answer pair extraction.
*Enum:* `"hierarchical_model"`, `"qa_model"`, `"text_model"` | No | +| doc_language | string,
**Default:** English | Language of the document for processing optimization. | No | +| name | string | Document name. Required when `text` is provided. | No | +| process_rule | [ProcessRule](#processrule) | Processing rules for chunking. | No | +| retrieval_model | [RetrievalModel](#retrievalmodel) | Retrieval model configuration. Controls how chunks are searched and ranked. | No | +| text | string | Document text content. | No | #### EndUserDetail @@ -3192,8 +3184,8 @@ Note: The SQLAlchemy model defines an `is_anonymous` property for Flask-Login se | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| limit | integer,
**Default:** 20 | Number of feedbacks per page | No | -| page | integer,
**Default:** 1 | Page number | No | +| limit | integer,
**Default:** 20 | Number of records per page. | No | +| page | integer,
**Default:** 1 | Page number for pagination. | No | #### FetchFrom @@ -3228,7 +3220,7 @@ Enum class for fetch from. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| as_attachment | boolean | Download as attachment | No | +| as_attachment | boolean | If `true`, forces the file to download as an attachment instead of previewing in browser. | No | #### FileResponse @@ -3308,10 +3300,10 @@ Enum class for fetch from. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| attachment_ids | [ string ] | | No | -| external_retrieval_model | object | | No | -| query | string | | Yes | -| retrieval_model | [RetrievalModel](#retrievalmodel) | | No | +| attachment_ids | [ string ] | List of attachment IDs to include in the retrieval context. | No | +| external_retrieval_model | object | Retrieval settings for external knowledge bases. | No | +| query | string | Search query text. | Yes | +| retrieval_model | [RetrievalModel](#retrievalmodel) | Retrieval model configuration. Controls how chunks are searched and ranked. | No | #### HitTestingQuery @@ -3415,16 +3407,16 @@ Enum class for fetch from. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| action | string | | Yes | +| action | string | ID of the action button the recipient selected. Must match one of the `id` values from the form's `user_actions` list. | Yes | | inputs | object | Submitted human input values keyed by output variable name. Use a string for paragraph or select input values, a file mapping for file inputs, and a list of file mappings for file-list inputs. Local file mappings use `transfer_method=local_file` with `upload_file_id`; remote file mappings use `transfer_method=remote_url` with `url` or `remote_url`. 
| Yes | #### HumanInputFormSubmitPayloadWithUser | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| action | string | | Yes | +| action | string | ID of the action button the recipient selected. Must match one of the `id` values from the form's `user_actions` list. | Yes | | inputs | object | Submitted human input values keyed by output variable name. Use a string for paragraph or select input values, a file mapping for file inputs, and a list of file mappings for file-list inputs. Local file mappings use `transfer_method=local_file` with `upload_file_id`; remote file mappings use `transfer_method=remote_url` with `url` or `remote_url`. | Yes | -| user | string | End user identifier | Yes | +| user | string | User identifier, unique within the application. This identifier scopes data access; resources created with one `user` value are only visible when queried with the same `user` value. | Yes | #### HumanInputFormSubmitResponse @@ -3491,16 +3483,16 @@ Model class for i18n object. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| content | string | | No | -| rating | string | | No | +| content | string | Optional text feedback providing additional detail. | No | +| rating | string | Feedback rating. Set to `null` to revoke previously submitted feedback. | No | #### MessageFeedbackPayloadWithUser | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| content | string | | No | -| rating | string | | No | -| user | string | End user identifier | Yes | +| content | string | Optional text feedback providing additional detail. | No | +| rating | string | Feedback rating. Set to `null` to revoke previously submitted feedback. | No | +| user | string | User identifier, unique within the application. This identifier scopes data access; resources created with one `user` value are only visible when queried with the same `user` value. 
| Yes | #### MessageFile @@ -3547,24 +3539,24 @@ Model class for i18n object. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| conversation_id | string | Conversation UUID | Yes | -| first_id | string | First message ID for pagination | No | -| limit | integer,
**Default:** 20 | Number of messages to return (1-100) | No | +| conversation_id | string | Conversation ID. | Yes | +| first_id | string | The ID of the first chat record on the current page. Omit this value to fetch the latest messages; for subsequent pages, use the first message ID from the current list to fetch older messages. | No | +| limit | integer,
**Default:** 20 | Number of chat history messages to return per request. | No | #### MetadataArgs | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| name | string | | Yes | -| type | string,
**Available values:** "number", "string", "time" | *Enum:* `"number"`, `"string"`, `"time"` | Yes | +| name | string | Metadata field name. | Yes | +| type | string,
**Available values:** "number", "string", "time" | `string` for text values, `number` for numeric values, `time` for date/time values.
*Enum:* `"number"`, `"string"`, `"time"` | Yes | #### MetadataDetail | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| id | string | | Yes | -| name | string | | Yes | -| value | string
integer
number | | No | +| id | string | Metadata field ID. | Yes | +| name | string | Metadata field name. | Yes | +| value | string
integer
number | Metadata value. Can be a string, number, or `null`. | No | #### MetadataFilteringCondition @@ -3572,8 +3564,8 @@ Metadata Filtering Condition. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| conditions | [ [Condition](#condition) ] | | No | -| logical_operator | string | | No | +| conditions | [ [Condition](#condition) ] | List of metadata conditions to evaluate. | No | +| logical_operator | string | How to combine multiple conditions. | No | #### MetadataOperationData @@ -3581,13 +3573,13 @@ Metadata operation data | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| operation_data | [ [DocumentMetadataOperation](#documentmetadataoperation) ] | | Yes | +| operation_data | [ [DocumentMetadataOperation](#documentmetadataoperation) ] | Array of document metadata update operations. Each entry maps a document ID to its metadata values. | Yes | #### MetadataUpdatePayload | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| name | string | | Yes | +| name | string | New metadata field name. | Yes | #### ModelFeature @@ -3625,7 +3617,7 @@ Enum class for model type. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| user | string | End user identifier | No | +| user | string | User identifier, unique within the application. This identifier scopes data access; resources created with one `user` value are only visible when queried with the same `user` value. | No | #### ParagraphInputConfig @@ -3666,12 +3658,12 @@ Shared permission levels for resources (datasets, credentials, etc.) 
| Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| datasource_info_list | [ object ] | | Yes | -| datasource_type | string | | Yes | -| inputs | object | | Yes | -| is_published | boolean | | Yes | -| response_mode | string | | Yes | -| start_node_id | string | | Yes | +| datasource_info_list | [ ] | List of datasource objects to process. The expected item structure depends on `datasource_type`. | Yes | +| datasource_type | string,
**Available values:** "local_file", "online_document", "online_drive", "website_crawl" | Type of the datasource. Determines which fields are expected in `datasource_info_list` items.
*Enum:* `"local_file"`, `"online_document"`, `"online_drive"`, `"website_crawl"` | Yes | +| inputs | object | Key-value pairs for pipeline input variables defined in the workflow. Pass `{}` if the pipeline has no input variables. | Yes | +| is_published | boolean | Whether to run the published or draft version of the pipeline. `true` runs the latest published version; `false` runs the current draft (useful for testing unpublished changes). | Yes | +| response_mode | string,
**Available values:** "blocking", "streaming" | Response mode. Use `streaming` for SSE or `blocking` for JSON.
*Enum:* `"blocking"`, `"streaming"` | Yes | +| start_node_id | string | ID of the datasource node where the run starts. | Yes | #### PipelineUploadFileResponse @@ -3689,15 +3681,15 @@ Shared permission levels for resources (datasets, credentials, etc.) | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| enabled | boolean | | Yes | -| id | string | | Yes | +| enabled | boolean | Whether this preprocessing rule is enabled. | Yes | +| id | string,
**Available values:** "remove_extra_spaces", "remove_stopwords", "remove_urls_emails" | Rule identifier.
*Enum:* `"remove_extra_spaces"`, `"remove_stopwords"`, `"remove_urls_emails"` | Yes | #### ProcessRule | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| mode | [ProcessRuleMode](#processrulemode) | | Yes | -| rules | [Rule](#rule) | | No | +| mode | [ProcessRuleMode](#processrulemode) | Processing mode. `automatic` uses built-in rules, `custom` allows manual configuration, and `hierarchical` enables parent-child chunk structure for `doc_form: hierarchical_model`. | Yes | +| rules | [Rule](#rule) | Custom processing rules. | No | #### ProcessRuleMode @@ -3748,14 +3740,14 @@ Model class for provider with models response. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| user | string | End user identifier | Yes | +| user | string | User identifier, unique within the application. This identifier scopes data access; resources created with one `user` value are only visible when queried with the same `user` value. | Yes | #### RerankingModel | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| reranking_model_name | string | | No | -| reranking_provider_name | string | | No | +| reranking_model_name | string | Name of the reranking model. | No | +| reranking_provider_name | string | Provider name of the reranking model. | No | #### ResultResponse @@ -3773,15 +3765,15 @@ Model class for provider with models response. 
| Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| metadata_filtering_conditions | [MetadataFilteringCondition](#metadatafilteringcondition) | | No | -| reranking_enable | boolean | | Yes | -| reranking_mode | string | | No | -| reranking_model | [RerankingModel](#rerankingmodel) | | No | -| score_threshold | number | | No | -| score_threshold_enabled | boolean | | Yes | -| search_method | [RetrievalMethod](#retrievalmethod) | | Yes | -| top_k | integer | | Yes | -| weights | [WeightModel](#weightmodel) | | No | +| metadata_filtering_conditions | [MetadataFilteringCondition](#metadatafilteringcondition) | Restrict retrieval to chunks whose document metadata matches the given conditions. Conditions are evaluated server-side against document metadata fields. | No | +| reranking_enable | boolean | Whether reranking is enabled. | Yes | +| reranking_mode | string | Reranking mode. Required when `reranking_enable` is `true`. | No | +| reranking_model | [RerankingModel](#rerankingmodel) | Reranking model configuration. | No | +| score_threshold | number | Minimum similarity score for results. Only effective when score threshold filtering is enabled. | No | +| score_threshold_enabled | boolean | Whether score threshold filtering is enabled. | Yes | +| search_method | [RetrievalMethod](#retrievalmethod) | Search method used for retrieval. | Yes | +| top_k | integer | Maximum number of results to return. | Yes | +| weights | [WeightModel](#weightmodel) | Weight configuration for hybrid search. | No | #### RetrieverResource @@ -3809,10 +3801,10 @@ Model class for provider with models response. 
| Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| parent_mode | string | | No | -| pre_processing_rules | [ [PreProcessingRule](#preprocessingrule) ] | | No | -| segmentation | [Segmentation](#segmentation) | | No | -| subchunk_segmentation | [Segmentation](#segmentation) | | No | +| parent_mode | string | Parent-child segmentation mode. | No | +| pre_processing_rules | [ [PreProcessingRule](#preprocessingrule) ] | Pre-processing rules to apply before segmentation. | No | +| segmentation | [Segmentation](#segmentation) | Parent chunk segmentation settings. | No | +| subchunk_segmentation | [Segmentation](#segmentation) | Child chunk segmentation settings. | No | #### SegmentAttachmentResponse @@ -3829,10 +3821,10 @@ Model class for provider with models response. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| answer | string | | No | -| attachment_ids | [ string ] | | No | -| content | string | | Yes | -| keywords | [ string ] | | No | +| answer | string | Answer content for QA mode. | No | +| attachment_ids | [ string ] | Attachment file IDs. | No | +| content | string | Chunk text content. | Yes | +| keywords | [ string ] | Keywords for the chunk. | No | #### SegmentCreateListResponse @@ -3845,7 +3837,7 @@ Model class for provider with models response. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| segments | [ [SegmentCreateItemPayload](#segmentcreateitempayload) ] | | Yes | +| segments | [ [SegmentCreateItemPayload](#segmentcreateitempayload) ] | Array of chunk objects to create. | Yes | #### SegmentDetailResponse @@ -3858,10 +3850,10 @@ Model class for provider with models response. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| keyword | string | | No | -| limit | integer,
**Default:** 20 | | No | -| page | integer,
**Default:** 1 | | No | -| status | [ string ] | | No | +| keyword | string | Search keyword. | No | +| limit | integer,
**Default:** 20 | Number of items per page. Server caps at `100`. | No | +| page | integer,
**Default:** 1 | Page number to retrieve. | No | +| status | [ string ] | Filter chunks by indexing status, such as `completed`, `indexing`, or `error`. | No | #### SegmentListResponse @@ -3910,28 +3902,28 @@ Model class for provider with models response. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| answer | string | | No | -| attachment_ids | [ string ] | | No | -| content | string | | No | -| enabled | boolean | | No | -| keywords | [ string ] | | No | -| regenerate_child_chunks | boolean | | No | -| summary | string | | No | +| answer | string | Updated answer content for QA mode. | No | +| attachment_ids | [ string ] | Attachment file IDs. | No | +| content | string | Updated chunk text content. | No | +| enabled | boolean | Whether the chunk is enabled. | No | +| keywords | [ string ] | Updated keywords for the chunk. | No | +| regenerate_child_chunks | boolean | Whether to regenerate child chunks after updating a parent chunk. | No | +| summary | string | Summary content for summary index. | No | #### SegmentUpdatePayload | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| segment | [SegmentUpdateArgs](#segmentupdateargs) | | Yes | +| segment | [SegmentUpdateArgs](#segmentupdateargs) | Chunk update payload. | Yes | #### Segmentation | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| chunk_overlap | integer | | No | -| max_tokens | integer | | Yes | +| chunk_overlap | integer | Token overlap between chunks. | No | +| max_tokens | integer | Maximum token count per chunk. | Yes | | separator | string,
**Default:** - | | No | + | Custom separator for splitting text. | No | #### SelectInputConfig @@ -4040,20 +4032,20 @@ Default configuration for form inputs. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| tag_ids | [ string ] | | Yes | -| target_id | string | | Yes | +| tag_ids | [ string ] | Tag IDs to bind. | Yes | +| target_id | string | Knowledge base ID to bind the tags to. | Yes | #### TagCreatePayload | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| name | string | | Yes | +| name | string | Tag name. | Yes | #### TagDeletePayload | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| tag_id | string | | Yes | +| tag_id | string | Tag ID to delete. | Yes | #### TagUnbindingPayload @@ -4067,27 +4059,27 @@ Accepts either the legacy tag_id payload or the normalized tag_ids payload. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| name | string | | Yes | -| tag_id | string | | Yes | +| name | string | Tag name. | Yes | +| tag_id | string | Tag ID to update. | Yes | #### TextToAudioPayload | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| message_id | string | Message ID | No | +| message_id | string | Message ID. Takes priority over `text` when both are provided. | No | | streaming | boolean | Reserved for compatibility; TTS response streaming is determined by the provider output. | No | -| text | string | Text to convert to audio | No | -| voice | string | Voice to use for TTS | No | +| text | string | Speech content to convert. | No | +| voice | string | Voice to use for text-to-speech. Available voices depend on the TTS provider configured for this app. Omit to use the app's configured voice when available; that value is exposed by [Get App Parameters](/api-reference/applications/get-app-parameters) as `text_to_speech.voice`. 
| No | #### TextToAudioPayloadWithUser | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| message_id | string | Message ID | No | +| message_id | string | Message ID. Takes priority over `text` when both are provided. | No | | streaming | boolean | Reserved for compatibility; TTS response streaming is determined by the provider output. | No | -| text | string | Text to convert to audio | No | -| user | string | End user identifier | No | -| voice | string | Voice to use for TTS | No | +| text | string | Speech content to convert. | No | +| user | string | User identifier, unique within the application. This identifier scopes data access; resources created with one `user` value are only visible when queried with the same `user` value. | No | +| voice | string | Voice to use for text-to-speech. Available voices depend on the TTS provider configured for this app. Omit to use the app's configured voice when available; that value is exposed by [Get App Parameters](/api-reference/applications/get-app-parameters) as `text_to_speech.voice`. | No | #### UrlResponse @@ -4118,23 +4110,23 @@ in form definition, or a variable while the workflow is running. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| keyword_weight | number | | Yes | +| keyword_weight | number | Weight assigned to keyword search results. | Yes | #### WeightModel | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| keyword_setting | [WeightKeywordSetting](#weightkeywordsetting) | | No | -| vector_setting | [WeightVectorSetting](#weightvectorsetting) | | No | -| weight_type | string | | No | +| keyword_setting | [WeightKeywordSetting](#weightkeywordsetting) | Keyword search weight settings. | No | +| vector_setting | [WeightVectorSetting](#weightvectorsetting) | Semantic search weight settings. | No | +| weight_type | string | Strategy for balancing semantic and keyword search weights.
| No | #### WeightVectorSetting | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| embedding_model_name | string | | Yes | -| embedding_provider_name | string | | Yes | -| vector_weight | number | | Yes | +| embedding_model_name | string | Name of the embedding model used for vector search. | Yes | +| embedding_provider_name | string | Provider of the embedding model used for vector search. | Yes | +| vector_weight | number | Weight assigned to semantic vector search results. | Yes | #### WorkflowAppLogPaginationResponse @@ -4163,22 +4155,22 @@ in form definition, or a variable while the workflow is running. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| continue_on_pause | boolean | Keep the stream open across workflow_paused events | No | -| include_state_snapshot | boolean | Replay from persisted state snapshot | No | -| user | string | End user identifier | Yes | +| continue_on_pause | boolean | Set to `true` to keep the stream open across multiple `workflow_paused` events, which is useful when the workflow has more than one Human Input node in sequence. By default, the stream closes after the first pause. | No | +| include_state_snapshot | boolean | When `true`, replay from the persisted state snapshot to include a status summary of already-executed nodes before streaming new events. | No | +| user | string | End-user identifier that originally triggered the run. Must match the creator of the run. | Yes | #### WorkflowLogQuery | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| created_at__after | string | | No | -| created_at__before | string | | No | -| created_by_account | string | | No | -| created_by_end_user_session_id | string | | No | -| keyword | string | | No | -| limit | integer,
**Default:** 20 | | No | -| page | integer,
**Default:** 1 | | No | -| status | string | | No | +| created_at__after | string | Filter logs created after this ISO 8601 timestamp. | No | +| created_at__before | string | Filter logs created before this ISO 8601 timestamp. | No | +| created_by_account | string | Filter by account ID. | No | +| created_by_end_user_session_id | string | Filter by end user session ID. | No | +| keyword | string | Keyword to search in logs. | No | +| limit | integer,
**Default:** 20 | Number of items per page. | No | +| page | integer,
**Default:** 1 | Page number for pagination. | No | +| status | string | Filter by execution status. | No | #### WorkflowRunForLogResponse @@ -4200,20 +4192,18 @@ in form definition, or a variable while the workflow is running. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| files | [ object ] | | No | -| inputs | object | | Yes | -| response_mode | string | | No | -| trace_session_id | string | Trace session ID for observability grouping | No | +| files | [ object ] | File list for workflow system file inputs. Available when file upload is enabled for the workflow. To attach a local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned `id` as `upload_file_id` with `transfer_method: local_file`. | No | +| inputs | object | Key-value pairs for workflow input variables. Values for file-type variables should be arrays of file objects with `type`, `transfer_method`, and either `url` or `upload_file_id`. Refer to the `user_input_form` field in the [Get App Parameters](/api-reference/applications/get-app-parameters) response to discover the variable names and types expected by your app. | Yes | +| response_mode | string | Response mode. Use `blocking` for synchronous responses or `streaming` for Server-Sent Events. When omitted, the request runs in blocking mode. | No | #### WorkflowRunPayloadWithUser | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| files | [ object ] | | No | -| inputs | object | | Yes | -| response_mode | string | | No | -| trace_session_id | string | Trace session ID for observability grouping | No | -| user | string | End user identifier | Yes | +| files | [ object ] | File list for workflow system file inputs. Available when file upload is enabled for the workflow. To attach a local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned `id` as `upload_file_id` with `transfer_method: local_file`.
| No | +| inputs | object | Key-value pairs for workflow input variables. Values for file-type variables should be arrays of file objects with `type`, `transfer_method`, and either `url` or `upload_file_id`. Refer to the `user_input_form` field in the [Get App Parameters](/api-reference/applications/get-app-parameters) response to discover the variable names and types expected by your app. | Yes | +| response_mode | string | Response mode. Use `blocking` for synchronous responses or `streaming` for Server-Sent Events. When omitted, the request runs in blocking mode. | No | +| user | string | User identifier, unique within the application. This identifier scopes data access; resources created with one `user` value are only visible when queried with the same `user` value. | Yes | #### WorkflowRunResponse diff --git a/api/openapi/markdown/web-openapi.md b/api/openapi/markdown/web-openapi.md index 33f73dca648..bedaf964748 100644 --- a/api/openapi/markdown/web-openapi.md +++ b/api/openapi/markdown/web-openapi.md @@ -471,9 +471,9 @@ Retrieve paginated list of messages from a conversation in a chat application. | Name | Located in | Description | Required | Schema | | ---- | ---------- | ----------- | -------- | ------ | -| conversation_id | query | Conversation UUID | Yes | string | -| first_id | query | First message ID for pagination | No | string | -| limit | query | Number of messages to return (1-100) | No | integer,
**Default:** 20 | +| conversation_id | query | Conversation ID. | Yes | string | +| first_id | query | The ID of the first chat record on the current page. Omit this value to fetch the latest messages; for subsequent pages, use the first message ID from the current list to fetch older messages. | No | string | +| limit | query | Number of chat history messages to return per request. | No | integer,
**Default:** 20 | #### Responses @@ -1091,8 +1091,8 @@ Button styles for user actions. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| auto_generate | boolean | | No | -| name | string | | No | +| auto_generate | boolean | Automatically generate the conversation name. When `true`, the `name` field is ignored. | No | +| name | string | Conversation name. Required when `auto_generate` is `false`. | No | #### EmailCodeLoginSendPayload @@ -1281,7 +1281,7 @@ Parsed multipart form fields for HITL uploads. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| action | string | | Yes | +| action | string | ID of the action button the recipient selected. Must match one of the `id` values from the form's `user_actions` list. | Yes | | inputs | object | Submitted human input values keyed by output variable name. Use a string for paragraph or select input values, a file mapping for file inputs, and a list of file mappings for file-list inputs. Local file mappings use `transfer_method=local_file` with `upload_file_id`; remote file mappings use `transfer_method=remote_url` with `url` or `remote_url`. | Yes | #### HumanInputFormSubmitResponse @@ -1371,8 +1371,8 @@ Parsed multipart form fields for HITL uploads. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| content | string | | No | -| rating | string | | No | +| content | string | Optional text feedback providing additional detail. | No | +| rating | string | Feedback rating. Set to `null` to revoke previously submitted feedback. | No | #### MessageFile @@ -1392,9 +1392,9 @@ Parsed multipart form fields for HITL uploads. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| conversation_id | string | Conversation UUID | Yes | -| first_id | string | First message ID for pagination | No | -| limit | integer,
**Default:** 20 | Number of messages to return (1-100) | No | +| conversation_id | string | Conversation ID. | Yes | +| first_id | string | The ID of the first chat record on the current page. Omit this value to fetch the latest messages; for subsequent pages, use the first message ID from the current list to fetch older messages. | No | +| limit | integer,
**Default:** 20 | Number of chat history messages to return per request. | No | #### MessageMoreLikeThisQuery @@ -1631,10 +1631,10 @@ Default configuration for form inputs. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| message_id | string | Message ID | No | +| message_id | string | Message ID. Takes priority over `text` when both are provided. | No | | streaming | boolean | Reserved for compatibility; TTS response streaming is determined by the provider output. | No | -| text | string | Text to convert to audio | No | -| voice | string | Voice to use for TTS | No | +| text | string | Speech content to convert. | No | +| voice | string | Voice to use for text-to-speech. Available voices depend on the TTS provider configured for this app. Omit to use the app's configured voice when available; that value is exposed by [Get App Parameters](/api-reference/applications/get-app-parameters) as `text_to_speech.voice`. | No | #### UserActionConfig @@ -1711,5 +1711,5 @@ in form definiton, or a variable while the workflow is running. | Name | Type | Description | Required | | ---- | ---- | ----------- | -------- | -| files | [ object ] | | No | -| inputs | object | | Yes | +| files | [ object ] | File list for workflow system file inputs. Available when file upload is enabled for the workflow. To attach a local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned `id` as `upload_file_id` with `transfer_method: local_file`. | No | +| inputs | object | Key-value pairs for workflow input variables. Values for file-type variables should be arrays of file objects with `type`, `transfer_method`, and either `url` or `upload_file_id`. Refer to the `user_input_form` field in the [Get App Parameters](/api-reference/applications/get-app-parameters) response to discover the variable names and types expected by your app. 
| Yes | diff --git a/api/services/entities/knowledge_entities/knowledge_entities.py b/api/services/entities/knowledge_entities/knowledge_entities.py index 1b233076927..8cd99c4a98d 100644 --- a/api/services/entities/knowledge_entities/knowledge_entities.py +++ b/api/services/entities/knowledge_entities/knowledge_entities.py @@ -1,6 +1,6 @@ -from typing import Any, Literal +from typing import Annotated, Any, Literal -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field, WithJsonSchema, field_validator from core.rag.entities import Rule from core.rag.entities.metadata_entities import MetadataFilteringCondition @@ -8,10 +8,79 @@ from core.rag.index_processor.constant.index_type import IndexStructureType from core.rag.retrieval.retrieval_methods import RetrievalMethod from models.enums import ProcessRuleMode +DocForm = Annotated[ + str, + WithJsonSchema({"enum": ["text_model", "hierarchical_model", "qa_model"], "type": "string"}), +] +IndexingTechnique = Annotated[ + str | None, + WithJsonSchema({"anyOf": [{"enum": ["high_quality", "economy"], "type": "string"}, {"type": "null"}]}), +] +KnowledgeProvider = Annotated[ + str, + WithJsonSchema({"enum": ["vendor", "external"], "type": "string"}), +] +RerankingMode = Annotated[ + str | None, + WithJsonSchema({"anyOf": [{"enum": ["reranking_model", "weighted_score"], "type": "string"}, {"type": "null"}]}), +] +SummaryIndexSetting = Annotated[ + dict[str, Any] | None, + WithJsonSchema( + { + "anyOf": [ + { + "properties": { + "enable": {"description": "Whether to enable summary indexing.", "type": "boolean"}, + "model_name": { + "description": "Name of the model used for generating summaries.", + "type": "string", + }, + "model_provider_name": { + "description": "Provider of the summary generation model.", + "type": "string", + }, + "summary_prompt": { + "description": "Custom prompt template for summary generation.", + "type": "string", + }, + }, + "type": "object", + }, + {"type": 
"null"}, + ] + } + ), +] +ExternalRetrievalModel = Annotated[ + dict[str, Any] | None, + WithJsonSchema( + { + "anyOf": [ + { + "properties": { + "top_k": {"description": "Maximum number of results to return.", "type": "integer"}, + "score_threshold": { + "description": "Minimum similarity score threshold for filtering results.", + "type": "number", + }, + "score_threshold_enabled": { + "description": "Whether score threshold filtering is enabled.", + "type": "boolean", + }, + }, + "type": "object", + }, + {"type": "null"}, + ] + } + ), +] + class RerankingModel(BaseModel): - reranking_provider_name: str | None = None - reranking_model_name: str | None = None + reranking_provider_name: str | None = Field(default=None, description="Provider name of the reranking model.") + reranking_model_name: str | None = Field(default=None, description="Name of the reranking model.") class NotionIcon(BaseModel): @@ -56,36 +125,56 @@ class DataSource(BaseModel): class ProcessRule(BaseModel): - mode: ProcessRuleMode - rules: Rule | None = None + mode: ProcessRuleMode = Field( + description=( + "Processing mode. `automatic` uses built-in rules, `custom` allows manual configuration, and " + "`hierarchical` enables parent-child chunk structure for `doc_form: hierarchical_model`." 
+ ) + ) + rules: Rule | None = Field(default=None, description="Custom processing rules.") class WeightVectorSetting(BaseModel): - vector_weight: float - embedding_provider_name: str - embedding_model_name: str + vector_weight: float = Field(description="Weight assigned to semantic vector search results.") + embedding_provider_name: str = Field(description="Provider of the embedding model used for vector search.") + embedding_model_name: str = Field(description="Name of the embedding model used for vector search.") class WeightKeywordSetting(BaseModel): - keyword_weight: float + keyword_weight: float = Field(description="Weight assigned to keyword search results.") class WeightModel(BaseModel): - weight_type: Literal["semantic_first", "keyword_first", "customized"] | None = None - vector_setting: WeightVectorSetting | None = None - keyword_setting: WeightKeywordSetting | None = None + weight_type: Literal["semantic_first", "keyword_first", "customized"] | None = Field( + default=None, + description="Strategy for balancing semantic and keyword search weights.", + ) + vector_setting: WeightVectorSetting | None = Field(default=None, description="Semantic search weight settings.") + keyword_setting: WeightKeywordSetting | None = Field(default=None, description="Keyword search weight settings.") class RetrievalModel(BaseModel): - search_method: RetrievalMethod - reranking_enable: bool - reranking_model: RerankingModel | None = None - reranking_mode: str | None = None - top_k: int - score_threshold_enabled: bool - score_threshold: float | None = None - weights: WeightModel | None = None - metadata_filtering_conditions: MetadataFilteringCondition | None = None + search_method: RetrievalMethod = Field(description="Search method used for retrieval.") + reranking_enable: bool = Field(description="Whether reranking is enabled.") + reranking_model: RerankingModel | None = Field(default=None, description="Reranking model configuration.") + reranking_mode: RerankingMode = Field( 
+ default=None, + description="Reranking mode. Required when `reranking_enable` is `true`.", + ) + top_k: int = Field(description="Maximum number of results to return.") + score_threshold_enabled: bool = Field(description="Whether score threshold filtering is enabled.") + score_threshold: float | None = Field( + default=None, + description="Minimum similarity score for results. Only effective when score threshold filtering is enabled.", + ) + weights: WeightModel | None = Field(default=None, description="Weight configuration for hybrid search.") + metadata_filtering_conditions: MetadataFilteringCondition | None = Field( + default=None, + description=( + "Restrict retrieval to chunks whose document metadata matches the given conditions. Conditions are " + "evaluated server-side against document metadata fields." + ), + ) class MetaDataConfig(BaseModel): @@ -94,19 +183,51 @@ class MetaDataConfig(BaseModel): class KnowledgeConfig(BaseModel): - original_document_id: str | None = None - duplicate: bool = True - indexing_technique: Literal["high_quality", "economy"] - data_source: DataSource | None = None - process_rule: ProcessRule | None = None - retrieval_model: RetrievalModel | None = None - summary_index_setting: dict[str, Any] | None = Field(default=None) - doc_form: str = "text_model" - doc_language: str = "English" - embedding_model: str | None = None - embedding_model_provider: str | None = None - name: str | None = None - is_multimodal: bool = False + original_document_id: str | None = Field(default=None, description="Original document ID for replacement updates.") + duplicate: bool = Field(default=True, description="Whether duplicate document content is allowed.") + indexing_technique: Literal["high_quality", "economy"] = Field( + description=( + "`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing. 
" + "Required when adding the first document to a knowledge base; subsequent documents inherit the " + "knowledge base's indexing technique if omitted." + ) + ) + data_source: DataSource | None = Field(default=None, description="Document data source configuration.") + process_rule: ProcessRule | None = Field(default=None, description="Processing rules for chunking.") + retrieval_model: RetrievalModel | None = Field( + default=None, + description=( + "Retrieval model configuration. Controls how chunks are searched and ranked in this knowledge base." + ), + ) + summary_index_setting: SummaryIndexSetting = Field( + default=None, + description="Summary index configuration.", + ) + doc_form: DocForm = Field( + default="text_model", + description=( + "`text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, " + "`qa_model` for question-answer pair extraction." + ), + ) + doc_language: str = Field(default="English", description="Language of the document for processing optimization.") + embedding_model: str | None = Field( + default=None, + description=( + "Embedding model name. Use the `model` field from " + "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`." + ), + ) + embedding_model_provider: str | None = Field( + default=None, + description=( + "Embedding model provider. Use the `provider` field from " + "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`." 
+ ), + ) + name: str | None = Field(default=None, description="Document name.") + is_multimodal: bool = Field(default=False, description="Whether the document uses multimodal indexing.") @field_validator("doc_form") @classmethod @@ -122,47 +243,61 @@ class KnowledgeConfig(BaseModel): class SegmentCreateArgs(BaseModel): - content: str | None = None - answer: str | None = None - keywords: list[str] | None = None - attachment_ids: list[str] | None = None + content: str | None = Field(default=None, description="Chunk text content.") + answer: str | None = Field(default=None, description="Answer content for QA mode.") + keywords: list[str] | None = Field(default=None, description="Keywords for the chunk.") + attachment_ids: list[str] | None = Field(default=None, description="Attachment file IDs.") class SegmentUpdateArgs(BaseModel): - content: str | None = None - answer: str | None = None - keywords: list[str] | None = None - regenerate_child_chunks: bool = False - enabled: bool | None = None - attachment_ids: list[str] | None = None - summary: str | None = None # Summary content for summary index + content: str | None = Field(default=None, description="Updated chunk text content.") + answer: str | None = Field(default=None, description="Updated answer content for QA mode.") + keywords: list[str] | None = Field(default=None, description="Updated keywords for the chunk.") + regenerate_child_chunks: bool = Field( + default=False, + description="Whether to regenerate child chunks after updating a parent chunk.", + ) + enabled: bool | None = Field(default=None, description="Whether the chunk is enabled.") + attachment_ids: list[str] | None = Field(default=None, description="Attachment file IDs.") + summary: str | None = Field(default=None, description="Summary content for summary index.") class ChildChunkUpdateArgs(BaseModel): - id: str | None = None - content: str + id: str | None = Field(default=None, description="Existing child chunk ID. 
Omit to create a new child chunk.") + content: str = Field(description="Child chunk text content.") class MetadataArgs(BaseModel): - type: Literal["string", "number", "time"] - name: str + type: Literal["string", "number", "time"] = Field( + description="`string` for text values, `number` for numeric values, `time` for date/time values." + ) + name: str = Field(description="Metadata field name.") class MetadataUpdateArgs(BaseModel): - name: str - value: str | int | float | None = None + name: str = Field(description="Metadata field name.") + value: str | int | float | None = Field( + default=None, + description="Metadata value. Can be a string, number, or `null`.", + ) class MetadataDetail(BaseModel): - id: str - name: str - value: str | int | float | None = None + id: str = Field(description="Metadata field ID.") + name: str = Field(description="Metadata field name.") + value: str | int | float | None = Field( + default=None, + description="Metadata value. Can be a string, number, or `null`.", + ) class DocumentMetadataOperation(BaseModel): - document_id: str - metadata_list: list[MetadataDetail] - partial_update: bool = False + document_id: str = Field(description="Document ID whose metadata should be updated.") + metadata_list: list[MetadataDetail] = Field(description="Metadata fields to update.") + partial_update: bool = Field( + default=False, + description="Whether to partially update metadata, keeping existing values for unspecified fields.", + ) class MetadataOperationData(BaseModel): @@ -170,4 +305,8 @@ class MetadataOperationData(BaseModel): Metadata operation data """ - operation_data: list[DocumentMetadataOperation] + operation_data: list[DocumentMetadataOperation] = Field( + description=( + "Array of document metadata update operations. Each entry maps a document ID to its metadata values." 
+ ) + ) diff --git a/api/services/rag_pipeline/entity/pipeline_service_api_entities.py b/api/services/rag_pipeline/entity/pipeline_service_api_entities.py index ec25adac8bf..c80b7604099 100644 --- a/api/services/rag_pipeline/entity/pipeline_service_api_entities.py +++ b/api/services/rag_pipeline/entity/pipeline_service_api_entities.py @@ -1,22 +1,142 @@ from collections.abc import Mapping -from typing import Any +from typing import Annotated, Any -from pydantic import BaseModel +from pydantic import BaseModel, Field, WithJsonSchema + +DatasourceType = Annotated[ + str, + WithJsonSchema({"enum": ["local_file", "online_document", "website_crawl", "online_drive"], "type": "string"}), +] +PipelineResponseMode = Annotated[ + str, + WithJsonSchema({"enum": ["streaming", "blocking"], "type": "string"}), +] +DatasourceInfoList = Annotated[ + list[Mapping[str, Any]], + WithJsonSchema( + { + "items": { + "oneOf": [ + { + "properties": { + "reference": { + "description": ( + "Use the `id` returned by the " + "[Upload Pipeline File](/api-reference/knowledge-pipeline/upload-pipeline-file) " + "endpoint. `related_id` is accepted as an alias." + ), + "type": "string", + }, + "name": {"description": "Document title. Defaults to `untitled`.", "type": "string"}, + }, + "required": ["reference"], + "title": "Local File", + "type": "object", + }, + { + "properties": { + "workspace_id": { + "description": "ID of the workspace or database in the external platform.", + "type": "string", + }, + "page": { + "description": "Page details.", + "properties": { + "page_id": {"description": "Page identifier.", "type": "string"}, + "type": { + "description": "Page type defined by the datasource plugin.", + "type": "string", + }, + "page_name": { + "description": "Display name. Defaults to `untitled`.", + "type": "string", + }, + }, + "required": ["page_id", "type"], + "type": "object", + }, + "credential_id": { + "description": ( + "Credential for authenticating with the external platform. 
If omitted, the " + "provider's default credential is used." + ), + "type": "string", + }, + }, + "required": ["workspace_id", "page"], + "title": "Online Document", + "type": "object", + }, + { + "properties": { + "url": {"description": "URL to crawl.", "type": "string"}, + "title": { + "description": "Used as the document name. Defaults to `untitled`.", + "type": "string", + }, + }, + "required": ["url"], + "title": "Website Crawl", + "type": "object", + }, + { + "properties": { + "id": {"description": "File or folder ID.", "type": "string"}, + "type": { + "description": "Whether this entry is a single file or a folder to expand.", + "enum": ["file", "folder"], + "type": "string", + }, + "bucket": { + "description": ( + "Storage bucket name. Required by some drive providers, such as S3-compatible " + "stores; omit if the provider does not use buckets." + ), + "type": "string", + }, + "name": {"description": "File name. Defaults to `untitled`.", "type": "string"}, + }, + "required": ["id", "type"], + "title": "Online Drive", + "type": "object", + }, + ] + }, + "type": "array", + } + ), +] class DatasourceNodeRunApiEntity(BaseModel): pipeline_id: str node_id: str inputs: dict[str, Any] - datasource_type: str + datasource_type: DatasourceType credential_id: str | None = None is_published: bool class PipelineRunApiEntity(BaseModel): - inputs: Mapping[str, Any] - datasource_type: str - datasource_info_list: list[Mapping[str, Any]] - start_node_id: str - is_published: bool - response_mode: str + inputs: Mapping[str, Any] = Field( + description=( + "Key-value pairs for pipeline input variables defined in the workflow. Pass `{}` if the pipeline has " + "no input variables." + ) + ) + datasource_type: DatasourceType = Field( + description="Type of the datasource. Determines which fields are expected in `datasource_info_list` items." + ) + datasource_info_list: DatasourceInfoList = Field( + description="List of datasource objects to process. 
The expected item structure depends on `datasource_type`." + ) + start_node_id: str = Field(description="ID of the datasource node where the run starts.") + is_published: bool = Field( + description=( + "Whether to run the published or draft version of the pipeline. `true` runs the latest published " + "version; `false` runs the current draft (useful for testing unpublished changes)." + ) + ) + response_mode: PipelineResponseMode = Field( + description="Response mode. Use `streaming` for SSE or `blocking` for JSON." + ) diff --git a/api/tests/unit_tests/controllers/test_swagger.py b/api/tests/unit_tests/controllers/test_swagger.py index ebdc300a8c6..3667c8ef2df 100644 --- a/api/tests/unit_tests/controllers/test_swagger.py +++ b/api/tests/unit_tests/controllers/test_swagger.py @@ -6,6 +6,32 @@ from collections.abc import Iterator import pytest from flask import Flask +USER_PROPERTY_SCHEMA = { + "description": ( + "User identifier, unique within the application. This identifier scopes data access; resources created with " + "one `user` value are only visible when queried with the same `user` value." + ), + "type": "string", +} +GENERIC_FILE_SCHEMA = {"description": "The file to upload.", "format": "binary", "type": "string"} +DOCUMENT_CREATE_DATA_SCHEMA = { + "description": ( + "JSON string containing configuration. Accepts the same fields as " + "[Create Document by Text](/api-reference/documents/create-document-by-text) (`indexing_technique`, " + "`doc_form`, `doc_language`, `process_rule`, `retrieval_model`, `embedding_model`, " + "`embedding_model_provider`) except `name` and `text`." + ), + "type": "string", +} +DOCUMENT_UPDATE_DATA_SCHEMA = { + "description": ( + "JSON string containing document update settings such as `doc_form`, `doc_language`, `process_rule`, " + "`retrieval_model`, `embedding_model`, and `embedding_model_provider`. `name` and `text` are not used " + "for file updates." 
+ ), + "type": "string", +} + def _schema_refs(value: object) -> set[str]: refs: set[str] = set() @@ -180,11 +206,12 @@ def test_service_document_file_routes_document_multipart_form_data(monkeypatch: create_schema = _multipart_form_schema(create_operation) create_properties = create_schema["properties"] assert isinstance(create_properties, dict) - assert create_properties["file"] == {"type": "string", "format": "binary"} - assert create_properties["data"] == { - "description": "Optional JSON string with document creation settings.", + assert create_properties["file"] == { + "description": "Document file to upload.", + "format": "binary", "type": "string", } + assert create_properties["data"] == DOCUMENT_CREATE_DATA_SCHEMA assert create_schema["required"] == ["file"] assert create_operation["requestBody"]["required"] is True @@ -197,11 +224,12 @@ def test_service_document_file_routes_document_multipart_form_data(monkeypatch: update_schema = _multipart_form_schema(update_operation) update_properties = update_schema["properties"] assert isinstance(update_properties, dict) - assert update_properties["file"] == {"type": "string", "format": "binary"} - assert update_properties["data"] == { - "description": "Optional JSON string with document update settings.", + assert update_properties["file"] == { + "description": "Replacement document file to upload.", + "format": "binary", "type": "string", } + assert update_properties["data"] == DOCUMENT_UPDATE_DATA_SCHEMA assert "required" not in update_schema assert update_operation["requestBody"]["required"] is False @@ -228,7 +256,7 @@ def test_service_openapi_merges_public_api_reference_descriptions(monkeypatch: p rename_operation = payload["paths"]["/conversations/{c_id}/name"]["post"] assert rename_operation["summary"] == "Rename Conversation" assert rename_operation["tags"] == ["Conversations"] - assert _parameters_by_name(rename_operation)["c_id"]["description"] == "Conversation ID" + assert 
_parameters_by_name(rename_operation)["c_id"]["description"] == "Conversation ID." def test_service_document_list_documents_query_params_render(monkeypatch: pytest.MonkeyPatch): @@ -277,7 +305,7 @@ def test_service_openapi_documents_decorator_user_contracts(monkeypatch: pytest. ) for path, method in required_json_user_operations: schema = _json_body_schema(payload, paths[path][method]) - assert schema["properties"]["user"] == {"description": "End user identifier", "type": "string"} + assert schema["properties"]["user"] == USER_PROPERTY_SCHEMA assert "user" in schema["required"] optional_json_user_operations = ( @@ -288,7 +316,7 @@ def test_service_openapi_documents_decorator_user_contracts(monkeypatch: pytest. ) for path, method in optional_json_user_operations: schema = _json_body_schema(payload, paths[path][method]) - assert schema["properties"]["user"] == {"description": "End user identifier", "type": "string"} + assert schema["properties"]["user"] == USER_PROPERTY_SCHEMA assert "user" not in schema.get("required", []) messages_params = _parameters_by_name(paths["/messages"]["get"]) @@ -316,12 +344,22 @@ def test_service_openapi_documents_app_multipart_contracts(monkeypatch: pytest.M for path in ("/files/upload", "/audio-to-text"): schema = _multipart_form_schema(paths[path]["post"]) - assert schema["properties"]["file"] == {"format": "binary", "type": "string"} - assert schema["properties"]["user"] == {"description": "End user identifier", "type": "string"} + if path == "/audio-to-text": + assert schema["properties"]["file"] == { + "description": ( + "Audio file to transcribe. Supported MIME types: `audio/mp3`, `audio/mpga`, `audio/m4a`, " + "`audio/wav`, and `audio/amr`. File size limit is `30 MB`." 
+ ), + "format": "binary", + "type": "string", + } + else: + assert schema["properties"]["file"] == GENERIC_FILE_SCHEMA + assert schema["properties"]["user"] == USER_PROPERTY_SCHEMA assert schema["required"] == ["file"] pipeline_schema = _multipart_form_schema(paths["/datasets/pipeline/file-upload"]["post"]) - assert pipeline_schema["properties"]["file"] == {"format": "binary", "type": "string"} + assert pipeline_schema["properties"]["file"] == GENERIC_FILE_SCHEMA assert pipeline_schema["required"] == ["file"] @@ -385,14 +423,14 @@ def test_service_openapi_documents_uuid_params_and_deprecated_routes(monkeypatch dataset_params = _parameters_by_name(paths["/datasets/{dataset_id}"]["get"]) assert dataset_params["dataset_id"]["schema"] == { - "description": "Dataset ID", + "description": "Knowledge base ID.", "format": "uuid", "type": "string", } conversation_params = _parameters_by_name(paths["/conversations/{c_id}"]["delete"]) assert conversation_params["c_id"]["schema"] == { - "description": "Conversation ID", + "description": "Conversation ID.", "format": "uuid", "type": "string", } @@ -447,7 +485,7 @@ def test_service_openapi_documents_conditional_payload_schemas(monkeypatch: pyte assert manual_name_branch["properties"]["name"]["pattern"] == r".*\S.*" assert manual_name_branch["required"] == ["name"] for branch in rename_schema["anyOf"]: - assert branch["properties"]["user"] == {"description": "End user identifier", "type": "string"} + assert branch["properties"]["user"] == USER_PROPERTY_SCHEMA document_update_schema = payload["components"]["schemas"]["DocumentTextUpdate"] with_text_branch, without_text_branch = document_update_schema["anyOf"] diff --git a/packages/contracts/generated/api/console/datasets/types.gen.ts b/packages/contracts/generated/api/console/datasets/types.gen.ts index a24c6cf92ae..987837ab4ed 100644 --- a/packages/contracts/generated/api/console/datasets/types.gen.ts +++ b/packages/contracts/generated/api/console/datasets/types.gen.ts @@ -187,7 
+187,7 @@ export type IndexingEstimateResponse = { export type KnowledgeConfig = { data_source?: DataSource | null - doc_form?: string + doc_form?: 'hierarchical_model' | 'qa_model' | 'text_model' doc_language?: string duplicate?: boolean embedding_model?: string | null @@ -199,7 +199,10 @@ export type KnowledgeConfig = { process_rule?: ProcessRule | null retrieval_model?: RetrievalModel | null summary_index_setting?: { - [key: string]: unknown + enable?: boolean + model_name?: string + model_provider_name?: string + summary_prompt?: string } | null } @@ -482,7 +485,9 @@ export type ExternalRetrievalTestResponse export type HitTestingPayload = { attachment_ids?: Array | null external_retrieval_model?: { - [key: string]: unknown + score_threshold?: number + score_threshold_enabled?: boolean + top_k?: number } | null query: string retrieval_model?: RetrievalModel | null @@ -707,7 +712,7 @@ export type ProcessRule = { export type RetrievalModel = { metadata_filtering_conditions?: MetadataFilteringCondition | null reranking_enable: boolean - reranking_mode?: string | null + reranking_mode?: 'reranking_model' | 'weighted_score' | null reranking_model?: RerankingModel | null score_threshold?: number | null score_threshold_enabled: boolean @@ -1035,7 +1040,7 @@ export type WebsiteInfo = { export type PreProcessingRule = { enabled: boolean - id: string + id: 'remove_extra_spaces' | 'remove_stopwords' | 'remove_urls_emails' } export type Segmentation = { @@ -1065,7 +1070,7 @@ export type Condition = { | '≤' | '≥' name: string - value?: string | Array | number | number | null + value?: string | Array | number | null } export type WeightKeywordSetting = { diff --git a/packages/contracts/generated/api/console/datasets/zod.gen.ts b/packages/contracts/generated/api/console/datasets/zod.gen.ts index 9609f8ad310..fc6a33c3e65 100644 --- a/packages/contracts/generated/api/console/datasets/zod.gen.ts +++ b/packages/contracts/generated/api/console/datasets/zod.gen.ts @@ -1176,7 
+1176,7 @@ export const zWebsiteInfo = z.object({ */ export const zPreProcessingRule = z.object({ enabled: z.boolean(), - id: z.string(), + id: z.enum(['remove_extra_spaces', 'remove_stopwords', 'remove_urls_emails']), }) /** @@ -1233,7 +1233,7 @@ export const zCondition = z.object({ '≥', ]), name: z.string(), - value: z.union([z.string(), z.array(z.string()), z.int(), z.number()]).nullish(), + value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(), }) /** @@ -1277,7 +1277,7 @@ export const zWeightModel = z.object({ export const zRetrievalModel = z.object({ metadata_filtering_conditions: zMetadataFilteringCondition.nullish(), reranking_enable: z.boolean(), - reranking_mode: z.string().nullish(), + reranking_mode: z.enum(['reranking_model', 'weighted_score']).nullish(), reranking_model: zRerankingModel.nullish(), score_threshold: z.number().nullish(), score_threshold_enabled: z.boolean(), @@ -1291,7 +1291,13 @@ export const zRetrievalModel = z.object({ */ export const zHitTestingPayload = z.object({ attachment_ids: z.array(z.string()).nullish(), - external_retrieval_model: z.record(z.string(), z.unknown()).nullish(), + external_retrieval_model: z + .object({ + score_threshold: z.number().optional(), + score_threshold_enabled: z.boolean().optional(), + top_k: z.int().optional(), + }) + .nullish(), query: z.string().max(250), retrieval_model: zRetrievalModel.nullish(), }) @@ -1451,7 +1457,10 @@ export const zDataSource = z.object({ */ export const zKnowledgeConfig = z.object({ data_source: zDataSource.nullish(), - doc_form: z.string().optional().default('text_model'), + doc_form: z + .enum(['hierarchical_model', 'qa_model', 'text_model']) + .optional() + .default('text_model'), doc_language: z.string().optional().default('English'), duplicate: z.boolean().optional().default(true), embedding_model: z.string().nullish(), @@ -1462,7 +1471,14 @@ export const zKnowledgeConfig = z.object({ original_document_id: z.string().nullish(), process_rule: 
zProcessRule.nullish(), retrieval_model: zRetrievalModel.nullish(), - summary_index_setting: z.record(z.string(), z.unknown()).nullish(), + summary_index_setting: z + .object({ + enable: z.boolean().optional(), + model_name: z.string().optional(), + model_provider_name: z.string().optional(), + summary_prompt: z.string().optional(), + }) + .nullish(), }) export const zGetDatasetsQuery = z.object({ diff --git a/packages/contracts/generated/api/console/installed-apps/types.gen.ts b/packages/contracts/generated/api/console/installed-apps/types.gen.ts index 9070a3c3d9c..75cc4efa2c4 100644 --- a/packages/contracts/generated/api/console/installed-apps/types.gen.ts +++ b/packages/contracts/generated/api/console/installed-apps/types.gen.ts @@ -156,7 +156,10 @@ export type AudioBinaryResponse = Blob | File export type WorkflowRunPayload = { files?: Array<{ - [key: string]: unknown + transfer_method: 'local_file' | 'remote_url' + type: 'audio' | 'custom' | 'document' | 'image' | 'video' + upload_file_id?: string + url?: string }> | null inputs: { [key: string]: unknown diff --git a/packages/contracts/generated/api/console/installed-apps/zod.gen.ts b/packages/contracts/generated/api/console/installed-apps/zod.gen.ts index bdffbfb6d4d..a9a06fe3f7e 100644 --- a/packages/contracts/generated/api/console/installed-apps/zod.gen.ts +++ b/packages/contracts/generated/api/console/installed-apps/zod.gen.ts @@ -128,7 +128,7 @@ export const zSavedMessageCreatePayload = z.object({ * TextToAudioPayload */ export const zTextToAudioPayload = z.object({ - message_id: z.string().nullish(), + message_id: z.uuid().nullish(), streaming: z.boolean().nullish(), text: z.string().nullish(), voice: z.string().nullish(), @@ -143,7 +143,16 @@ export const zAudioBinaryResponse = z.custom() * WorkflowRunPayload */ export const zWorkflowRunPayload = z.object({ - files: z.array(z.record(z.string(), z.unknown())).nullish(), + files: z + .array( + z.object({ + transfer_method: z.enum(['local_file', 
'remote_url']), + type: z.enum(['audio', 'custom', 'document', 'image', 'video']), + upload_file_id: z.string().optional(), + url: z.string().optional(), + }), + ) + .nullish(), inputs: z.record(z.string(), z.unknown()), }) diff --git a/packages/contracts/generated/api/service/types.gen.ts b/packages/contracts/generated/api/service/types.gen.ts index 75d6a4b5a97..687fc29f1db 100644 --- a/packages/contracts/generated/api/service/types.gen.ts +++ b/packages/contracts/generated/api/service/types.gen.ts @@ -103,15 +103,16 @@ export type ChatRequestPayload = { auto_generate_name?: boolean conversation_id?: string | null files?: Array<{ - [key: string]: unknown + transfer_method: 'local_file' | 'remote_url' + type: 'audio' | 'custom' | 'document' | 'image' | 'video' + upload_file_id?: string + url?: string }> | null inputs: { [key: string]: unknown } query: string response_mode?: 'blocking' | 'streaming' | null - retriever_from?: string - trace_session_id?: string | null workflow_id?: string | null } @@ -119,15 +120,16 @@ export type ChatRequestPayloadWithUser = { auto_generate_name?: boolean conversation_id?: string | null files?: Array<{ - [key: string]: unknown + transfer_method: 'local_file' | 'remote_url' + type: 'audio' | 'custom' | 'document' | 'image' | 'video' + upload_file_id?: string + url?: string }> | null inputs: { [key: string]: unknown } query: string response_mode?: 'blocking' | 'streaming' | null - retriever_from?: string - trace_session_id?: string | null user: string workflow_id?: string | null } @@ -171,28 +173,30 @@ export type ChildChunkUpdatePayload = { export type CompletionRequestPayload = { files?: Array<{ - [key: string]: unknown + transfer_method: 'local_file' | 'remote_url' + type: 'audio' | 'custom' | 'document' | 'image' | 'video' + upload_file_id?: string + url?: string }> | null inputs: { [key: string]: unknown } query?: string response_mode?: 'blocking' | 'streaming' | null - retriever_from?: string - trace_session_id?: string | null } 
export type CompletionRequestPayloadWithUser = { files?: Array<{ - [key: string]: unknown + transfer_method: 'local_file' | 'remote_url' + type: 'audio' | 'custom' | 'document' | 'image' | 'video' + upload_file_id?: string + url?: string }> | null inputs: { [key: string]: unknown } query?: string response_mode?: 'blocking' | 'streaming' | null - retriever_from?: string - trace_session_id?: string | null user: string } @@ -217,7 +221,7 @@ export type Condition = { | '≤' | '≥' name: string - value?: string | Array | number | number | null + value?: string | Array | number | null } export type ConversationInfiniteScrollPagination = { @@ -315,10 +319,13 @@ export type DatasetCreatePayload = { indexing_technique?: 'economy' | 'high_quality' | null name: string permission?: PermissionEnum | null - provider?: string + provider?: 'external' | 'vendor' retrieval_model?: RetrievalModel | null summary_index_setting?: { - [key: string]: unknown + enable?: boolean + model_name?: string + model_provider_name?: string + summary_prompt?: string } | null } @@ -512,12 +519,14 @@ export type DatasetUpdatePayload = { external_knowledge_api_id?: string | null external_knowledge_id?: string | null external_retrieval_model?: { - [key: string]: unknown + score_threshold?: number + score_threshold_enabled?: boolean + top_k?: number } | null indexing_technique?: 'economy' | 'high_quality' | null name?: string | null partial_member_list?: Array<{ - [key: string]: string + user_id?: string }> | null permission?: PermissionEnum | null retrieval_model?: RetrievalModel | null @@ -544,7 +553,7 @@ export type DatasourceCredentialInfoResponse = { export type DatasourceNodeRunPayload = { credential_id?: string | null - datasource_type: string + datasource_type: 'local_file' | 'online_document' | 'online_drive' | 'website_crawl' inputs: { [key: string]: unknown } @@ -626,7 +635,15 @@ export type DocumentListQuery = { keyword?: string | null limit?: number page?: number - status?: string | null + 
status?: + | 'archived' + | 'available' + | 'disabled' + | 'error' + | 'indexing' + | 'paused' + | 'queuing' + | null } export type DocumentListResponse = { @@ -701,11 +718,11 @@ export type DocumentStatusResponse = { } export type DocumentTextCreatePayload = { - doc_form?: string + doc_form?: 'hierarchical_model' | 'qa_model' | 'text_model' doc_language?: string embedding_model?: string | null embedding_model_provider?: string | null - indexing_technique?: string | null + indexing_technique?: 'economy' | 'high_quality' | null name: string original_document_id?: string | null process_rule?: ProcessRule | null @@ -715,7 +732,7 @@ export type DocumentTextCreatePayload = { export type DocumentTextUpdate = ( | { - doc_form?: string + doc_form?: 'hierarchical_model' | 'qa_model' | 'text_model' doc_language?: string name: string process_rule?: ProcessRule | null @@ -723,7 +740,7 @@ export type DocumentTextUpdate = ( text: string } | { - doc_form?: string + doc_form?: 'hierarchical_model' | 'qa_model' | 'text_model' doc_language?: string name?: string | null process_rule?: ProcessRule | null @@ -731,7 +748,7 @@ export type DocumentTextUpdate = ( text?: null } ) & { - doc_form?: string + doc_form?: 'hierarchical_model' | 'qa_model' | 'text_model' doc_language?: string name?: string | null process_rule?: ProcessRule | null @@ -849,7 +866,9 @@ export type HitTestingFile = { export type HitTestingPayload = { attachment_ids?: Array | null external_retrieval_model?: { - [key: string]: unknown + score_threshold?: number + score_threshold_enabled?: boolean + top_k?: number } | null query: string retrieval_model?: RetrievalModel | null @@ -1146,15 +1165,37 @@ export type Parameters = { export type PermissionEnum = 'all_team_members' | 'only_me' | 'partial_members' export type PipelineRunApiEntity = { - datasource_info_list: Array<{ - [key: string]: unknown - }> - datasource_type: string + datasource_info_list: Array< + | { + name?: string + reference: string + } + | { + 
credential_id?: string + page: { + page_id: string + page_name?: string + type: string + } + workspace_id: string + } + | { + title?: string + url: string + } + | { + bucket?: string + id: string + name?: string + type: 'file' | 'folder' + } + > + datasource_type: 'local_file' | 'online_document' | 'online_drive' | 'website_crawl' inputs: { [key: string]: unknown } is_published: boolean - response_mode: string + response_mode: 'blocking' | 'streaming' start_node_id: string } @@ -1170,7 +1211,7 @@ export type PipelineUploadFileResponse = { export type PreProcessingRule = { enabled: boolean - id: string + id: 'remove_extra_spaces' | 'remove_stopwords' | 'remove_urls_emails' } export type ProcessRule = { @@ -1231,7 +1272,7 @@ export type RetrievalMethod export type RetrievalModel = { metadata_filtering_conditions?: MetadataFilteringCondition | null reranking_enable: boolean - reranking_mode?: string | null + reranking_mode?: 'reranking_model' | 'weighted_score' | null reranking_model?: RerankingModel | null score_threshold?: number | null score_threshold_enabled: boolean @@ -1578,24 +1619,28 @@ export type WorkflowRunForLogResponse = { export type WorkflowRunPayload = { files?: Array<{ - [key: string]: unknown + transfer_method: 'local_file' | 'remote_url' + type: 'audio' | 'custom' | 'document' | 'image' | 'video' + upload_file_id?: string + url?: string }> | null inputs: { [key: string]: unknown } response_mode?: 'blocking' | 'streaming' | null - trace_session_id?: string | null } export type WorkflowRunPayloadWithUser = { files?: Array<{ - [key: string]: unknown + transfer_method: 'local_file' | 'remote_url' + type: 'audio' | 'custom' | 'document' | 'image' | 'video' + upload_file_id?: string + url?: string }> | null inputs: { [key: string]: unknown } response_mode?: 'blocking' | 'streaming' | null - trace_session_id?: string | null user: string } @@ -1704,7 +1749,7 @@ export type PostAppsAnnotationReplyByActionResponse export type 
GetAppsAnnotationReplyByActionStatusByJobIdData = { body?: never path: { - action: string + action: 'disable' | 'enable' job_id: string } query?: never @@ -2399,7 +2444,7 @@ export type GetDatasetsByDatasetIdDocumentsData = { keyword?: string limit?: number page?: number - status?: string + status?: 'archived' | 'available' | 'disabled' | 'error' | 'indexing' | 'paused' | 'queuing' } url: '/datasets/{dataset_id}/documents' } @@ -3661,7 +3706,7 @@ export type PostWorkflowsByWorkflowIdRunResponse export type GetWorkspacesCurrentModelsModelTypesByModelTypeData = { body?: never path: { - model_type: string + model_type: 'llm' | 'moderation' | 'rerank' | 'speech2text' | 'text-embedding' | 'tts' } query?: never url: '/workspaces/current/models/model-types/{model_type}' diff --git a/packages/contracts/generated/api/service/zod.gen.ts b/packages/contracts/generated/api/service/zod.gen.ts index efc05030887..5dcac8cf9db 100644 --- a/packages/contracts/generated/api/service/zod.gen.ts +++ b/packages/contracts/generated/api/service/zod.gen.ts @@ -131,12 +131,19 @@ export const zButtonStyle = z.enum(['accent', 'default', 'ghost', 'primary']) export const zChatRequestPayload = z.object({ auto_generate_name: z.boolean().optional().default(true), conversation_id: z.string().nullish(), - files: z.array(z.record(z.string(), z.unknown())).nullish(), + files: z + .array( + z.object({ + transfer_method: z.enum(['local_file', 'remote_url']), + type: z.enum(['audio', 'custom', 'document', 'image', 'video']), + upload_file_id: z.string().optional(), + url: z.string().optional(), + }), + ) + .nullish(), inputs: z.record(z.string(), z.unknown()), query: z.string(), response_mode: z.enum(['blocking', 'streaming']).nullish(), - retriever_from: z.string().optional().default('dev'), - trace_session_id: z.string().nullish(), workflow_id: z.string().nullish(), }) @@ -146,12 +153,19 @@ export const zChatRequestPayload = z.object({ export const zChatRequestPayloadWithUser = z.object({ 
auto_generate_name: z.boolean().optional().default(true), conversation_id: z.string().nullish(), - files: z.array(z.record(z.string(), z.unknown())).nullish(), + files: z + .array( + z.object({ + transfer_method: z.enum(['local_file', 'remote_url']), + type: z.enum(['audio', 'custom', 'document', 'image', 'video']), + upload_file_id: z.string().optional(), + url: z.string().optional(), + }), + ) + .nullish(), inputs: z.record(z.string(), z.unknown()), query: z.string(), response_mode: z.enum(['blocking', 'streaming']).nullish(), - retriever_from: z.string().optional().default('dev'), - trace_session_id: z.string().nullish(), user: z.string(), workflow_id: z.string().nullish(), }) @@ -215,24 +229,38 @@ export const zChildChunkUpdatePayload = z.object({ * CompletionRequestPayload */ export const zCompletionRequestPayload = z.object({ - files: z.array(z.record(z.string(), z.unknown())).nullish(), + files: z + .array( + z.object({ + transfer_method: z.enum(['local_file', 'remote_url']), + type: z.enum(['audio', 'custom', 'document', 'image', 'video']), + upload_file_id: z.string().optional(), + url: z.string().optional(), + }), + ) + .nullish(), inputs: z.record(z.string(), z.unknown()), query: z.string().optional().default(''), response_mode: z.enum(['blocking', 'streaming']).nullish(), - retriever_from: z.string().optional().default('dev'), - trace_session_id: z.string().nullish(), }) /** * CompletionRequestPayload */ export const zCompletionRequestPayloadWithUser = z.object({ - files: z.array(z.record(z.string(), z.unknown())).nullish(), + files: z + .array( + z.object({ + transfer_method: z.enum(['local_file', 'remote_url']), + type: z.enum(['audio', 'custom', 'document', 'image', 'video']), + upload_file_id: z.string().optional(), + url: z.string().optional(), + }), + ) + .nullish(), inputs: z.record(z.string(), z.unknown()), query: z.string().optional().default(''), response_mode: z.enum(['blocking', 'streaming']).nullish(), - retriever_from: 
z.string().optional().default('dev'), - trace_session_id: z.string().nullish(), user: z.string(), }) @@ -263,7 +291,7 @@ export const zCondition = z.object({ '≥', ]), name: z.string(), - value: z.union([z.string(), z.array(z.string()), z.int(), z.number()]).nullish(), + value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(), }) /** @@ -657,7 +685,7 @@ export const zDatasourceCredentialInfoResponse = z.object({ */ export const zDatasourceNodeRunPayload = z.object({ credential_id: z.string().nullish(), - datasource_type: z.string(), + datasource_type: z.enum(['local_file', 'online_document', 'online_drive', 'website_crawl']), inputs: z.record(z.string(), z.unknown()), is_published: z.boolean(), }) @@ -710,7 +738,9 @@ export const zDocumentListQuery = z.object({ keyword: z.string().nullish(), limit: z.int().optional().default(20), page: z.int().optional().default(1), - status: z.string().nullish(), + status: z + .enum(['archived', 'available', 'disabled', 'error', 'indexing', 'paused', 'queuing']) + .nullish(), }) /** @@ -1337,11 +1367,37 @@ export const zPermissionEnum = z.enum(['all_team_members', 'only_me', 'partial_m * PipelineRunApiEntity */ export const zPipelineRunApiEntity = z.object({ - datasource_info_list: z.array(z.record(z.string(), z.unknown())), - datasource_type: z.string(), + datasource_info_list: z.array( + z.union([ + z.object({ + name: z.string().optional(), + reference: z.string(), + }), + z.object({ + credential_id: z.string().optional(), + page: z.object({ + page_id: z.string(), + page_name: z.string().optional(), + type: z.string(), + }), + workspace_id: z.string(), + }), + z.object({ + title: z.string().optional(), + url: z.string(), + }), + z.object({ + bucket: z.string().optional(), + id: z.string(), + name: z.string().optional(), + type: z.enum(['file', 'folder']), + }), + ]), + ), + datasource_type: z.enum(['local_file', 'online_document', 'online_drive', 'website_crawl']), inputs: z.record(z.string(), z.unknown()), 
is_published: z.boolean(), - response_mode: z.string(), + response_mode: z.enum(['blocking', 'streaming']), start_node_id: z.string(), }) @@ -1363,7 +1419,7 @@ export const zPipelineUploadFileResponse = z.object({ */ export const zPreProcessingRule = z.object({ enabled: z.boolean(), - id: z.string(), + id: z.enum(['remove_extra_spaces', 'remove_stopwords', 'remove_urls_emails']), }) /** @@ -1779,7 +1835,7 @@ export const zTagUpdatePayload = z.object({ * TextToAudioPayload */ export const zTextToAudioPayload = z.object({ - message_id: z.string().nullish(), + message_id: z.uuid().nullish(), streaming: z.boolean().nullish(), text: z.string().nullish(), voice: z.string().nullish(), @@ -1789,7 +1845,7 @@ export const zTextToAudioPayload = z.object({ * TextToAudioPayload */ export const zTextToAudioPayloadWithUser = z.object({ - message_id: z.string().nullish(), + message_id: z.uuid().nullish(), streaming: z.boolean().nullish(), text: z.string().nullish(), user: z.string().optional(), @@ -1956,7 +2012,7 @@ export const zWeightModel = z.object({ export const zRetrievalModel = z.object({ metadata_filtering_conditions: zMetadataFilteringCondition.nullish(), reranking_enable: z.boolean(), - reranking_mode: z.string().nullish(), + reranking_mode: z.enum(['reranking_model', 'weighted_score']).nullish(), reranking_model: zRerankingModel.nullish(), score_threshold: z.number().nullish(), score_threshold_enabled: z.boolean(), @@ -1977,9 +2033,16 @@ export const zDatasetCreatePayload = z.object({ indexing_technique: z.enum(['economy', 'high_quality']).nullish(), name: z.string().min(1).max(40), permission: zPermissionEnum.nullish().default('only_me'), - provider: z.string().optional().default('vendor'), + provider: z.enum(['external', 'vendor']).optional().default('vendor'), retrieval_model: zRetrievalModel.nullish(), - summary_index_setting: z.record(z.string(), z.unknown()).nullish(), + summary_index_setting: z + .object({ + enable: z.boolean().optional(), + model_name: 
z.string().optional(), + model_provider_name: z.string().optional(), + summary_prompt: z.string().optional(), + }) + .nullish(), }) /** @@ -1991,10 +2054,22 @@ export const zDatasetUpdatePayload = z.object({ embedding_model_provider: z.string().nullish(), external_knowledge_api_id: z.string().nullish(), external_knowledge_id: z.string().nullish(), - external_retrieval_model: z.record(z.string(), z.unknown()).nullish(), + external_retrieval_model: z + .object({ + score_threshold: z.number().optional(), + score_threshold_enabled: z.boolean().optional(), + top_k: z.int().optional(), + }) + .nullish(), indexing_technique: z.enum(['economy', 'high_quality']).nullish(), name: z.string().min(1).max(40).nullish(), - partial_member_list: z.array(z.record(z.string(), z.string())).nullish(), + partial_member_list: z + .array( + z.object({ + user_id: z.string().optional(), + }), + ) + .nullish(), permission: zPermissionEnum.nullish(), retrieval_model: zRetrievalModel.nullish(), }) @@ -2003,11 +2078,14 @@ export const zDatasetUpdatePayload = z.object({ * DocumentTextCreatePayload */ export const zDocumentTextCreatePayload = z.object({ - doc_form: z.string().optional().default('text_model'), + doc_form: z + .enum(['hierarchical_model', 'qa_model', 'text_model']) + .optional() + .default('text_model'), doc_language: z.string().optional().default('English'), embedding_model: z.string().nullish(), embedding_model_provider: z.string().nullish(), - indexing_technique: z.string().nullish(), + indexing_technique: z.enum(['economy', 'high_quality']).nullish(), name: z.string(), original_document_id: z.string().nullish(), process_rule: zProcessRule.nullish(), @@ -2018,7 +2096,10 @@ export const zDocumentTextCreatePayload = z.object({ export const zDocumentTextUpdate = z.intersection( z.union([ z.object({ - doc_form: z.string().optional().default('text_model'), + doc_form: z + .enum(['hierarchical_model', 'qa_model', 'text_model']) + .optional() + .default('text_model'), doc_language: 
z.string().optional().default('English'), name: z.string(), process_rule: zProcessRule.nullish(), @@ -2026,7 +2107,10 @@ export const zDocumentTextUpdate = z.intersection( text: z.string(), }), z.object({ - doc_form: z.string().optional().default('text_model'), + doc_form: z + .enum(['hierarchical_model', 'qa_model', 'text_model']) + .optional() + .default('text_model'), doc_language: z.string().optional().default('English'), name: z.string().nullish(), process_rule: zProcessRule.nullish(), @@ -2035,7 +2119,10 @@ export const zDocumentTextUpdate = z.intersection( }), ]), z.object({ - doc_form: z.string().optional().default('text_model'), + doc_form: z + .enum(['hierarchical_model', 'qa_model', 'text_model']) + .optional() + .default('text_model'), doc_language: z.string().optional().default('English'), name: z.string().nullish(), process_rule: zProcessRule.nullish(), @@ -2049,7 +2136,13 @@ export const zDocumentTextUpdate = z.intersection( */ export const zHitTestingPayload = z.object({ attachment_ids: z.array(z.string()).nullish(), - external_retrieval_model: z.record(z.string(), z.unknown()).nullish(), + external_retrieval_model: z + .object({ + score_threshold: z.number().optional(), + score_threshold_enabled: z.boolean().optional(), + top_k: z.int().optional(), + }) + .nullish(), query: z.string().max(250), retrieval_model: zRetrievalModel.nullish(), }) @@ -2132,20 +2225,36 @@ export const zWorkflowAppLogPaginationResponse = z.object({ * WorkflowRunPayload */ export const zWorkflowRunPayload = z.object({ - files: z.array(z.record(z.string(), z.unknown())).nullish(), + files: z + .array( + z.object({ + transfer_method: z.enum(['local_file', 'remote_url']), + type: z.enum(['audio', 'custom', 'document', 'image', 'video']), + upload_file_id: z.string().optional(), + url: z.string().optional(), + }), + ) + .nullish(), inputs: z.record(z.string(), z.unknown()), response_mode: z.enum(['blocking', 'streaming']).nullish(), - trace_session_id: z.string().nullish(), }) 
/** * WorkflowRunPayload */ export const zWorkflowRunPayloadWithUser = z.object({ - files: z.array(z.record(z.string(), z.unknown())).nullish(), + files: z + .array( + z.object({ + transfer_method: z.enum(['local_file', 'remote_url']), + type: z.enum(['audio', 'custom', 'document', 'image', 'video']), + upload_file_id: z.string().optional(), + url: z.string().optional(), + }), + ) + .nullish(), inputs: z.record(z.string(), z.unknown()), response_mode: z.enum(['blocking', 'streaming']).nullish(), - trace_session_id: z.string().nullish(), user: z.string(), }) @@ -2231,7 +2340,7 @@ export const zPostAppsAnnotationReplyByActionPath = z.object({ export const zPostAppsAnnotationReplyByActionResponse = zAnnotationJobStatusResponse export const zGetAppsAnnotationReplyByActionStatusByJobIdPath = z.object({ - action: z.string(), + action: z.enum(['disable', 'enable']), job_id: z.uuid(), }) @@ -2553,7 +2662,9 @@ export const zGetDatasetsByDatasetIdDocumentsQuery = z.object({ keyword: z.string().optional(), limit: z.int().optional().default(20), page: z.int().optional().default(1), - status: z.string().optional(), + status: z + .enum(['archived', 'available', 'disabled', 'error', 'indexing', 'paused', 'queuing']) + .optional(), }) /** @@ -3112,8 +3223,8 @@ export const zGetWorkflowByTaskIdEventsQuery = z.object({ export const zGetWorkflowByTaskIdEventsResponse = zEventStreamResponse export const zGetWorkflowsLogsQuery = z.object({ - created_at__after: z.string().optional(), - created_at__before: z.string().optional(), + created_at__after: z.iso.datetime().optional(), + created_at__before: z.iso.datetime().optional(), created_by_account: z.string().optional(), created_by_end_user_session_id: z.string().optional(), keyword: z.string().optional(), @@ -3172,7 +3283,7 @@ export const zPostWorkflowsByWorkflowIdRunPath = z.object({ export const zPostWorkflowsByWorkflowIdRunResponse = zGeneratedAppResponse export const zGetWorkspacesCurrentModelsModelTypesByModelTypePath = z.object({ 
- model_type: z.string(), + model_type: z.enum(['llm', 'moderation', 'rerank', 'speech2text', 'text-embedding', 'tts']), }) /** diff --git a/packages/contracts/generated/api/web/types.gen.ts b/packages/contracts/generated/api/web/types.gen.ts index 524942838c3..a1f03e0b3c3 100644 --- a/packages/contracts/generated/api/web/types.gen.ts +++ b/packages/contracts/generated/api/web/types.gen.ts @@ -640,7 +640,10 @@ export type WebMessageListItem = { export type WorkflowRunPayload = { files?: Array<{ - [key: string]: unknown + transfer_method: 'local_file' | 'remote_url' + type: 'audio' | 'custom' | 'document' | 'image' | 'video' + upload_file_id?: string + url?: string }> | null inputs: { [key: string]: unknown diff --git a/packages/contracts/generated/api/web/zod.gen.ts b/packages/contracts/generated/api/web/zod.gen.ts index 011a9f83054..8c35ac0ca54 100644 --- a/packages/contracts/generated/api/web/zod.gen.ts +++ b/packages/contracts/generated/api/web/zod.gen.ts @@ -696,7 +696,7 @@ export const zParameters = z.object({ * TextToAudioPayload */ export const zTextToAudioPayload = z.object({ - message_id: z.string().nullish(), + message_id: z.uuid().nullish(), streaming: z.boolean().nullish(), text: z.string().nullish(), voice: z.string().nullish(), @@ -906,7 +906,16 @@ export const zWebMessageInfiniteScrollPagination = z.object({ * WorkflowRunPayload */ export const zWorkflowRunPayload = z.object({ - files: z.array(z.record(z.string(), z.unknown())).nullish(), + files: z + .array( + z.object({ + transfer_method: z.enum(['local_file', 'remote_url']), + type: z.enum(['audio', 'custom', 'document', 'image', 'video']), + upload_file_id: z.string().optional(), + url: z.string().optional(), + }), + ) + .nullish(), inputs: z.record(z.string(), z.unknown()), })