docs: enrich generated service API descriptions (#37615)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-06-22 19:21:13 +08:00 · 2026-06-18 16:43:39 +08:00 · 2026-06-18 16:43:39 +08:00 · c52eafe2ca
commit c52eafe2ca
parent 2f72b576f0
37 changed files with 1992 additions and 918 deletions
--- a/api/controllers/common/controller_schemas.py
+++ b/api/controllers/common/controller_schemas.py
@ -1,8 +1,8 @@
 from copy import deepcopy
-from typing import Any, Literal, override
+from typing import Annotated, Any, Literal, override
 from uuid import UUID

-from pydantic import BaseModel, Field, GetJsonSchemaHandler, model_validator
+from pydantic import BaseModel, Field, GetJsonSchemaHandler, WithJsonSchema, model_validator

 from libs.helper import UUIDStrOrEmpty

@ -10,8 +10,14 @@ from libs.helper import UUIDStrOrEmpty


 class ConversationRenamePayload(BaseModel):
-    name: str | None = None
-    auto_generate: bool = False
+    name: str | None = Field(
+        default=None,
+        description="Conversation name. Required when `auto_generate` is `false`.",
+    )
+    auto_generate: bool = Field(
+        default=False,
+        description="Automatically generate the conversation name. When `true`, the `name` field is ignored.",
+    )

    @classmethod
    @override
@ -64,14 +70,28 @@ class ConversationRenamePayload(BaseModel):


 class MessageListQuery(BaseModel):
-    conversation_id: UUIDStrOrEmpty = Field(description="Conversation UUID")
-    first_id: UUIDStrOrEmpty | None = Field(default=None, description="First message ID for pagination")
-    limit: int = Field(default=20, ge=1, le=100, description="Number of messages to return (1-100)")
+    conversation_id: UUIDStrOrEmpty = Field(description="Conversation ID.")
+    first_id: UUIDStrOrEmpty | None = Field(
+        default=None,
+        description=(
+            "The ID of the first chat record on the current page. Omit this value to fetch the latest messages; "
+            "for subsequent pages, use the first message ID from the current list to fetch older messages."
+        ),
+    )
+    limit: int = Field(
+        default=20,
+        ge=1,
+        le=100,
+        description="Number of chat history messages to return per request.",
+    )


 class MessageFeedbackPayload(BaseModel):
-    rating: Literal["like", "dislike"] | None = None
-    content: str | None = None
+    rating: Literal["like", "dislike"] | None = Field(
+        default=None,
+        description="Feedback rating. Set to `null` to revoke previously submitted feedback.",
+    )
+    content: str | None = Field(default=None, description="Optional text feedback providing additional detail.")


 # --- Saved message schemas ---
@ -88,6 +108,39 @@ class SavedMessageCreatePayload(BaseModel):

 # --- Workflow schemas ---

+WORKFLOW_INPUT_FILE_ITEM_SCHEMA: dict[str, object] = {
+    "type": "object",
+    "required": ["type", "transfer_method"],
+    "properties": {
+        "type": {
+            "description": "File type.",
+            "enum": ["document", "image", "audio", "video", "custom"],
+            "type": "string",
+        },
+        "transfer_method": {
+            "description": "Transfer method: `remote_url` for file URL, `local_file` for uploaded file.",
+            "enum": ["remote_url", "local_file"],
+            "type": "string",
+        },
+        "url": {
+            "description": "File URL when `transfer_method` is `remote_url`.",
+            "format": "url",
+            "type": "string",
+        },
+        "upload_file_id": {
+            "description": (
+                "Uploaded file ID obtained from the [Upload File](/api-reference/files/upload-file) API when "
+                "`transfer_method` is `local_file`."
+            ),
+            "type": "string",
+        },
+    },
+}
+WORKFLOW_INPUT_FILE_LIST_SCHEMA: dict[str, object] = {
+    "anyOf": [{"items": WORKFLOW_INPUT_FILE_ITEM_SCHEMA, "type": "array"}, {"type": "null"}]
+}
+WorkflowInputFileList = Annotated[list[dict[str, Any]] | None, WithJsonSchema(WORKFLOW_INPUT_FILE_LIST_SCHEMA)]
+

 class DefaultBlockConfigQuery(BaseModel):
    q: str | None = None
@ -101,8 +154,22 @@ class WorkflowListQuery(BaseModel):


 class WorkflowRunPayload(BaseModel):
-    inputs: dict[str, Any]
-    files: list[dict[str, Any]] | None = Field(default=None)
+    inputs: dict[str, Any] = Field(
+        description=(
+            "Key-value pairs for workflow input variables. Values for file-type variables should be arrays of "
+            "file objects with `type`, `transfer_method`, and either `url` or `upload_file_id`. Refer to the "
+            "`user_input_form` field in the [Get App Parameters](/api-reference/applications/get-app-parameters) "
+            "response to discover the variable names and types expected by your app."
+        )
+    )
+    files: WorkflowInputFileList = Field(
+        default=None,
+        description=(
+            "File list for workflow system file inputs. Available when file upload is enabled for the workflow. "
+            "To attach a local file, first upload it via [Upload File](/api-reference/files/upload-file) and use "
+            "the returned `id` as `upload_file_id` with `transfer_method: local_file`."
+        ),
+    )


 class WorkflowUpdatePayload(BaseModel):
@ -117,30 +184,48 @@ DOCUMENT_BATCH_DOWNLOAD_ZIP_MAX_DOCS = 100


 class ChildChunkCreatePayload(BaseModel):
-    content: str
+    content: str = Field(description="Child chunk text content.")


 class ChildChunkUpdatePayload(BaseModel):
-    content: str
+    content: str = Field(description="Child chunk text content.")


 class DocumentBatchDownloadZipPayload(BaseModel):
    """Request payload for bulk downloading documents as a zip archive."""

-    document_ids: list[UUID] = Field(..., min_length=1, max_length=DOCUMENT_BATCH_DOWNLOAD_ZIP_MAX_DOCS)
+    document_ids: list[UUID] = Field(
+        ...,
+        min_length=1,
+        max_length=DOCUMENT_BATCH_DOWNLOAD_ZIP_MAX_DOCS,
+        description="List of document IDs to include in the ZIP download.",
+    )


 class MetadataUpdatePayload(BaseModel):
-    name: str
+    name: str = Field(description="New metadata field name.")


 # --- Audio schemas ---


+UUIDString = Annotated[str, WithJsonSchema({"format": "uuid", "type": "string"})]
+
+
 class TextToAudioPayload(BaseModel):
-    message_id: str | None = Field(default=None, description="Message ID")
-    voice: str | None = Field(default=None, description="Voice to use for TTS")
-    text: str | None = Field(default=None, description="Text to convert to audio")
+    message_id: UUIDString | None = Field(
+        default=None,
+        description="Message ID. Takes priority over `text` when both are provided.",
+    )
+    voice: str | None = Field(
+        default=None,
+        description=(
+            "Voice to use for text-to-speech. Available voices depend on the TTS provider configured for this app. "
+            "Omit to use the app's configured voice when available; that value is exposed by "
+            "[Get App Parameters](/api-reference/applications/get-app-parameters) as `text_to_speech.voice`."
+        ),
+    )
+    text: str | None = Field(default=None, description="Speech content to convert.")
    streaming: bool | None = Field(
        default=None,
        description="Reserved for compatibility; TTS response streaming is determined by the provider output.",
--- a/api/controllers/common/human_input.py
+++ b/api/controllers/common/human_input.py
@ -35,7 +35,12 @@ class HumanInputFormSubmitPayload(BaseModel):
        ),
        examples=[HUMAN_INPUT_FORM_INPUT_EXAMPLE],
    )
-    action: str
+    action: str = Field(
+        description=(
+            "ID of the action button the recipient selected. Must match one of the `id` values from the form's "
+            "`user_actions` list."
+        )
+    )


 def stringify_form_default_values(values: dict[str, object]) -> dict[str, str]:
--- a/api/controllers/console/datasets/hit_testing_base.py
+++ b/api/controllers/console/datasets/hit_testing_base.py
@ -23,17 +23,26 @@ from libs.login import resolve_account_fallback
 from models.account import Account
 from models.dataset import Dataset
 from services.dataset_service import DatasetService
-from services.entities.knowledge_entities.knowledge_entities import RetrievalModel
+from services.entities.knowledge_entities.knowledge_entities import ExternalRetrievalModel, RetrievalModel
 from services.hit_testing_service import HitTestingService

 logger = logging.getLogger(__name__)


 class HitTestingPayload(BaseModel):
-    query: str = Field(max_length=250)
-    retrieval_model: RetrievalModel | None = None
-    external_retrieval_model: dict[str, Any] | None = Field(default=None)
-    attachment_ids: list[str] | None = None
+    query: str = Field(description="Search query text.", max_length=250)
+    retrieval_model: RetrievalModel | None = Field(
+        default=None,
+        description="Retrieval model configuration. Controls how chunks are searched and ranked.",
+    )
+    external_retrieval_model: ExternalRetrievalModel | None = Field(
+        default=None,
+        description="Retrieval settings for external knowledge bases.",
+    )
+    attachment_ids: list[str] | None = Field(
+        default=None,
+        description="List of attachment IDs to include in the retrieval context.",
+    )


 class DatasetsHitTestingBase:
--- a/api/controllers/service_api/app/annotation.py
+++ b/api/controllers/service_api/app/annotation.py
@ -23,20 +23,25 @@ from services.annotation_service import (


 class AnnotationCreatePayload(BaseModel):
-    question: str = Field(description="Annotation question")
-    answer: str = Field(description="Annotation answer")
+    question: str = Field(description="Annotation question.")
+    answer: str = Field(description="Annotation answer.")


 class AnnotationReplyActionPayload(BaseModel):
-    score_threshold: float = Field(description="Score threshold for annotation matching")
-    embedding_provider_name: str = Field(description="Embedding provider name")
-    embedding_model_name: str = Field(description="Embedding model name")
+    score_threshold: float = Field(
+        description=(
+            "Minimum similarity score for an annotation to be considered a match. Higher values require closer matches."
+        ),
+        json_schema_extra={"format": "float"},
+    )
+    embedding_provider_name: str = Field(description="Name of the embedding model provider.")
+    embedding_model_name: str = Field(description="Name of the embedding model to use for annotation matching.")


 class AnnotationListQuery(BaseModel):
-    page: int = Field(default=1, ge=1, description="Page number")
-    limit: int = Field(default=20, ge=1, description="Number of annotations per page")
-    keyword: str = Field(default="", description="Keyword to search annotations")
+    page: int = Field(default=1, ge=1, description="Page number for pagination.")
+    limit: int = Field(default=20, ge=1, description="Number of items per page.")
+    keyword: str = Field(default="", description="Keyword to filter annotations by question or answer content.")


 class AnnotationJobStatusResponse(ResponseModel):
@ -46,7 +51,7 @@ class AnnotationJobStatusResponse(ResponseModel):


 ANNOTATION_REPLY_ACTION_PARAM = {
-    "description": "Action to perform: 'enable' or 'disable'",
+    "description": "Action to perform: `enable` or `disable`.",
    "enum": ["enable", "disable"],
    "type": "string",
 }
@ -125,7 +130,15 @@ class AnnotationReplyActionStatusApi(Resource):
    )
    @service_api_ns.doc("get_annotation_reply_action_status")
    @service_api_ns.doc(description="Get the status of an annotation reply action job")
-    @service_api_ns.doc(params={"action": "Action type", "job_id": "Job ID"})
+    @service_api_ns.doc(
+        params={
+            "action": ANNOTATION_REPLY_ACTION_PARAM,
+            "job_id": (
+                "Job ID returned by "
+                "[Configure Annotation Reply](/api-reference/annotations/configure-annotation-reply)."
+            ),
+        }
+    )
    @service_api_ns.doc(
        responses={
            200: "Job status retrieved successfully",
@ -248,7 +261,7 @@ class AnnotationUpdateDeleteApi(Resource):
    @service_api_ns.expect(service_api_ns.models[AnnotationCreatePayload.__name__])
    @service_api_ns.doc("update_annotation")
    @service_api_ns.doc(description="Update an existing annotation")
-    @service_api_ns.doc(params={"annotation_id": "Annotation ID"})
+    @service_api_ns.doc(params={"annotation_id": "The unique identifier of the annotation to update."})
    @service_api_ns.doc(
        responses={
            200: "Annotation updated successfully",
@ -284,7 +297,7 @@ class AnnotationUpdateDeleteApi(Resource):
    )
    @service_api_ns.doc("delete_annotation")
    @service_api_ns.doc(description="Delete an annotation")
-    @service_api_ns.doc(params={"annotation_id": "Annotation ID"})
+    @service_api_ns.doc(params={"annotation_id": "The unique identifier of the annotation to delete."})
    @service_api_ns.doc(
        responses={
            204: "Annotation deleted successfully",
--- a/api/controllers/service_api/app/audio.py
+++ b/api/controllers/service_api/app/audio.py
@ -64,7 +64,16 @@ class AudioApi(Resource):
    )
    @service_api_ns.doc("audio_to_text")
    @service_api_ns.doc(description="Convert audio to text using speech-to-text")
-    @service_api_ns.doc(consumes=["multipart/form-data"], params=multipart_file_params(include_user=True))
+    @service_api_ns.doc(
+        consumes=["multipart/form-data"],
+        params=multipart_file_params(
+            include_user=True,
+            file_description=(
+                "Audio file to transcribe. Supported MIME types: `audio/mp3`, `audio/mpga`, `audio/m4a`, "
+                "`audio/wav`, and `audio/amr`. File size limit is `30 MB`."
+            ),
+        ),
+    )
    @service_api_ns.doc(
        responses={
            200: "Audio successfully transcribed",
--- a/api/controllers/service_api/app/completion.py
+++ b/api/controllers/service_api/app/completion.py
@ -5,6 +5,7 @@ from uuid import UUID
 from flask import request
 from flask_restx import Resource
 from pydantic import BaseModel, Field, field_validator
+from pydantic.json_schema import SkipJsonSchema
 from werkzeug.exceptions import BadRequest, InternalServerError, NotFound

 import services
@ -20,7 +21,12 @@ from controllers.service_api.app.error import (
    ProviderNotInitializeError,
    ProviderQuotaExceededError,
 )
-from controllers.service_api.schema import expect_user_json, expect_with_user, json_or_event_stream_response
+from controllers.service_api.schema import (
+    InputFileList,
+    expect_user_json,
+    expect_with_user,
+    json_or_event_stream_response,
+)
 from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
 from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
 from core.app.entities.app_invoke_entities import InvokeFrom
@ -52,24 +58,84 @@ def _resolve_agent_app_streaming(*, app_mode: AppMode, response_mode: str | None


 class CompletionRequestPayload(BaseModel):
-    inputs: dict[str, Any]
-    query: str = Field(default="")
-    files: list[dict[str, Any]] | None = Field(default=None)
-    response_mode: Literal["blocking", "streaming"] | None = None
-    retriever_from: str = Field(default="dev")
-    trace_session_id: str | None = Field(default=None, description="Trace session ID for observability grouping")
+    inputs: dict[str, Any] = Field(
+        description=(
+            "Values for app-defined variables. Refer to the `user_input_form` field in the "
+            "[Get App Parameters](/api-reference/applications/get-app-parameters) response to discover expected "
+            "variable names and types."
+        )
+    )
+    query: str = Field(default="", description="User input or prompt content.")
+    files: InputFileList = Field(
+        default=None,
+        description=(
+            "File list for multimodal understanding, including images, documents, audio, and video. To attach a "
+            "local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned "
+            "`id` as `upload_file_id` with `transfer_method: local_file`."
+        ),
+    )
+    response_mode: Literal["blocking", "streaming"] | None = Field(
+        default=None,
+        description=(
+            "Response mode. `streaming` uses Server-Sent Events; `blocking` returns after completion. When omitted, "
+            "the request runs in blocking mode."
+        ),
+    )
+    retriever_from: SkipJsonSchema[str] = Field(default="dev")
+    trace_session_id: SkipJsonSchema[str | None] = Field(
+        default=None, description="Trace session ID for observability grouping"
+    )


 class ChatRequestPayload(BaseModel):
-    inputs: dict[str, Any]
-    query: str
-    files: list[dict[str, Any]] | None = Field(default=None)
-    response_mode: Literal["blocking", "streaming"] | None = None
-    conversation_id: UUIDStrOrEmpty | None = Field(default=None, description="Conversation UUID")
-    retriever_from: str = Field(default="dev")
-    auto_generate_name: bool = Field(default=True, description="Auto generate conversation name")
-    workflow_id: str | None = Field(default=None, description="Workflow ID for advanced chat")
-    trace_session_id: str | None = Field(default=None, description="Trace session ID for observability grouping")
+    inputs: dict[str, Any] = Field(
+        description=(
+            "Values for app-defined variables. Refer to the `user_input_form` field in the "
+            "[Get App Parameters](/api-reference/applications/get-app-parameters) response to discover expected "
+            "variable names and types."
+        )
+    )
+    query: str = Field(description="User input or question content.")
+    files: InputFileList = Field(
+        default=None,
+        description=(
+            "File list for multimodal understanding, including images, documents, audio, and video. To attach a "
+            "local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned "
+            "`id` as `upload_file_id` with `transfer_method: local_file`."
+        ),
+    )
+    response_mode: Literal["blocking", "streaming"] | None = Field(
+        default=None,
+        description=(
+            "Response mode. `streaming` uses Server-Sent Events; `blocking` returns after completion. New Agent app "
+            "mode supports streaming only. When omitted, non-Agent apps run in blocking mode and new Agent apps stream."
+        ),
+    )
+    conversation_id: UUIDStrOrEmpty | None = Field(
+        default=None,
+        description=(
+            "Conversation ID to continue a conversation. Omit this field or pass an empty string to start a new "
+            "conversation, then pass the returned `conversation_id` in subsequent requests."
+        ),
+    )
+    retriever_from: SkipJsonSchema[str] = Field(default="dev")
+    auto_generate_name: bool = Field(
+        default=True,
+        description=(
+            "Auto-generate the conversation title. If `false`, use the Rename Conversation API with "
+            "`auto_generate: true` to generate the title asynchronously."
+        ),
+    )
+    workflow_id: str | None = Field(
+        default=None,
+        description=(
+            "Published workflow version ID to execute for advanced chat. If omitted, the app's current published "
+            "workflow is used."
+        ),
+    )
+    trace_session_id: SkipJsonSchema[str | None] = Field(
+        default=None, description="Trace session ID for observability grouping"
+    )

    @field_validator("conversation_id", mode="before")
    @classmethod
@ -206,7 +272,9 @@ class CompletionStopApi(Resource):
    @expect_user_json(service_api_ns)
    @service_api_ns.doc("stop_completion")
    @service_api_ns.doc(description="Stop a running completion task")
-    @service_api_ns.doc(params={"task_id": "The ID of the task to stop"})
+    @service_api_ns.doc(
+        params={"task_id": ("Task ID, obtained from a streaming chunk returned by the Send Completion Message API.")}
+    )
    @service_api_ns.doc(
        responses={
            200: "Task stopped successfully",
@ -355,7 +423,9 @@ class ChatStopApi(Resource):
    @expect_user_json(service_api_ns)
    @service_api_ns.doc("stop_chat_message")
    @service_api_ns.doc(description="Stop a running chat message generation")
-    @service_api_ns.doc(params={"task_id": "The ID of the task to stop"})
+    @service_api_ns.doc(
+        params={"task_id": "Task ID, obtained from a streaming chunk returned by the Send Chat Message API."}
+    )
    @service_api_ns.doc(
        responses={
            200: "Task stopped successfully",
--- a/api/controllers/service_api/app/conversation.py
+++ b/api/controllers/service_api/app/conversation.py
@ -30,18 +30,28 @@ from services.conversation_service import ConversationService


 class ConversationListQuery(BaseModel):
-    last_id: UUIDStrOrEmpty | None = Field(default=None, description="Last conversation ID for pagination")
-    limit: int = Field(default=20, ge=1, le=100, description="Number of conversations to return")
+    last_id: UUIDStrOrEmpty | None = Field(
+        default=None,
+        description="The ID of the last record on the current page. Used to fetch the next page.",
+    )
+    limit: int = Field(default=20, ge=1, le=100, description="Number of records to return.")
    sort_by: Literal["created_at", "-created_at", "updated_at", "-updated_at"] = Field(
-        default="-updated_at", description="Sort order for conversations"
+        default="-updated_at",
+        description="Sorting field. Use the `-` prefix for descending order.",
    )


 class ConversationVariablesQuery(BaseModel):
-    last_id: UUIDStrOrEmpty | None = Field(default=None, description="Last variable ID for pagination")
-    limit: int = Field(default=20, ge=1, le=100, description="Number of variables to return")
+    last_id: UUIDStrOrEmpty | None = Field(
+        default=None,
+        description="The ID of the last record on the current page. Used to fetch the next page.",
+    )
+    limit: int = Field(default=20, ge=1, le=100, description="Number of records to return.")
    variable_name: str | None = Field(
-        default=None, description="Filter variables by name", min_length=1, max_length=255
+        default=None,
+        description="Filter variables by a specific name.",
+        min_length=1,
+        max_length=255,
    )

    @field_validator("variable_name", mode="before")
@ -69,7 +79,7 @@ class ConversationVariablesQuery(BaseModel):


 class ConversationVariableUpdatePayload(BaseModel):
-    value: Any
+    value: Any = Field(description="The new value for the variable. Must match the variable's expected type.")


 class ConversationVariableResponse(ResponseModel):
@ -221,7 +231,7 @@ class ConversationDetailApi(Resource):
    @expect_user_json(service_api_ns)
    @service_api_ns.doc("delete_conversation")
    @service_api_ns.doc(description="Delete a specific conversation")
-    @service_api_ns.doc(params={"c_id": "Conversation ID"})
+    @service_api_ns.doc(params={"c_id": "Conversation ID."})
    @service_api_ns.doc(
        responses={
            204: "Conversation deleted successfully",
@ -263,7 +273,7 @@ class ConversationRenameApi(Resource):
    @expect_with_user(service_api_ns, ConversationRenamePayload)
    @service_api_ns.doc("rename_conversation")
    @service_api_ns.doc(description="Rename a conversation or auto-generate a name")
-    @service_api_ns.doc(params={"c_id": "Conversation ID"})
+    @service_api_ns.doc(params={"c_id": "Conversation ID."})
    @service_api_ns.doc(
        responses={
            200: "Conversation renamed successfully",
@ -315,7 +325,7 @@ class ConversationVariablesApi(Resource):
    @service_api_ns.doc(params=query_params_from_model(ConversationVariablesQuery))
    @service_api_ns.doc("list_conversation_variables")
    @service_api_ns.doc(description="List all variables for a conversation")
-    @service_api_ns.doc(params={"c_id": "Conversation ID"})
+    @service_api_ns.doc(params={"c_id": "Conversation ID."})
    @service_api_ns.doc(
        responses={
            200: "Variables retrieved successfully",
@ -375,7 +385,7 @@ class ConversationVariableDetailApi(Resource):
    @expect_with_user(service_api_ns, ConversationVariableUpdatePayload)
    @service_api_ns.doc("update_conversation_variable")
    @service_api_ns.doc(description="Update a conversation variable's value")
-    @service_api_ns.doc(params={"c_id": "Conversation ID", "variable_id": "Variable ID"})
+    @service_api_ns.doc(params={"c_id": "Conversation ID.", "variable_id": "Variable ID."})
    @service_api_ns.doc(
        responses={
            200: "Variable updated successfully",
--- a/api/controllers/service_api/app/file_preview.py
+++ b/api/controllers/service_api/app/file_preview.py
@ -25,7 +25,10 @@ logger = logging.getLogger(__name__)


 class FilePreviewQuery(BaseModel):
-    as_attachment: bool = Field(default=False, description="Download as attachment")
+    as_attachment: bool = Field(
+        default=False,
+        description="If `true`, forces the file to download as an attachment instead of previewing in browser.",
+    )


 register_schema_model(service_api_ns, FilePreviewQuery)
@ -83,7 +86,14 @@ class FilePreviewApi(Resource):
    @binary_response(service_api_ns, FILE_PREVIEW_RESPONSE_MEDIA_TYPES)
    @service_api_ns.doc("preview_file")
    @service_api_ns.doc(description="Preview or download a file uploaded via Service API")
-    @service_api_ns.doc(params={"file_id": "UUID of the file to preview"})
+    @service_api_ns.doc(
+        params={
+            "file_id": (
+                "The unique identifier of the file to preview, obtained from the "
+                "[Upload File](/api-reference/files/upload-file) API response."
+            )
+        }
+    )
    @service_api_ns.doc(
        responses={
            200: "File retrieved successfully",
--- a/api/controllers/service_api/app/message.py
+++ b/api/controllers/service_api/app/message.py
@ -31,8 +31,8 @@ logger = logging.getLogger(__name__)


 class FeedbackListQuery(BaseModel):
-    page: int = Field(default=1, ge=1, description="Page number")
-    limit: int = Field(default=20, ge=1, le=101, description="Number of feedbacks per page")
+    page: int = Field(default=1, ge=1, description="Page number for pagination.")
+    limit: int = Field(default=20, ge=1, le=101, description="Number of records per page.")


 class AppFeedbackResponse(ResponseModel):
@ -142,7 +142,7 @@ class MessageFeedbackApi(Resource):
    @service_api_ns.response(200, "Feedback submitted successfully", service_api_ns.models[ResultResponse.__name__])
    @service_api_ns.doc("create_message_feedback")
    @service_api_ns.doc(description="Submit feedback for a message")
-    @service_api_ns.doc(params={"message_id": "Message ID"})
+    @service_api_ns.doc(params={"message_id": "Message ID."})
    @service_api_ns.doc(
        responses={
            200: "Feedback submitted successfully",
--- a/api/controllers/service_api/app/workflow.py
+++ b/api/controllers/service_api/app/workflow.py
@ -7,6 +7,7 @@ from dateutil.parser import isoparse
 from flask import request
 from flask_restx import Resource, fields
 from pydantic import BaseModel, Field, field_validator
+from pydantic.json_schema import SkipJsonSchema
 from sqlalchemy.orm import sessionmaker
 from werkzeug.exceptions import BadRequest, InternalServerError, NotFound

@ -58,19 +59,41 @@ logger = logging.getLogger(__name__)


 class WorkflowRunPayload(WorkflowRunPayloadBase):
-    response_mode: Literal["blocking", "streaming"] | None = None
-    trace_session_id: str | None = Field(default=None, description="Trace session ID for observability grouping")
+    response_mode: Literal["blocking", "streaming"] | None = Field(
+        default=None,
+        description=(
+            "Response mode. Use `blocking` for synchronous responses or `streaming` for Server-Sent Events. "
+            "When omitted, the request runs in blocking mode."
+        ),
+    )
+    trace_session_id: SkipJsonSchema[str | None] = Field(
+        default=None, description="Trace session ID for observability grouping"
+    )


 class WorkflowLogQuery(BaseModel):
-    keyword: str | None = None
-    status: Literal["succeeded", "failed", "stopped"] | None = None
-    created_at__before: str | None = None
-    created_at__after: str | None = None
-    created_by_end_user_session_id: str | None = None
-    created_by_account: str | None = None
-    page: int = Field(default=1, ge=1, le=99999)
-    limit: int = Field(default=20, ge=1, le=100)
+    keyword: str | None = Field(default=None, description="Keyword to search in logs.")
+    status: Literal["succeeded", "failed", "stopped"] | None = Field(
+        default=None,
+        description="Filter by execution status.",
+    )
+    created_at__before: str | None = Field(
+        default=None,
+        description="Filter logs created before this ISO 8601 timestamp.",
+        json_schema_extra={"format": "date-time"},
+    )
+    created_at__after: str | None = Field(
+        default=None,
+        description="Filter logs created after this ISO 8601 timestamp.",
+        json_schema_extra={"format": "date-time"},
+    )
+    created_by_end_user_session_id: str | None = Field(
+        default=None,
+        description="Filter by end user session ID.",
+    )
+    created_by_account: str | None = Field(default=None, description="Filter by account ID.")
+    page: int = Field(default=1, ge=1, le=99999, description="Page number for pagination.")
+    limit: int = Field(default=20, ge=1, le=100, description="Number of items per page.")


 register_schema_models(service_api_ns, WorkflowRunPayload, WorkflowLogQuery)
@ -226,7 +249,11 @@ class WorkflowRunDetailApi(Resource):
    )
    @service_api_ns.doc("get_workflow_run_detail")
    @service_api_ns.doc(description="Get workflow run details")
-    @service_api_ns.doc(params={"workflow_run_id": "Workflow run ID"})
+    @service_api_ns.doc(
+        params={
+            "workflow_run_id": "Workflow run ID, obtained from the workflow execution response or streaming events."
+        }
+    )
    @service_api_ns.doc(
        responses={
            200: "Workflow run details retrieved successfully",
@ -397,7 +424,14 @@ class WorkflowRunByIdApi(Resource):
    @json_or_event_stream_response(service_api_ns)
    @service_api_ns.doc("run_workflow_by_id")
    @service_api_ns.doc(description="Execute a specific workflow by ID")
-    @service_api_ns.doc(params={"workflow_id": "Workflow ID to execute"})
+    @service_api_ns.doc(
+        params={
+            "workflow_id": (
+                "Workflow ID of the specific version to execute. This value is returned in the `workflow_id` field "
+                "of workflow run responses."
+            )
+        }
+    )
    @service_api_ns.doc(
        responses={
            200: "Workflow executed successfully",
@ -482,7 +516,9 @@ class WorkflowTaskStopApi(Resource):
    @expect_user_json(service_api_ns)
    @service_api_ns.doc("stop_workflow_task")
    @service_api_ns.doc(description="Stop a running workflow task")
-    @service_api_ns.doc(params={"task_id": "Task ID to stop"})
+    @service_api_ns.doc(
+        params={"task_id": "Task ID, obtained from the streaming chunk returned by the Run Workflow API."}
+    )
    @service_api_ns.doc(
        responses={
            200: "Task stopped successfully",
--- a/api/controllers/service_api/app/workflow_events.py
+++ b/api/controllers/service_api/app/workflow_events.py
@ -32,9 +32,25 @@ from services.workflow_event_snapshot_service import build_workflow_event_stream


 class WorkflowEventsQuery(BaseModel):
-    user: str = Field(..., description="End user identifier")
-    include_state_snapshot: bool = Field(default=False, description="Replay from persisted state snapshot")
-    continue_on_pause: bool = Field(default=False, description="Keep the stream open across workflow_paused events")
+    user: str = Field(
+        ...,
+        description="End-user identifier that originally triggered the run. Must match the creator of the run.",
+    )
+    include_state_snapshot: bool = Field(
+        default=False,
+        description=(
+            "When `true`, replay from the persisted state snapshot to include a status summary of already-executed "
+            "nodes before streaming new events."
+        ),
+    )
+    continue_on_pause: bool = Field(
+        default=False,
+        description=(
+            "Set to `true` to keep the stream open across multiple `workflow_paused` events, which is useful when "
+            "the workflow has more than one Human Input node in sequence. By default, the stream closes after the "
+            "first pause."
+        ),
+    )


 register_schema_models(service_api_ns, WorkflowEventsQuery)
@ -65,7 +81,7 @@ class WorkflowEventsApi(Resource):
    @event_stream_response(service_api_ns)
    @service_api_ns.doc("get_workflow_events")
    @service_api_ns.doc(description="Get workflow execution events stream after resume")
-    @service_api_ns.doc(params={"task_id": "Workflow run ID"})
+    @service_api_ns.doc(params={"task_id": "Workflow run ID returned by the original workflow run request."})
    @service_api_ns.doc(params=query_params_from_model(WorkflowEventsQuery))
    @service_api_ns.doc(
        responses={
--- a/api/controllers/service_api/dataset/dataset.py
+++ b/api/controllers/service_api/dataset/dataset.py
@ -1,8 +1,17 @@
-from typing import Any, Literal, override
+from typing import Annotated, Literal, override
 from uuid import UUID

 from flask import request
-from pydantic import BaseModel, ConfigDict, Field, GetJsonSchemaHandler, RootModel, field_validator, model_validator
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    GetJsonSchemaHandler,
+    RootModel,
+    WithJsonSchema,
+    field_validator,
+    model_validator,
+)
 from werkzeug.exceptions import Forbidden, NotFound

 import services
@ -33,7 +42,12 @@ from models.dataset import DatasetPermissionEnum
 from models.enums import TagType
 from models.provider_ids import ModelProviderID
 from services.dataset_service import DatasetPermissionService, DatasetService, DocumentService
-from services.entities.knowledge_entities.knowledge_entities import RetrievalModel
+from services.entities.knowledge_entities.knowledge_entities import (
+    ExternalRetrievalModel,
+    KnowledgeProvider,
+    RetrievalModel,
+    SummaryIndexSetting,
+)
 from services.tag_service import (
    SaveTagPayload,
    TagBindingCreatePayload,
@ -46,37 +60,122 @@ from services.tag_service import (

 register_enum_models(service_api_ns, DatasetPermissionEnum)

+PartialMemberList = Annotated[
+    list[dict[str, str]] | None,
+    WithJsonSchema(
+        {
+            "anyOf": [
+                {
+                    "items": {
+                        "properties": {
+                            "user_id": {
+                                "description": "ID of the team member to grant access.",
+                                "type": "string",
+                            }
+                        },
+                        "type": "object",
+                    },
+                    "type": "array",
+                },
+                {"type": "null"},
+            ]
+        }
+    ),
+]
+

 class DatasetCreatePayload(BaseModel):
-    name: str = Field(..., min_length=1, max_length=40)
-    description: str = Field(default="", description="Dataset description (max 400 chars)", max_length=400)
-    indexing_technique: Literal["high_quality", "economy"] | None = None
-    permission: DatasetPermissionEnum | None = DatasetPermissionEnum.ONLY_ME
-    external_knowledge_api_id: str | None = None
-    provider: str = "vendor"
-    external_knowledge_id: str | None = None
-    retrieval_model: RetrievalModel | None = None
-    embedding_model: str | None = None
-    embedding_model_provider: str | None = None
-    summary_index_setting: dict | None = Field(default=None)
+    name: str = Field(..., min_length=1, max_length=40, description="Name of the knowledge base.")
+    description: str = Field(default="", description="Description of the knowledge base.", max_length=400)
+    indexing_technique: Literal["high_quality", "economy"] | None = Field(
+        default=None,
+        description="`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing.",
+    )
+    permission: DatasetPermissionEnum | None = Field(
+        default=DatasetPermissionEnum.ONLY_ME,
+        description=(
+            "Controls who can access this knowledge base. `only_me` restricts access to the creator, "
+            "`all_team_members` grants workspace-wide access, and `partial_members` grants access to specified "
+            "members."
+        ),
+    )
+    external_knowledge_api_id: str | None = Field(default=None, description="ID of the external knowledge API.")
+    provider: KnowledgeProvider = Field(
+        default="vendor",
+        description="Knowledge base provider: `vendor` for internal knowledge bases, `external` for external ones.",
+    )
+    external_knowledge_id: str | None = Field(default=None, description="ID of the external knowledge base.")
+    retrieval_model: RetrievalModel | None = Field(
+        default=None,
+        description="Retrieval model configuration. Controls how chunks are searched and ranked.",
+    )
+    embedding_model: str | None = Field(
+        default=None,
+        description=(
+            "Embedding model name. Use the `model` field from "
+            "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
+        ),
+    )
+    embedding_model_provider: str | None = Field(
+        default=None,
+        description=(
+            "Embedding model provider. Use the `provider` field from "
+            "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
+        ),
+    )
+    summary_index_setting: SummaryIndexSetting = Field(
+        default=None,
+        description="Summary index configuration.",
+    )


 class DatasetUpdatePayload(BaseModel):
-    name: str | None = Field(default=None, min_length=1, max_length=40)
-    description: str | None = Field(default=None, description="Dataset description (max 400 chars)", max_length=400)
-    indexing_technique: Literal["high_quality", "economy"] | None = None
-    permission: DatasetPermissionEnum | None = None
-    embedding_model: str | None = None
-    embedding_model_provider: str | None = None
-    retrieval_model: RetrievalModel | None = None
-    partial_member_list: list[dict[str, str]] | None = None
-    external_retrieval_model: dict[str, Any] | None = Field(default=None)
-    external_knowledge_id: str | None = None
-    external_knowledge_api_id: str | None = None
+    name: str | None = Field(default=None, min_length=1, max_length=40, description="Name of the knowledge base.")
+    description: str | None = Field(default=None, description="Description of the knowledge base.", max_length=400)
+    indexing_technique: Literal["high_quality", "economy"] | None = Field(
+        default=None,
+        description="`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing.",
+    )
+    permission: DatasetPermissionEnum | None = Field(
+        default=None,
+        description=(
+            "Controls who can access this knowledge base. `only_me` restricts access to the creator, "
+            "`all_team_members` grants workspace-wide access, and `partial_members` grants access to specified "
+            "members."
+        ),
+    )
+    embedding_model: str | None = Field(
+        default=None,
+        description=(
+            "Embedding model name. Use the `model` field from "
+            "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
+        ),
+    )
+    embedding_model_provider: str | None = Field(
+        default=None,
+        description=(
+            "Embedding model provider. Use the `provider` field from "
+            "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
+        ),
+    )
+    retrieval_model: RetrievalModel | None = Field(
+        default=None,
+        description="Retrieval model configuration. Controls how chunks are searched and ranked.",
+    )
+    partial_member_list: PartialMemberList = Field(
+        default=None,
+        description="List of team members with access when `permission` is `partial_members`.",
+    )
+    external_retrieval_model: ExternalRetrievalModel = Field(
+        default=None,
+        description="Retrieval settings for external knowledge bases.",
+    )
+    external_knowledge_id: str | None = Field(default=None, description="ID of the external knowledge base.")
+    external_knowledge_api_id: str | None = Field(default=None, description="ID of the external knowledge API.")


 class DocumentStatusPayload(BaseModel):
-    document_ids: list[str] = Field(default_factory=list, description="Document IDs to update")
+    document_ids: list[str] = Field(default_factory=list, description="List of document IDs to update.")


 DOCUMENT_STATUS_ACTION_PARAM = {
@ -87,7 +186,7 @@ DOCUMENT_STATUS_ACTION_PARAM = {


 class TagNamePayload(BaseModel):
-    name: str = Field(..., min_length=1, max_length=50)
+    name: str = Field(..., min_length=1, max_length=50, description="Tag name.")


 class TagCreatePayload(TagNamePayload):
@ -95,16 +194,16 @@ class TagCreatePayload(TagNamePayload):


 class TagUpdatePayload(TagNamePayload):
-    tag_id: str
+    tag_id: str = Field(description="Tag ID to update.")


 class TagDeletePayload(BaseModel):
-    tag_id: str
+    tag_id: str = Field(description="Tag ID to delete.")


 class TagBindingPayload(BaseModel):
-    tag_ids: list[str]
-    target_id: str
+    tag_ids: list[str] = Field(description="Tag IDs to bind.")
+    target_id: str = Field(description="Knowledge base ID to bind the tags to.")

    @field_validator("tag_ids")
    @classmethod
@ -119,7 +218,7 @@ class TagUnbindingPayload(BaseModel):

    tag_ids: list[str] = Field(default_factory=list)
    tag_id: str | None = None
-    target_id: str
+    target_id: str = Field(description="Knowledge base ID.")

    @classmethod
    @override
@ -134,7 +233,7 @@ class TagUnbindingPayload(BaseModel):
            "minItems": 1,
            "type": "array",
        }
-        target_id_property = {"title": "Target Id", "type": "string"}
+        target_id_property = {"description": "Knowledge base ID.", "title": "Target Id", "type": "string"}
        return {
            "anyOf": [
                {
@ -192,11 +291,14 @@ class KnowledgeTagListResponse(RootModel[list[KnowledgeTagResponse]]):


 class DatasetListQuery(BaseModel):
-    page: int = Field(default=1, description="Page number")
-    limit: int = Field(default=20, description="Number of items per page")
-    keyword: str | None = Field(default=None, description="Search keyword")
-    include_all: bool = Field(default=False, description="Include all datasets")
-    tag_ids: list[str] = Field(default_factory=list, description="Filter by tag IDs")
+    page: int = Field(default=1, description="Page number to retrieve.")
+    limit: int = Field(default=20, description="Number of items per page. Server caps at `100`.")
+    keyword: str | None = Field(default=None, description="Search keyword to filter by name.")
+    include_all: bool = Field(
+        default=False,
+        description="Whether to include all knowledge bases regardless of permissions.",
+    )
+    tag_ids: list[str] = Field(default_factory=list, description="Tag IDs to filter by.")


 class DatasetDetailWithPartialMembersResponse(DatasetDetailResponse):
@ -409,7 +511,7 @@ class DatasetApi(DatasetApiResource):
    )
    @service_api_ns.doc("get_dataset")
    @service_api_ns.doc(description="Get a specific dataset by ID")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
    @service_api_ns.doc(
        responses={
            200: "Dataset retrieved successfully",
@ -488,7 +590,7 @@ class DatasetApi(DatasetApiResource):
    @service_api_ns.expect(service_api_ns.models[DatasetUpdatePayload.__name__])
    @service_api_ns.doc("update_dataset")
    @service_api_ns.doc(description="Update an existing dataset")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
    @service_api_ns.doc(
        responses={
            200: "Dataset updated successfully",
@ -585,7 +687,7 @@ class DatasetApi(DatasetApiResource):
    )
    @service_api_ns.doc("delete_dataset")
    @service_api_ns.doc(description="Delete a dataset")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
    @service_api_ns.doc(
        responses={
            204: "Dataset deleted successfully",
@ -648,7 +750,7 @@ class DocumentStatusApi(DatasetApiResource):
    @service_api_ns.doc(description="Batch update document status")
    @service_api_ns.doc(
        params={
-            "dataset_id": "Dataset ID",
+            "dataset_id": "Knowledge base ID.",
            "action": DOCUMENT_STATUS_ACTION_PARAM,
        }
    )
@ -927,7 +1029,7 @@ class DatasetTagsBindingStatusApi(DatasetApiResource):
    )
    @service_api_ns.doc("get_dataset_tags_binding_status")
    @service_api_ns.doc(description="Get tags bound to a specific dataset")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
    @service_api_ns.doc(
        responses={
            200: "Tags retrieved successfully",
--- a/api/controllers/service_api/dataset/document.py
+++ b/api/controllers/service_api/dataset/document.py
@ -9,11 +9,11 @@ import json
 from collections.abc import Mapping
 from contextlib import ExitStack
 from copy import deepcopy
-from typing import Any, Literal, Self, override
+from typing import Annotated, Any, Literal, Self, override
 from uuid import UUID

 from flask import request, send_file
-from pydantic import BaseModel, Field, GetJsonSchemaHandler, field_validator, model_validator
+from pydantic import BaseModel, Field, GetJsonSchemaHandler, WithJsonSchema, field_validator, model_validator
 from sqlalchemy import desc, func, select
 from werkzeug.exceptions import Forbidden, NotFound

@ -63,6 +63,8 @@ from models.dataset import Dataset, Document, DocumentSegment
 from models.enums import SegmentStatus
 from services.dataset_service import DatasetService, DocumentService
 from services.entities.knowledge_entities.knowledge_entities import (
+    DocForm,
+    IndexingTechnique,
    KnowledgeConfig,
    ProcessRule,
    RetrievalModel,
@ -72,16 +74,44 @@ from services.summary_index_service import SummaryIndexService


 class DocumentTextCreatePayload(BaseModel):
-    name: str
-    text: str
-    process_rule: ProcessRule | None = None
-    original_document_id: str | None = None
-    doc_form: str = Field(default="text_model")
-    doc_language: str = Field(default="English")
-    indexing_technique: str | None = None
-    retrieval_model: RetrievalModel | None = None
-    embedding_model: str | None = None
-    embedding_model_provider: str | None = None
+    name: str = Field(description="Document name.")
+    text: str = Field(description="Document text content.")
+    process_rule: ProcessRule | None = Field(default=None, description="Processing rules for chunking.")
+    original_document_id: str | None = Field(default=None, description="Original document ID for replacement.")
+    doc_form: DocForm = Field(
+        default="text_model",
+        description=(
+            "`text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, "
+            "`qa_model` for question-answer pair extraction."
+        ),
+    )
+    doc_language: str = Field(default="English", description="Language of the document for processing optimization.")
+    indexing_technique: IndexingTechnique = Field(
+        default=None,
+        description=(
+            "`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing. "
+            "Required when adding the first document to a knowledge base; subsequent documents inherit the "
+            "knowledge base's indexing technique if omitted."
+        ),
+    )
+    retrieval_model: RetrievalModel | None = Field(
+        default=None,
+        description="Retrieval model configuration. Controls how chunks are searched and ranked.",
+    )
+    embedding_model: str | None = Field(
+        default=None,
+        description=(
+            "Embedding model name. Use the `model` field from "
+            "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
+        ),
+    )
+    embedding_model_provider: str | None = Field(
+        default=None,
+        description=(
+            "Embedding model provider. Use the `provider` field from "
+            "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
+        ),
+    )

    @field_validator("doc_form")
    @classmethod
@ -92,12 +122,21 @@ class DocumentTextCreatePayload(BaseModel):


 class DocumentTextUpdate(BaseModel):
-    name: str | None = None
-    text: str | None = None
-    process_rule: ProcessRule | None = None
-    doc_form: str = "text_model"
-    doc_language: str = "English"
-    retrieval_model: RetrievalModel | None = None
+    name: str | None = Field(default=None, description="Document name. Required when `text` is provided.")
+    text: str | None = Field(default=None, description="Document text content.")
+    process_rule: ProcessRule | None = Field(default=None, description="Processing rules for chunking.")
+    doc_form: DocForm = Field(
+        default="text_model",
+        description=(
+            "`text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, "
+            "`qa_model` for question-answer pair extraction."
+        ),
+    )
+    doc_language: str = Field(default="English", description="Language of the document for processing optimization.")
+    retrieval_model: RetrievalModel | None = Field(
+        default=None,
+        description="Retrieval model configuration. Controls how chunks are searched and ranked.",
+    )

    @field_validator("doc_form")
    @classmethod
@ -119,7 +158,7 @@ class DocumentTextUpdate(BaseModel):
        text_branch_properties["name"] = _non_null_property_schema(properties.get("name"))

        no_text_branch_properties = deepcopy(properties)
-        no_text_branch_properties["text"] = {"type": "null"}
+        no_text_branch_properties["text"] = {"description": "Document text content.", "type": "null"}

        return {
            **schema,
@ -161,19 +200,41 @@ def _non_null_property_schema(property_schema: object) -> dict[str, Any]:
    return deepcopy(property_schema)


+DocumentDisplayStatus = Annotated[
+    str | None,
+    WithJsonSchema(
+        {
+            "anyOf": [
+                {
+                    "enum": ["queuing", "indexing", "paused", "error", "available", "disabled", "archived"],
+                    "type": "string",
+                },
+                {"type": "null"},
+            ]
+        }
+    ),
+]
+
+
 class DocumentListQuery(BaseModel):
-    page: int = Field(default=1, description="Page number")
-    limit: int = Field(default=20, description="Number of items per page")
-    keyword: str | None = Field(default=None, description="Search keyword")
-    status: str | None = Field(default=None, description="Document status filter")
+    page: int = Field(default=1, description="Page number to retrieve.")
+    limit: int = Field(default=20, description="Number of items per page. Server caps at `100`.")
+    keyword: str | None = Field(default=None, description="Search keyword to filter by document name.")
+    status: DocumentDisplayStatus = Field(default=None, description="Filter by display status.")


 class DocumentGetQuery(BaseModel):
-    metadata: Literal["all", "only", "without"] = Field(default="all", description="Metadata response mode")
+    metadata: Literal["all", "only", "without"] = Field(
+        default="all",
+        description=(
+            "`all` returns all fields including metadata. `only` returns only `id`, `doc_type`, and "
+            "`doc_metadata`. `without` returns all fields except `doc_metadata`."
+        ),
+    )


 DOCUMENT_CREATE_BY_FILE_PARAMS = {
-    "dataset_id": "Dataset ID",
+    "dataset_id": "Knowledge base ID.",
    "file": {
        "in": "formData",
        "type": "file",
@ -184,23 +245,32 @@ DOCUMENT_CREATE_BY_FILE_PARAMS = {
        "in": "formData",
        "type": "string",
        "required": False,
-        "description": "Optional JSON string with document creation settings.",
+        "description": (
+            "JSON string containing configuration. Accepts the same fields as "
+            "[Create Document by Text](/api-reference/documents/create-document-by-text) (`indexing_technique`, "
+            "`doc_form`, `doc_language`, `process_rule`, `retrieval_model`, `embedding_model`, "
+            "`embedding_model_provider`) except `name` and `text`."
+        ),
    },
 }
 DOCUMENT_UPDATE_BY_FILE_PARAMS = {
-    "dataset_id": "Dataset ID",
-    "document_id": "Document ID",
+    "dataset_id": "Knowledge base ID.",
+    "document_id": "Document ID.",
    "file": {
        "in": "formData",
        "type": "file",
        "required": False,
-        "description": "Replacement document file.",
+        "description": "Replacement document file to upload.",
    },
    "data": {
        "in": "formData",
        "type": "string",
        "required": False,
-        "description": "Optional JSON string with document update settings.",
+        "description": (
+            "JSON string containing document update settings such as `doc_form`, `doc_language`, `process_rule`, "
+            "`retrieval_model`, `embedding_model`, and `embedding_model_provider`. `name` and `text` are not used "
+            "for file updates."
+        ),
    },
 }

@ -422,7 +492,7 @@ class DocumentAddByTextApi(DatasetApiResource):
    @service_api_ns.expect(service_api_ns.models[DocumentTextCreatePayload.__name__])
    @service_api_ns.doc("create_document_by_text")
    @service_api_ns.doc(description="Create a new document by providing text content")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
    @service_api_ns.doc(
        responses={
            200: "Document created successfully",
@ -454,7 +524,7 @@ class DeprecatedDocumentAddByTextApi(DatasetApiResource):
            "Use /datasets/{dataset_id}/document/create-by-text instead."
        )
    )
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
    @service_api_ns.doc(
        responses={
            200: "Document created successfully",
@ -499,7 +569,7 @@ class DocumentUpdateByTextApi(DatasetApiResource):
    @service_api_ns.expect(service_api_ns.models[DocumentTextUpdate.__name__])
    @service_api_ns.doc("update_document_by_text")
    @service_api_ns.doc(description="Update an existing document by providing text content")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."})
    @service_api_ns.doc(
        responses={
            200: "Document updated successfully",
@ -530,7 +600,7 @@ class DeprecatedDocumentUpdateByTextApi(DatasetApiResource):
            "Use /datasets/{dataset_id}/documents/{document_id}/update-by-text instead."
        )
    )
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."})
    @service_api_ns.doc(
        responses={
            200: "Document updated successfully",
@ -839,7 +909,7 @@ class DocumentListApi(DatasetApiResource):
    )
    @service_api_ns.doc("list_documents")
    @service_api_ns.doc(description="List all documents in a dataset")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID", **query_params_from_model(DocumentListQuery)})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", **query_params_from_model(DocumentListQuery)})
    @service_api_ns.doc(
        responses={
            200: "Documents retrieved successfully",
@ -913,7 +983,7 @@ class DocumentBatchDownloadZipApi(DatasetApiResource):
    @service_api_ns.expect(service_api_ns.models[DocumentBatchDownloadZipPayload.__name__])
    @service_api_ns.doc("download_documents_as_zip")
    @service_api_ns.doc(description="Download selected uploaded documents as a single ZIP archive")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
    @service_api_ns.doc(
        responses={
            200: "ZIP archive generated successfully",
@ -965,7 +1035,7 @@ class DocumentIndexingStatusApi(DatasetApiResource):
    )
    @service_api_ns.doc("get_document_indexing_status")
    @service_api_ns.doc(description="Get indexing status for documents in a batch")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID", "batch": "Batch ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "batch": "Batch ID."})
    @service_api_ns.doc(
        responses={
            200: "Indexing status retrieved successfully",
@ -1047,7 +1117,7 @@ class DocumentDownloadApi(DatasetApiResource):
    )
    @service_api_ns.doc("get_document_download_url")
    @service_api_ns.doc(description="Get a signed download URL for a document's original uploaded file")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."})
    @service_api_ns.doc(
        responses={
            200: "Download URL generated successfully",
@ -1099,7 +1169,7 @@ class DocumentApi(DatasetApiResource):
    )
    @service_api_ns.doc("get_document")
    @service_api_ns.doc(description="Get a specific document by ID")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."})
    @service_api_ns.doc(params=query_params_from_model(DocumentGetQuery))
    @service_api_ns.doc(
        responses={
@ -1251,7 +1321,7 @@ class DocumentApi(DatasetApiResource):
    )
    @service_api_ns.doc("delete_document")
    @service_api_ns.doc(description="Delete a document")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."})
    @service_api_ns.doc(
        responses={
            204: "Document deleted successfully",
--- a/api/controllers/service_api/dataset/hit_testing.py
+++ b/api/controllers/service_api/dataset/hit_testing.py
@ -41,7 +41,7 @@ class HitTestingApi(DatasetApiResource, DatasetsHitTestingBase):
    )
    @service_api_ns.doc("dataset_hit_testing")
    @service_api_ns.doc(description="Perform hit testing on a dataset")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
    @service_api_ns.response(
        200,
        "Hit testing results",
--- a/api/controllers/service_api/dataset/metadata.py
+++ b/api/controllers/service_api/dataset/metadata.py
@ -25,7 +25,7 @@ from services.entities.knowledge_entities.knowledge_entities import (
 from services.metadata_service import MetadataService

 BUILT_IN_METADATA_ACTION_PARAM = {
-    "description": "Action to perform: 'enable' or 'disable'",
+    "description": "`enable` to activate built-in metadata fields, `disable` to deactivate them.",
    "enum": ["enable", "disable"],
    "type": "string",
 }
@ -63,7 +63,7 @@ class DatasetMetadataCreateServiceApi(DatasetApiResource):
    @service_api_ns.expect(service_api_ns.models[MetadataArgs.__name__])
    @service_api_ns.doc("create_dataset_metadata")
    @service_api_ns.doc(description="Create metadata for a dataset")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
    @service_api_ns.doc(
        responses={
            201: "Metadata created successfully",
@ -101,7 +101,7 @@ class DatasetMetadataCreateServiceApi(DatasetApiResource):
    )
    @service_api_ns.doc("get_dataset_metadata")
    @service_api_ns.doc(description="Get all metadata for a dataset")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
    @service_api_ns.doc(
        responses={
            200: "Metadata retrieved successfully",
@ -135,7 +135,7 @@ class DatasetMetadataServiceApi(DatasetApiResource):
    @service_api_ns.expect(service_api_ns.models[MetadataUpdatePayload.__name__])
    @service_api_ns.doc("update_dataset_metadata")
    @service_api_ns.doc(description="Update metadata name")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID", "metadata_id": "Metadata ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "metadata_id": "Metadata field ID."})
    @service_api_ns.doc(
        responses={
            200: "Metadata updated successfully",
@ -174,7 +174,7 @@ class DatasetMetadataServiceApi(DatasetApiResource):
    )
    @service_api_ns.doc("delete_dataset_metadata")
    @service_api_ns.doc(description="Delete metadata")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID", "metadata_id": "Metadata ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "metadata_id": "Metadata field ID."})
    @service_api_ns.doc(
        responses={
            204: "Metadata deleted successfully",
@ -211,6 +211,7 @@ class DatasetMetadataBuiltInFieldServiceApi(DatasetApiResource):
    )
    @service_api_ns.doc("get_built_in_fields")
    @service_api_ns.doc(description="Get all built-in metadata fields")
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
    @service_api_ns.doc(
        responses={
            200: "Built-in fields retrieved successfully",
@ -240,7 +241,7 @@ class DatasetMetadataBuiltInFieldActionServiceApi(DatasetApiResource):
    )
    @service_api_ns.doc("toggle_built_in_field")
    @service_api_ns.doc(description="Enable or disable built-in metadata field")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID", "action": BUILT_IN_METADATA_ACTION_PARAM})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "action": BUILT_IN_METADATA_ACTION_PARAM})
    @service_api_ns.doc(
        responses={
            200: "Action completed successfully",
@ -284,7 +285,7 @@ class DocumentMetadataEditServiceApi(DatasetApiResource):
    @service_api_ns.expect(service_api_ns.models[MetadataOperationData.__name__])
    @service_api_ns.doc("update_documents_metadata")
    @service_api_ns.doc(description="Update metadata for multiple documents")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID"})
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
    @service_api_ns.doc(
        responses={
            200: "Documents metadata updated successfully",
--- a/api/controllers/service_api/dataset/rag_pipeline/rag_pipeline_workflow.py
+++ b/api/controllers/service_api/dataset/rag_pipeline/rag_pipeline_workflow.py
@ -37,6 +37,7 @@ from services.errors.file import FileTooLargeError, UnsupportedFileTypeError
 from services.file_service import FileService
 from services.rag_pipeline.entity.pipeline_service_api_entities import (
    DatasourceNodeRunApiEntity,
+    DatasourceType,
    PipelineRunApiEntity,
 )
 from services.rag_pipeline.pipeline_generate_service import PipelineGenerateService
@ -44,14 +45,27 @@ from services.rag_pipeline.rag_pipeline import RagPipelineService


 class DatasourceNodeRunPayload(BaseModel):
-    inputs: dict[str, Any]
-    datasource_type: str
-    credential_id: str | None = None
-    is_published: bool
+    inputs: dict[str, Any] = Field(description="Input variables for the datasource node.")
+    datasource_type: DatasourceType = Field(description="Type of the datasource.")
+    credential_id: str | None = Field(
+        default=None, description="Datasource credential ID. Uses the default if omitted."
+    )
+    is_published: bool = Field(
+        description=(
+            "Whether to run the published or draft version of the node. `true` runs the published version, "
+            "`false` runs the draft."
+        )
+    )


 class DatasourcePluginsQuery(BaseModel):
-    is_published: bool = True
+    is_published: bool = Field(
+        default=True,
+        description=(
+            "Whether to retrieve nodes from the published or draft pipeline. `true` returns nodes from the published "
+            "version, `false` returns nodes from the draft."
+        ),
+    )


 class DatasourceCredentialInfoResponse(ResponseModel):
@ -114,11 +128,7 @@ class DatasourcePluginsApi(DatasetApiResource):
    )
    @service_api_ns.doc(shortcut="list_rag_pipeline_datasource_plugins")
    @service_api_ns.doc(description="List all datasource plugins for a rag pipeline")
-    @service_api_ns.doc(
-        path={
-            "dataset_id": "Dataset ID",
-        }
-    )
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
    @service_api_ns.doc(params=query_params_from_model(DatasourcePluginsQuery))
    @service_api_ns.doc(
        responses={
@ -169,11 +179,7 @@ class DatasourceNodeRunApi(DatasetApiResource):
    @event_stream_response(service_api_ns)
    @service_api_ns.doc(shortcut="pipeline_datasource_node_run")
    @service_api_ns.doc(description="Run a datasource node for a rag pipeline")
-    @service_api_ns.doc(
-        path={
-            "dataset_id": "Dataset ID",
-        }
-    )
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "node_id": "ID of the datasource node to execute."})
    @service_api_ns.doc(
        responses={
            200: "Datasource node run successfully",
@ -245,11 +251,7 @@ class PipelineRunApi(DatasetApiResource):
    @json_or_event_stream_response(service_api_ns)
    @service_api_ns.doc(shortcut="pipeline_datasource_node_run")
    @service_api_ns.doc(description="Run a datasource node for a rag pipeline")
-    @service_api_ns.doc(
-        path={
-            "dataset_id": "Dataset ID",
-        }
-    )
+    @service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
    @service_api_ns.doc(
        responses={
            200: "Pipeline run successfully",
--- a/api/controllers/service_api/dataset/segment.py
+++ b/api/controllers/service_api/dataset/segment.py
@ -47,10 +47,10 @@ from services.summary_index_service import SummaryIndexService


 class SegmentCreateItemPayload(BaseModel):
-    content: str = Field(min_length=1)
-    answer: str | None = None
-    keywords: list[str] | None = None
-    attachment_ids: list[str] | None = None
+    content: str = Field(min_length=1, description="Chunk text content.")
+    answer: str | None = Field(default=None, description="Answer content for QA mode.")
+    keywords: list[str] | None = Field(default=None, description="Keywords for the chunk.")
+    attachment_ids: list[str] | None = Field(default=None, description="Attachment file IDs.")

    @field_validator("content")
    @classmethod
@ -61,31 +61,34 @@ class SegmentCreateItemPayload(BaseModel):


 class SegmentCreatePayload(BaseModel):
-    segments: list[SegmentCreateItemPayload] = Field(min_length=1)
+    segments: list[SegmentCreateItemPayload] = Field(min_length=1, description="Array of chunk objects to create.")


 class SegmentListQuery(BaseModel):
-    limit: int = Field(default=20, ge=1)
-    page: int = Field(default=1, ge=1)
-    status: list[str] = Field(default_factory=list)
-    keyword: str | None = None
+    limit: int = Field(default=20, ge=1, description="Number of items per page. Server caps at `100`.")
+    page: int = Field(default=1, ge=1, description="Page number to retrieve.")
+    status: list[str] = Field(
+        default_factory=list,
+        description="Filter chunks by indexing status, such as `completed`, `indexing`, or `error`.",
+    )
+    keyword: str | None = Field(default=None, description="Search keyword.")


 class SegmentUpdatePayload(BaseModel):
-    segment: SegmentUpdateArgs
+    segment: SegmentUpdateArgs = Field(description="Chunk update payload.")


 class ChildChunkListQuery(BaseModel):
-    limit: int = Field(default=20, ge=1)
-    keyword: str | None = None
-    page: int = Field(default=1, ge=1)
+    limit: int = Field(default=20, ge=1, description="Number of items per page. Server caps at `100`.")
+    keyword: str | None = Field(default=None, description="Search keyword.")
+    page: int = Field(default=1, ge=1, description="Page number to retrieve.")


 class SegmentDocParams:
-    DATASET_DOCUMENT = {"dataset_id": "Dataset ID", "document_id": "Document ID"}
-    DATASET_DOCUMENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Segment ID"}
-    DATASET_DOCUMENT_PARENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Parent segment ID"}
-    DATASET_DOCUMENT_CHILD_CHUNK = {**DATASET_DOCUMENT_PARENT_SEGMENT, "child_chunk_id": "Child chunk ID"}
+    DATASET_DOCUMENT = {"dataset_id": "Knowledge base ID.", "document_id": "Document ID."}
+    DATASET_DOCUMENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Chunk ID."}
+    DATASET_DOCUMENT_PARENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Chunk ID."}
+    DATASET_DOCUMENT_CHILD_CHUNK = {**DATASET_DOCUMENT_PARENT_SEGMENT, "child_chunk_id": "Child chunk ID."}


 class SegmentCreateListResponse(ResponseModel):
--- a/api/controllers/service_api/schema.py
+++ b/api/controllers/service_api/schema.py
@ -8,19 +8,69 @@ from __future__ import annotations

 from collections.abc import Sequence
 from copy import deepcopy
-from typing import cast
+from typing import Annotated, Any, cast

 from flask_restx import Namespace
-from pydantic import BaseModel
+from pydantic import BaseModel, WithJsonSchema

-USER_PROPERTY_SCHEMA: dict[str, object] = {"description": "End user identifier", "type": "string"}
-USER_QUERY_PARAM: dict[str, object] = {"description": "End user identifier", "in": "query", "type": "string"}
-USER_FORM_PARAM: dict[str, object] = {"description": "End user identifier", "in": "formData", "type": "string"}
-FILE_FORM_PARAM: dict[str, object] = {"in": "formData", "required": True, "type": "file"}
+USER_DESCRIPTION = (
+    "User identifier, unique within the application. This identifier scopes data access; resources created with "
+    "one `user` value are only visible when queried with the same `user` value."
+)
+USER_PROPERTY_SCHEMA: dict[str, object] = {"description": USER_DESCRIPTION, "type": "string"}
+USER_QUERY_PARAM: dict[str, object] = {
+    "description": "User identifier, used for end-user context.",
+    "in": "query",
+    "type": "string",
+}
+USER_FORM_PARAM: dict[str, object] = {
+    "description": USER_DESCRIPTION,
+    "in": "formData",
+    "type": "string",
+}
+FILE_FORM_PARAM: dict[str, object] = {
+    "description": "The file to upload.",
+    "in": "formData",
+    "required": True,
+    "type": "file",
+}
 USER_FETCH_FROM_ATTR = "_dify_service_api_user_fetch_from"
 USER_REQUIRED_ATTR = "_dify_service_api_user_required"
 JSON_USER_FETCH_FROM = "JSON"

+INPUT_FILE_ITEM_SCHEMA: dict[str, object] = {
+    "type": "object",
+    "required": ["type", "transfer_method"],
+    "properties": {
+        "type": {
+            "description": "File type.",
+            "enum": ["document", "image", "audio", "video", "custom"],
+            "type": "string",
+        },
+        "transfer_method": {
+            "description": "Transfer method: `remote_url` for file URL, `local_file` for uploaded file.",
+            "enum": ["remote_url", "local_file"],
+            "type": "string",
+        },
+        "url": {
+            "description": "File URL when `transfer_method` is `remote_url`.",
+            "format": "url",
+            "type": "string",
+        },
+        "upload_file_id": {
+            "description": (
+                "Uploaded file ID obtained from the [Upload File](/api-reference/files/upload-file) API when "
+                "`transfer_method` is `local_file`."
+            ),
+            "type": "string",
+        },
+    },
+}
+INPUT_FILE_LIST_SCHEMA: dict[str, object] = {
+    "anyOf": [{"items": INPUT_FILE_ITEM_SCHEMA, "type": "array"}, {"type": "null"}]
+}
+InputFileList = Annotated[list[dict[str, Any]] | None, WithJsonSchema(INPUT_FILE_LIST_SCHEMA)]
+

 def expect_with_user(namespace: Namespace, model: type[BaseModel]):
    """Document a JSON request body as ``model`` plus Service API ``user``."""
@ -54,8 +104,12 @@ def expect_user_json(namespace: Namespace):
    return decorator


-def multipart_file_params(*, include_user: bool) -> dict[str, dict[str, object]]:
-    params: dict[str, dict[str, object]] = {"file": FILE_FORM_PARAM}
+def multipart_file_params(*, include_user: bool, file_description: str | None = None) -> dict[str, dict[str, object]]:
+    """Return fresh copies of the multipart ``file`` (and optionally ``user``) form params."""
+    file_param = deepcopy(FILE_FORM_PARAM)
+    if file_description is not None:
+        file_param["description"] = file_description
+    params: dict[str, dict[str, object]] = {"file": file_param}
    if include_user:
-        params["user"] = USER_FORM_PARAM
-    return deepcopy(params)
+        params["user"] = deepcopy(USER_FORM_PARAM)
+    return params
--- a/api/controllers/service_api/workspace/models.py
+++ b/api/controllers/service_api/workspace/models.py
@ -9,6 +9,12 @@ from graphon.model_runtime.utils.encoders import jsonable_encoder
 from services.entities.model_provider_entities import ProviderWithModelsResponse
 from services.model_provider_service import ModelProviderService

+MODEL_TYPE_PARAM = {
+    "description": "Type of model to retrieve.",
+    "enum": ["text-embedding", "rerank", "llm", "tts", "speech2text", "moderation"],
+    "type": "string",
+}
+

 class ProviderWithModelsListResponse(ResponseModel):
    data: list[ProviderWithModelsResponse]
@ -32,7 +38,7 @@ class ModelProviderAvailableModelApi(Resource):
    )
    @service_api_ns.doc("get_available_models")
    @service_api_ns.doc(description="Get available models by model type")
-    @service_api_ns.doc(params={"model_type": "Type of model to retrieve"})
+    @service_api_ns.doc(params={"model_type": MODEL_TYPE_PARAM})
    @service_api_ns.doc(
        responses={
            200: "Models retrieved successfully",
--- a/api/core/rag/entities/metadata_entities.py
+++ b/api/core/rag/entities/metadata_entities.py
@ -1,7 +1,7 @@
 from collections.abc import Sequence
-from typing import Literal
+from typing import Annotated, Literal

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, WithJsonSchema

 SupportedComparisonOperator = Literal[
    # for string or array
@ -26,6 +26,19 @@ SupportedComparisonOperator = Literal[
    "before",
    "after",
 ]
+ConditionValue = Annotated[
+    str | Sequence[str] | None | int | float,
+    WithJsonSchema(
+        {
+            "anyOf": [
+                {"type": "string"},
+                {"items": {"type": "string"}, "type": "array"},
+                {"type": "number"},
+                {"type": "null"},
+            ]
+        }
+    ),
+]


 class Condition(BaseModel):
@ -33,9 +46,23 @@ class Condition(BaseModel):
    Condition detail
    """

-    name: str
-    comparison_operator: SupportedComparisonOperator
-    value: str | Sequence[str] | None | int | float = None
+    name: str = Field(description="Metadata field name to compare against.")
+    comparison_operator: SupportedComparisonOperator = Field(
+        description=(
+            "Comparison to apply. String operators (`contains`, `not contains`, `start with`, `end with`, `is`, "
+            "`is not`, `empty`, `not empty`, `in`, `not in`) act on string or array metadata; numeric operators "
+            "(`=`, `≠`, `>`, `<`, `≥`, `≤`) act on numeric metadata; time operators (`before`, `after`) act on "
+            "time metadata."
+        )
+    )
+    value: ConditionValue = Field(
+        default=None,
+        description=(
+            "Value to compare against. Type depends on `comparison_operator`: string for most string operators, "
+            "array of strings for `in` and `not in`, number for numeric operators, and omit or use `null` for "
+            "`empty` and `not empty`."
+        ),
+    )


 class MetadataFilteringCondition(BaseModel):
@ -43,5 +70,12 @@ class MetadataFilteringCondition(BaseModel):
    Metadata Filtering Condition.
    """

-    logical_operator: Literal["and", "or"] | None = "and"
-    conditions: list[Condition] | None = Field(default=None, deprecated=True)
+    logical_operator: Literal["and", "or"] | None = Field(
+        default="and",
+        description="How to combine multiple conditions.",
+    )
+    conditions: list[Condition] | None = Field(
+        default=None,
+        deprecated=True,
+        description="List of metadata conditions to evaluate.",
+    )
--- a/api/core/rag/entities/processing_entities.py
+++ b/api/core/rag/entities/processing_entities.py
@ -1,7 +1,7 @@
 from enum import StrEnum
-from typing import Literal
+from typing import Annotated, Literal

-from pydantic import BaseModel
+from pydantic import BaseModel, Field, WithJsonSchema


 class ParentMode(StrEnum):
@ -9,19 +9,39 @@ class ParentMode(StrEnum):
    PARAGRAPH = "paragraph"


+PreProcessingRuleID = Annotated[
+    str,
+    WithJsonSchema(
+        {
+            "enum": ["remove_stopwords", "remove_extra_spaces", "remove_urls_emails"],
+            "type": "string",
+        }
+    ),
+]
+
+
 class PreProcessingRule(BaseModel):
-    id: str
-    enabled: bool
+    id: PreProcessingRuleID = Field(description="Rule identifier.")
+    enabled: bool = Field(description="Whether this preprocessing rule is enabled.")


 class Segmentation(BaseModel):
-    separator: str = "\n"
-    max_tokens: int
-    chunk_overlap: int = 0
+    separator: str = Field(default="\n", description="Custom separator for splitting text.")
+    max_tokens: int = Field(description="Maximum token count per chunk.")
+    chunk_overlap: int = Field(default=0, description="Token overlap between chunks.")


 class Rule(BaseModel):
-    pre_processing_rules: list[PreProcessingRule] | None = None
-    segmentation: Segmentation | None = None
-    parent_mode: Literal["full-doc", "paragraph"] | None = None
-    subchunk_segmentation: Segmentation | None = None
+    pre_processing_rules: list[PreProcessingRule] | None = Field(
+        default=None,
+        description="Pre-processing rules to apply before segmentation.",
+    )
+    segmentation: Segmentation | None = Field(default=None, description="Parent chunk segmentation settings.")
+    parent_mode: Literal["full-doc", "paragraph"] | None = Field(
+        default=None,
+        description="Parent-child segmentation mode.",
+    )
+    subchunk_segmentation: Segmentation | None = Field(
+        default=None,
+        description="Child chunk segmentation settings.",
+    )
--- a/api/libs/flask_restx_compat.py
+++ b/api/libs/flask_restx_compat.py
@ -122,6 +122,7 @@ def install_swagger_compatibility() -> None:
    original_description_for = Swagger.description_for
    original_serialize_operation = Swagger.serialize_operation
    original_parameters_and_request_body_for = Swagger.parameters_and_request_body_for
+    original_request_body_from_form_params = Swagger.request_body_from_form_params
    original_as_dict = Swagger.as_dict

    def get_or_create_inline_model(self: Swagger, nested_fields: dict[object, object]) -> object:
@ -203,6 +204,35 @@ def install_swagger_compatibility() -> None:
            path[method] = operation
        return not_none(path)

+    def request_body_from_form_params_with_file_description(self: Swagger, params: list[dict[str, object]]):
+        """Build the form request body, then copy file parameter descriptions onto it.
+
+        The original implementation's result is returned unchanged except that each
+        ``type == "file"`` parameter with a string name and description has that
+        description written to its ``multipart/form-data`` schema property.
+        """
+        request_body = original_request_body_from_form_params(self, params)
+        file_descriptions = {
+            param["name"]: param["description"]
+            for param in params
+            if param.get("type") == "file"
+            and isinstance(param.get("name"), str)
+            and isinstance(param.get("description"), str)
+        }
+        if not file_descriptions:
+            return request_body
+
+        # Walk to the multipart schema properties once; stop at the first unexpected shape.
+        node: object = request_body
+        for key in ("content", "multipart/form-data", "schema", "properties"):
+            node = node.get(key) if isinstance(node, dict) else None
+        if isinstance(node, dict):
+            for name, description in file_descriptions.items():
+                if isinstance(node.get(name), dict):
+                    node[name]["description"] = description
+
+        return request_body
+
    def as_dict_with_inline_dict_support(self: Swagger):
        # Temporary set RESTX_INCLUDE_ALL_MODELS = false to prevent "length changed while iterating" error
        include_all_models = current_app.config.get("RESTX_INCLUDE_ALL_MODELS", False)
@ -219,5 +249,6 @@ def install_swagger_compatibility() -> None:
    Swagger.description_for = description_for_with_explicit_summary
    Swagger.serialize_operation = serialize_operation_with_explicit_summary_tags
    Swagger.serialize_resource = serialize_resource_with_explicit_operation_tags
+    Swagger.request_body_from_form_params = request_body_from_form_params_with_file_description
    Swagger.as_dict = as_dict_with_inline_dict_support
    Swagger._dify_swagger_compatibility_installed = True
--- a/api/openapi/markdown/console-openapi.md
+++ b/api/openapi/markdown/console-openapi.md
@ -6390,9 +6390,9 @@ Request body:

 | Name | Located in | Description | Required | Schema |
 | ---- | ---------- | ----------- | -------- | ------ |
-| conversation_id | query | Conversation UUID | Yes | string |
-| first_id | query | First message ID for pagination | No | string |
-| limit | query | Number of messages to return (1-100) | No | integer, <br>**Default:** 20 |
+| conversation_id | query | Conversation ID. | Yes | string |
+| first_id | query | The ID of the first chat record on the current page. Omit this value to fetch the latest messages; for subsequent pages, use the first message ID from the current list to fetch older messages. | No | string |
+| limit | query | Number of chat history messages to return per request. | No | integer, <br>**Default:** 20 |
 | installed_app_id | path |  | Yes | string (uuid) |

 #### Responses
@ -13352,7 +13352,7 @@ Button styles for user actions.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| content | string |  | Yes |
+| content | string | Child chunk text content. | Yes |

 #### ChildChunkDetailResponse

@ -13395,14 +13395,14 @@ Button styles for user actions.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| content | string |  | Yes |
-| id | string |  | No |
+| content | string | Child chunk text content. | Yes |
+| id | string | Existing child chunk ID. Omit to create a new child chunk. | No |

 #### ChildChunkUpdatePayload

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| content | string |  | Yes |
+| content | string | Child chunk text content. | Yes |

 #### CliToolSuggestion

@ -13560,9 +13560,9 @@ Condition detail

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| comparison_operator | string, <br>**Available values:** "<", "=", ">", "after", "before", "contains", "empty", "end with", "in", "is", "is not", "not contains", "not empty", "not in", "start with", "≠", "≤", "≥" | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes |
-| name | string |  | Yes |
-| value | string<br>[ string ]<br>integer<br>number |  | No |
+| comparison_operator | string, <br>**Available values:** "<", "=", ">", "after", "before", "contains", "empty", "end with", "in", "is", "is not", "not contains", "not empty", "not in", "start with", "≠", "≤", "≥" | Comparison to apply. String operators (`contains`, `not contains`, `start with`, `end with`, `is`, `is not`, `empty`, `not empty`, `in`, `not in`) act on string or array metadata; numeric operators (`=`, `≠`, `>`, `<`, `≥`, `≤`) act on numeric metadata; time operators (`before`, `after`) act on time metadata.<br>*Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes |
+| name | string | Metadata field name to compare against. | Yes |
+| value | string<br>[ string ]<br>number | Value to compare against. Type depends on `comparison_operator`: string for most string operators, array of strings for `in` and `not in`, number for numeric operators, and omit or use `null` for `empty` and `not empty`. | No |

 #### ConfigurateMethod

@ -13704,8 +13704,8 @@ Enum class for configurate method of provider model.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| auto_generate | boolean |  | No |
-| name | string |  | No |
+| auto_generate | boolean | Automatically generate the conversation name. When `true`, the `name` field is ignored. | No |
+| name | string | Conversation name. Required when `auto_generate` is `false`. | No |

 #### ConversationVariableResponse

@ -14682,15 +14682,15 @@ Request payload for bulk downloading documents as a zip archive.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| document_ids | [ string (uuid) ] |  | Yes |
+| document_ids | [ string (uuid) ] | List of document IDs to include in the ZIP download. | Yes |

 #### DocumentMetadataOperation

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| document_id | string |  | Yes |
-| metadata_list | [ [MetadataDetail](#metadatadetail) ] |  | Yes |
-| partial_update | boolean |  | No |
+| document_id | string | Document ID whose metadata should be updated. | Yes |
+| metadata_list | [ [MetadataDetail](#metadatadetail) ] | Metadata fields to update. | Yes |
+| partial_update | boolean | Whether to partially update metadata, keeping existing values for unspecified fields. | No |

 #### DocumentMetadataResponse

@ -15472,10 +15472,10 @@ Enum class for form type.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| attachment_ids | [ string ] |  | No |
-| external_retrieval_model | object |  | No |
-| query | string |  | Yes |
-| retrieval_model | [RetrievalModel](#retrievalmodel) |  | No |
+| attachment_ids | [ string ] | List of attachment IDs to include in the retrieval context. | No |
+| external_retrieval_model | object | Retrieval settings for external knowledge bases. | No |
+| query | string | Search query text. | Yes |
+| retrieval_model | [RetrievalModel](#retrievalmodel) | Retrieval model configuration. Controls how chunks are searched and ranked. | No |

 #### HitTestingQuery

@ -15857,19 +15857,19 @@ Input field definition for snippet parameters.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| data_source | [DataSource](#datasource) |  | No |
-| doc_form | string, <br>**Default:** text_model |  | No |
-| doc_language | string, <br>**Default:** English |  | No |
-| duplicate | boolean, <br>**Default:** true |  | No |
-| embedding_model | string |  | No |
-| embedding_model_provider | string |  | No |
-| indexing_technique | string, <br>**Available values:** "economy", "high_quality" | *Enum:* `"economy"`, `"high_quality"` | Yes |
-| is_multimodal | boolean |  | No |
-| name | string |  | No |
-| original_document_id | string |  | No |
-| process_rule | [ProcessRule](#processrule) |  | No |
-| retrieval_model | [RetrievalModel](#retrievalmodel) |  | No |
-| summary_index_setting | object |  | No |
+| data_source | [DataSource](#datasource) | Document data source configuration. | No |
+| doc_form | string, <br>**Available values:** "hierarchical_model", "qa_model", "text_model", <br>**Default:** text_model | `text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, `qa_model` for question-answer pair extraction.<br>*Enum:* `"hierarchical_model"`, `"qa_model"`, `"text_model"` | No |
+| doc_language | string, <br>**Default:** English | Language of the document for processing optimization. | No |
+| duplicate | boolean, <br>**Default:** true | Whether duplicate document content is allowed. | No |
+| embedding_model | string | Embedding model name. Use the `model` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`. | No |
+| embedding_model_provider | string | Embedding model provider. Use the `provider` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`. | No |
+| indexing_technique | string, <br>**Available values:** "economy", "high_quality" | `high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing. Required when adding the first document to a knowledge base; subsequent documents inherit the knowledge base's indexing technique if omitted.<br>*Enum:* `"economy"`, `"high_quality"` | Yes |
+| is_multimodal | boolean | Whether the document uses multimodal indexing. | No |
+| name | string | Document name. | No |
+| original_document_id | string | Original document ID for replacement updates. | No |
+| process_rule | [ProcessRule](#processrule) | Processing rules for chunking. | No |
+| retrieval_model | [RetrievalModel](#retrievalmodel) | Retrieval model configuration. Controls how chunks are searched and ranked in this knowledge base. | No |
+| summary_index_setting | object | Summary index configuration. | No |

 #### KnowledgePipeline

@ -16142,9 +16142,9 @@ Enum class for large language model mode.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| content | string |  | No |
+| content | string | Optional text feedback providing additional detail. | No |
 | message_id | string | Message ID | Yes |
-| rating | string |  | No |
+| rating | string | Feedback rating. Set to `null` to revoke previously submitted feedback. | No |

 #### MessageFile

@ -16199,24 +16199,24 @@ Enum class for large language model mode.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| conversation_id | string | Conversation UUID | Yes |
-| first_id | string | First message ID for pagination | No |
-| limit | integer, <br>**Default:** 20 | Number of messages to return (1-100) | No |
+| conversation_id | string | Conversation ID. | Yes |
+| first_id | string | The ID of the first chat record on the current page. Omit this value to fetch the latest messages; for subsequent pages, use the first message ID from the current list to fetch older messages. | No |
+| limit | integer, <br>**Default:** 20 | Number of chat history messages to return per request. | No |

 #### MetadataArgs

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| name | string |  | Yes |
-| type | string, <br>**Available values:** "number", "string", "time" | *Enum:* `"number"`, `"string"`, `"time"` | Yes |
+| name | string | Metadata field name. | Yes |
+| type | string, <br>**Available values:** "number", "string", "time" | `string` for text values, `number` for numeric values, `time` for date/time values.<br>*Enum:* `"number"`, `"string"`, `"time"` | Yes |

 #### MetadataDetail

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| id | string |  | Yes |
-| name | string |  | Yes |
-| value | string<br>integer<br>number |  | No |
+| id | string | Metadata field ID. | Yes |
+| name | string | Metadata field name. | Yes |
+| value | string<br>integer<br>number | Metadata value. Can be a string, number, or `null`. | No |

 #### MetadataFilteringCondition

@ -16224,8 +16224,8 @@ Metadata Filtering Condition.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| conditions | [ [Condition](#condition) ] |  | No |
-| logical_operator | string |  | No |
+| conditions | [ [Condition](#condition) ] | List of metadata conditions to evaluate. | No |
+| logical_operator | string | How to combine multiple conditions. | No |

 #### MetadataOperationData

@ -16233,13 +16233,13 @@ Metadata operation data

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| operation_data | [ [DocumentMetadataOperation](#documentmetadataoperation) ] |  | Yes |
+| operation_data | [ [DocumentMetadataOperation](#documentmetadataoperation) ] | Array of document metadata update operations. Each entry maps a document ID to its metadata values. | Yes |

 #### MetadataUpdatePayload

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| name | string |  | Yes |
+| name | string | New metadata field name. | Yes |

 #### ModelConfig

@ -17415,8 +17415,8 @@ Shared permission levels for resources (datasets, credentials, etc.)

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| enabled | boolean |  | Yes |
-| id | string |  | Yes |
+| enabled | boolean | Whether this preprocessing rule is enabled. | Yes |
+| id | string, <br>**Available values:** "remove_extra_spaces", "remove_stopwords", "remove_urls_emails" | Rule identifier.<br>*Enum:* `"remove_extra_spaces"`, `"remove_stopwords"`, `"remove_urls_emails"` | Yes |

 #### PreviewDetail

@ -17441,8 +17441,8 @@ Serialized pricing info with codegen-safe decimal string patterns.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| mode | [ProcessRuleMode](#processrulemode) |  | Yes |
-| rules | [Rule](#rule) |  | No |
+| mode | [ProcessRuleMode](#processrulemode) | Processing mode. `automatic` uses built-in rules, `custom` allows manual configuration, and `hierarchical` enables parent-child chunk structure for `doc_form: hierarchical_model`. | Yes |
+| rules | [Rule](#rule) | Custom processing rules. | No |

 #### ProcessRuleMode

@ -17799,8 +17799,8 @@ Model class for provider quota configuration.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| reranking_model_name | string |  | No |
-| reranking_provider_name | string |  | No |
+| reranking_model_name | string | Name of the reranking model. | No |
+| reranking_provider_name | string | Provider name of the reranking model. | No |

 #### RestrictModel

@ -17826,15 +17826,15 @@ Model class for provider quota configuration.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| metadata_filtering_conditions | [MetadataFilteringCondition](#metadatafilteringcondition) |  | No |
-| reranking_enable | boolean |  | Yes |
-| reranking_mode | string |  | No |
-| reranking_model | [RerankingModel](#rerankingmodel) |  | No |
-| score_threshold | number |  | No |
-| score_threshold_enabled | boolean |  | Yes |
-| search_method | [RetrievalMethod](#retrievalmethod) |  | Yes |
-| top_k | integer |  | Yes |
-| weights | [WeightModel](#weightmodel) |  | No |
+| metadata_filtering_conditions | [MetadataFilteringCondition](#metadatafilteringcondition) | Restrict retrieval to chunks whose document metadata matches the given conditions. Conditions are evaluated server-side against document metadata fields. | No |
+| reranking_enable | boolean | Whether reranking is enabled. | Yes |
+| reranking_mode | string | Reranking mode. Required when `reranking_enable` is `true`. | No |
+| reranking_model | [RerankingModel](#rerankingmodel) | Reranking model configuration. | No |
+| score_threshold | number | Minimum similarity score for results. Only effective when score threshold filtering is enabled. | No |
+| score_threshold_enabled | boolean | Whether score threshold filtering is enabled. | Yes |
+| search_method | [RetrievalMethod](#retrievalmethod) | Search method used for retrieval. | Yes |
+| top_k | integer | Maximum number of results to return. | Yes |
+| weights | [WeightModel](#weightmodel) | Weight configuration for hybrid search. | No |

 #### RetrievalSettingResponse

@ -17876,10 +17876,10 @@ Model class for provider quota configuration.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| parent_mode | string |  | No |
-| pre_processing_rules | [ [PreProcessingRule](#preprocessingrule) ] |  | No |
-| segmentation | [Segmentation](#segmentation) |  | No |
-| subchunk_segmentation | [Segmentation](#segmentation) |  | No |
+| parent_mode | string | Parent-child segmentation mode. | No |
+| pre_processing_rules | [ [PreProcessingRule](#preprocessingrule) ] | Pre-processing rules to apply before segmentation. | No |
+| segmentation | [Segmentation](#segmentation) | Parent chunk segmentation settings. | No |
+| subchunk_segmentation | [Segmentation](#segmentation) | Child chunk segmentation settings. | No |

 #### RuleCodeGeneratePayload

@ -18083,10 +18083,10 @@ Model class for provider quota configuration.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| chunk_overlap | integer |  | No |
-| max_tokens | integer |  | Yes |
+| chunk_overlap | integer | Token overlap between chunks. | No |
+| max_tokens | integer | Maximum token count per chunk. | Yes |
 | separator | string, <br>**Default:**
- |  | No |
+ | Custom separator for splitting text. | No |

 #### SelectInputConfig

@ -18690,10 +18690,10 @@ Tag type

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| message_id | string | Message ID | No |
+| message_id | string | Message ID. Takes priority over `text` when both are provided. | No |
 | streaming | boolean | Reserved for compatibility; TTS response streaming is determined by the provider output. | No |
-| text | string | Text to convert to audio | No |
-| voice | string | Voice to use for TTS | No |
+| text | string | Speech content to convert. | No |
+| voice | string | Voice to use for text-to-speech. Available voices depend on the TTS provider configured for this app. Omit to use the app's configured voice when available; that value is exposed by [Get App Parameters](/api-reference/applications/get-app-parameters) as `text_to_speech.voice`. | No |

 #### TextToSpeechPayload

@ -19263,23 +19263,23 @@ in form definition, or a variable while the workflow is running.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| keyword_weight | number |  | Yes |
+| keyword_weight | number | Weight assigned to keyword search results. | Yes |

 #### WeightModel

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| keyword_setting | [WeightKeywordSetting](#weightkeywordsetting) |  | No |
-| vector_setting | [WeightVectorSetting](#weightvectorsetting) |  | No |
-| weight_type | string |  | No |
+| keyword_setting | [WeightKeywordSetting](#weightkeywordsetting) | Keyword search weight settings. | No |
+| vector_setting | [WeightVectorSetting](#weightvectorsetting) | Semantic search weight settings. | No |
+| weight_type | string | Strategy for balancing semantic and keyword search weights. | No |

 #### WeightVectorSetting

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| embedding_model_name | string |  | Yes |
-| embedding_provider_name | string |  | Yes |
-| vector_weight | number |  | Yes |
+| embedding_model_name | string | Name of the embedding model used for vector search. | Yes |
+| embedding_provider_name | string | Provider of the embedding model used for vector search. | Yes |
+| vector_weight | number | Weight assigned to semantic vector search results. | Yes |

 #### WorkflowAgentBindingType

@ -19975,8 +19975,8 @@ can reuse its existing handler.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| files | [ object ] |  | No |
-| inputs | object |  | Yes |
+| files | [ object ] | File list for workflow system file inputs. Available when file upload is enabled for the workflow. To attach a local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned `id` as `upload_file_id` with `transfer_method: local_file`. | No |
+| inputs | object | Key-value pairs for workflow input variables. Values for file-type variables should be arrays of file objects with `type`, `transfer_method`, and either `url` or `upload_file_id`. Refer to the `user_input_form` field in the [Get App Parameters](/api-reference/applications/get-app-parameters) response to discover the variable names and types expected by your app. | Yes |

 #### WorkflowRunQuery

--- a/api/openapi/markdown/openapi-openapi.md
+++ b/api/openapi/markdown/openapi-openapi.md
@ -792,7 +792,7 @@ Liveness payload for `GET /openapi/v1/_health` — no auth required.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| action | string |  | Yes |
+| action | string | ID of the action button the recipient selected. Must match one of the `id` values from the form's `user_actions` list. | Yes |
 | inputs | object | Submitted human input values keyed by output variable name. Use a string for paragraph or select input values, a file mapping for file inputs, and a list of file mappings for file-list inputs. Local file mappings use `transfer_method=local_file` with `upload_file_id`; remote file mappings use `transfer_method=remote_url` with `url` or `remote_url`. | Yes |

 #### Import
--- a/api/openapi/markdown/service-openapi.md
+++ b/api/openapi/markdown/service-openapi.md
--- a/api/openapi/markdown/web-openapi.md
+++ b/api/openapi/markdown/web-openapi.md
@ -471,9 +471,9 @@ Retrieve paginated list of messages from a conversation in a chat application.

 | Name | Located in | Description | Required | Schema |
 | ---- | ---------- | ----------- | -------- | ------ |
-| conversation_id | query | Conversation UUID | Yes | string |
-| first_id | query | First message ID for pagination | No | string |
-| limit | query | Number of messages to return (1-100) | No | integer, <br>**Default:** 20 |
+| conversation_id | query | Conversation ID. | Yes | string |
+| first_id | query | The ID of the first chat record on the current page. Omit this value to fetch the latest messages; for subsequent pages, use the first message ID from the current list to fetch older messages. | No | string |
+| limit | query | Number of chat history messages to return per request. | No | integer, <br>**Default:** 20 |

 #### Responses

@ -1091,8 +1091,8 @@ Button styles for user actions.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| auto_generate | boolean |  | No |
-| name | string |  | No |
+| auto_generate | boolean | Automatically generate the conversation name. When `true`, the `name` field is ignored. | No |
+| name | string | Conversation name. Required when `auto_generate` is `false`. | No |

 #### EmailCodeLoginSendPayload

@ -1281,7 +1281,7 @@ Parsed multipart form fields for HITL uploads.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| action | string |  | Yes |
+| action | string | ID of the action button the recipient selected. Must match one of the `id` values from the form's `user_actions` list. | Yes |
 | inputs | object | Submitted human input values keyed by output variable name. Use a string for paragraph or select input values, a file mapping for file inputs, and a list of file mappings for file-list inputs. Local file mappings use `transfer_method=local_file` with `upload_file_id`; remote file mappings use `transfer_method=remote_url` with `url` or `remote_url`. | Yes |

 #### HumanInputFormSubmitResponse
@ -1371,8 +1371,8 @@ Parsed multipart form fields for HITL uploads.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| content | string |  | No |
-| rating | string |  | No |
+| content | string | Optional text feedback providing additional detail. | No |
+| rating | string | Feedback rating. Set to `null` to revoke previously submitted feedback. | No |

 #### MessageFile

@ -1392,9 +1392,9 @@ Parsed multipart form fields for HITL uploads.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| conversation_id | string | Conversation UUID | Yes |
-| first_id | string | First message ID for pagination | No |
-| limit | integer, <br>**Default:** 20 | Number of messages to return (1-100) | No |
+| conversation_id | string | Conversation ID. | Yes |
+| first_id | string | The ID of the first chat record on the current page. Omit this value to fetch the latest messages; for subsequent pages, use the first message ID from the current list to fetch older messages. | No |
+| limit | integer, <br>**Default:** 20 | Number of chat history messages to return per request. | No |

 #### MessageMoreLikeThisQuery

@ -1631,10 +1631,10 @@ Default configuration for form inputs.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| message_id | string | Message ID | No |
+| message_id | string | Message ID. Takes priority over `text` when both are provided. | No |
 | streaming | boolean | Reserved for compatibility; TTS response streaming is determined by the provider output. | No |
-| text | string | Text to convert to audio | No |
-| voice | string | Voice to use for TTS | No |
+| text | string | Speech content to convert. | No |
+| voice | string | Voice to use for text-to-speech. Available voices depend on the TTS provider configured for this app. Omit to use the app's configured voice when available; that value is exposed by [Get App Parameters](/api-reference/applications/get-app-parameters) as `text_to_speech.voice`. | No |

 #### UserActionConfig

@ -1711,5 +1711,5 @@ in form definition, or a variable while the workflow is running.

 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
-| files | [ object ] |  | No |
-| inputs | object |  | Yes |
+| files | [ object ] | File list for workflow system file inputs. Available when file upload is enabled for the workflow. To attach a local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned `id` as `upload_file_id` with `transfer_method: local_file`. | No |
+| inputs | object | Key-value pairs for workflow input variables. Values for file-type variables should be arrays of file objects with `type`, `transfer_method`, and either `url` or `upload_file_id`. Refer to the `user_input_form` field in the [Get App Parameters](/api-reference/applications/get-app-parameters) response to discover the variable names and types expected by your app. | Yes |
--- a/api/services/entities/knowledge_entities/knowledge_entities.py
+++ b/api/services/entities/knowledge_entities/knowledge_entities.py
@ -1,6 +1,6 @@
-from typing import Any, Literal
+from typing import Annotated, Any, Literal

-from pydantic import BaseModel, Field, field_validator
+from pydantic import BaseModel, Field, WithJsonSchema, field_validator

 from core.rag.entities import Rule
 from core.rag.entities.metadata_entities import MetadataFilteringCondition
@ -8,10 +8,79 @@ from core.rag.index_processor.constant.index_type import IndexStructureType
 from core.rag.retrieval.retrieval_methods import RetrievalMethod
 from models.enums import ProcessRuleMode

+DocForm = Annotated[
+    str,
+    WithJsonSchema({"enum": ["text_model", "hierarchical_model", "qa_model"], "type": "string"}),
+]
+IndexingTechnique = Annotated[
+    str | None,
+    WithJsonSchema({"anyOf": [{"enum": ["high_quality", "economy"], "type": "string"}, {"type": "null"}]}),
+]
+KnowledgeProvider = Annotated[
+    str,
+    WithJsonSchema({"enum": ["vendor", "external"], "type": "string"}),
+]
+RerankingMode = Annotated[
+    str | None,
+    WithJsonSchema({"anyOf": [{"enum": ["reranking_model", "weighted_score"], "type": "string"}, {"type": "null"}]}),
+]
+SummaryIndexSetting = Annotated[
+    dict[str, Any] | None,
+    WithJsonSchema(
+        {
+            "anyOf": [
+                {
+                    "properties": {
+                        "enable": {"description": "Whether to enable summary indexing.", "type": "boolean"},
+                        "model_name": {
+                            "description": "Name of the model used for generating summaries.",
+                            "type": "string",
+                        },
+                        "model_provider_name": {
+                            "description": "Provider of the summary generation model.",
+                            "type": "string",
+                        },
+                        "summary_prompt": {
+                            "description": "Custom prompt template for summary generation.",
+                            "type": "string",
+                        },
+                    },
+                    "type": "object",
+                },
+                {"type": "null"},
+            ]
+        }
+    ),
+]
+ExternalRetrievalModel = Annotated[
+    dict[str, Any] | None,
+    WithJsonSchema(
+        {
+            "anyOf": [
+                {
+                    "properties": {
+                        "top_k": {"description": "Maximum number of results to return.", "type": "integer"},
+                        "score_threshold": {
+                            "description": "Minimum similarity score threshold for filtering results.",
+                            "type": "number",
+                        },
+                        "score_threshold_enabled": {
+                            "description": "Whether score threshold filtering is enabled.",
+                            "type": "boolean",
+                        },
+                    },
+                    "type": "object",
+                },
+                {"type": "null"},
+            ]
+        }
+    ),
+]
+

 class RerankingModel(BaseModel):
-    reranking_provider_name: str | None = None
-    reranking_model_name: str | None = None
+    reranking_provider_name: str | None = Field(default=None, description="Provider name of the reranking model.")
+    reranking_model_name: str | None = Field(default=None, description="Name of the reranking model.")


 class NotionIcon(BaseModel):
@ -56,36 +125,56 @@ class DataSource(BaseModel):


 class ProcessRule(BaseModel):
-    mode: ProcessRuleMode
-    rules: Rule | None = None
+    mode: ProcessRuleMode = Field(
+        description=(
+            "Processing mode. `automatic` uses built-in rules, `custom` allows manual configuration, and "
+            "`hierarchical` enables parent-child chunk structure for `doc_form: hierarchical_model`."
+        )
+    )
+    rules: Rule | None = Field(default=None, description="Custom processing rules.")


 class WeightVectorSetting(BaseModel):
-    vector_weight: float
-    embedding_provider_name: str
-    embedding_model_name: str
+    vector_weight: float = Field(description="Weight assigned to semantic vector search results.")
+    embedding_provider_name: str = Field(description="Provider of the embedding model used for vector search.")
+    embedding_model_name: str = Field(description="Name of the embedding model used for vector search.")


 class WeightKeywordSetting(BaseModel):
-    keyword_weight: float
+    keyword_weight: float = Field(description="Weight assigned to keyword search results.")


 class WeightModel(BaseModel):
-    weight_type: Literal["semantic_first", "keyword_first", "customized"] | None = None
-    vector_setting: WeightVectorSetting | None = None
-    keyword_setting: WeightKeywordSetting | None = None
+    weight_type: Literal["semantic_first", "keyword_first", "customized"] | None = Field(
+        default=None,
+        description="Strategy for balancing semantic and keyword search weights.",
+    )
+    vector_setting: WeightVectorSetting | None = Field(default=None, description="Semantic search weight settings.")
+    keyword_setting: WeightKeywordSetting | None = Field(default=None, description="Keyword search weight settings.")


 class RetrievalModel(BaseModel):
-    search_method: RetrievalMethod
-    reranking_enable: bool
-    reranking_model: RerankingModel | None = None
-    reranking_mode: str | None = None
-    top_k: int
-    score_threshold_enabled: bool
-    score_threshold: float | None = None
-    weights: WeightModel | None = None
-    metadata_filtering_conditions: MetadataFilteringCondition | None = None
+    search_method: RetrievalMethod = Field(description="Search method used for retrieval.")
+    reranking_enable: bool = Field(description="Whether reranking is enabled.")
+    reranking_model: RerankingModel | None = Field(default=None, description="Reranking model configuration.")
+    reranking_mode: RerankingMode = Field(
+        default=None,
+        description="Reranking mode. Required when `reranking_enable` is `true`.",
+    )
+    top_k: int = Field(description="Maximum number of results to return.")
+    score_threshold_enabled: bool = Field(description="Whether score threshold filtering is enabled.")
+    score_threshold: float | None = Field(
+        default=None,
+        description="Minimum similarity score for results. Only effective when score threshold filtering is enabled.",
+    )
+    weights: WeightModel | None = Field(default=None, description="Weight configuration for hybrid search.")
+    metadata_filtering_conditions: MetadataFilteringCondition | None = Field(
+        default=None,
+        description=(
+            "Restrict retrieval to chunks whose document metadata matches the given conditions. Conditions are "
+            "evaluated server-side against document metadata fields."
+        ),
+    )


 class MetaDataConfig(BaseModel):
@ -94,19 +183,51 @@ class MetaDataConfig(BaseModel):


 class KnowledgeConfig(BaseModel):
-    original_document_id: str | None = None
-    duplicate: bool = True
-    indexing_technique: Literal["high_quality", "economy"]
-    data_source: DataSource | None = None
-    process_rule: ProcessRule | None = None
-    retrieval_model: RetrievalModel | None = None
-    summary_index_setting: dict[str, Any] | None = Field(default=None)
-    doc_form: str = "text_model"
-    doc_language: str = "English"
-    embedding_model: str | None = None
-    embedding_model_provider: str | None = None
-    name: str | None = None
-    is_multimodal: bool = False
+    original_document_id: str | None = Field(default=None, description="Original document ID for replacement updates.")
+    duplicate: bool = Field(default=True, description="Whether duplicate document content is allowed.")
+    indexing_technique: Literal["high_quality", "economy"] = Field(
+        description=(
+            "`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing. "
+            "Required when adding the first document to a knowledge base; subsequent documents inherit the "
+            "knowledge base's indexing technique if omitted."
+        )
+    )
+    data_source: DataSource | None = Field(default=None, description="Document data source configuration.")
+    process_rule: ProcessRule | None = Field(default=None, description="Processing rules for chunking.")
+    retrieval_model: RetrievalModel | None = Field(
+        default=None,
+        description=(
+            "Retrieval model configuration. Controls how chunks are searched and ranked in this knowledge base."
+        ),
+    )
+    summary_index_setting: SummaryIndexSetting = Field(
+        default=None,
+        description="Summary index configuration.",
+    )
+    doc_form: DocForm = Field(
+        default="text_model",
+        description=(
+            "`text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, "
+            "`qa_model` for question-answer pair extraction."
+        ),
+    )
+    doc_language: str = Field(default="English", description="Language of the document for processing optimization.")
+    embedding_model: str | None = Field(
+        default=None,
+        description=(
+            "Embedding model name. Use the `model` field from "
+            "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
+        ),
+    )
+    embedding_model_provider: str | None = Field(
+        default=None,
+        description=(
+            "Embedding model provider. Use the `provider` field from "
+            "[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
+        ),
+    )
+    name: str | None = Field(default=None, description="Document name.")
+    is_multimodal: bool = Field(default=False, description="Whether the document uses multimodal indexing.")

    @field_validator("doc_form")
    @classmethod
@ -122,47 +243,61 @@ class KnowledgeConfig(BaseModel):


 class SegmentCreateArgs(BaseModel):
-    content: str | None = None
-    answer: str | None = None
-    keywords: list[str] | None = None
-    attachment_ids: list[str] | None = None
+    content: str | None = Field(default=None, description="Chunk text content.")
+    answer: str | None = Field(default=None, description="Answer content for QA mode.")
+    keywords: list[str] | None = Field(default=None, description="Keywords for the chunk.")
+    attachment_ids: list[str] | None = Field(default=None, description="Attachment file IDs.")


 class SegmentUpdateArgs(BaseModel):
-    content: str | None = None
-    answer: str | None = None
-    keywords: list[str] | None = None
-    regenerate_child_chunks: bool = False
-    enabled: bool | None = None
-    attachment_ids: list[str] | None = None
-    summary: str | None = None  # Summary content for summary index
+    content: str | None = Field(default=None, description="Updated chunk text content.")
+    answer: str | None = Field(default=None, description="Updated answer content for QA mode.")
+    keywords: list[str] | None = Field(default=None, description="Updated keywords for the chunk.")
+    regenerate_child_chunks: bool = Field(
+        default=False,
+        description="Whether to regenerate child chunks after updating a parent chunk.",
+    )
+    enabled: bool | None = Field(default=None, description="Whether the chunk is enabled.")
+    attachment_ids: list[str] | None = Field(default=None, description="Attachment file IDs.")
+    summary: str | None = Field(default=None, description="Summary content for summary index.")


 class ChildChunkUpdateArgs(BaseModel):
-    id: str | None = None
-    content: str
+    id: str | None = Field(default=None, description="Existing child chunk ID. Omit to create a new child chunk.")
+    content: str = Field(description="Child chunk text content.")


 class MetadataArgs(BaseModel):
-    type: Literal["string", "number", "time"]
-    name: str
+    type: Literal["string", "number", "time"] = Field(
+        description="`string` for text values, `number` for numeric values, `time` for date/time values."
+    )
+    name: str = Field(description="Metadata field name.")


 class MetadataUpdateArgs(BaseModel):
-    name: str
-    value: str | int | float | None = None
+    name: str = Field(description="Metadata field name.")
+    value: str | int | float | None = Field(
+        default=None,
+        description="Metadata value. Can be a string, number, or `null`.",
+    )


 class MetadataDetail(BaseModel):
-    id: str
-    name: str
-    value: str | int | float | None = None
+    id: str = Field(description="Metadata field ID.")
+    name: str = Field(description="Metadata field name.")
+    value: str | int | float | None = Field(
+        default=None,
+        description="Metadata value. Can be a string, number, or `null`.",
+    )


 class DocumentMetadataOperation(BaseModel):
-    document_id: str
-    metadata_list: list[MetadataDetail]
-    partial_update: bool = False
+    document_id: str = Field(description="Document ID whose metadata should be updated.")
+    metadata_list: list[MetadataDetail] = Field(description="Metadata fields to update.")
+    partial_update: bool = Field(
+        default=False,
+        description="Whether to partially update metadata, keeping existing values for unspecified fields.",
+    )


 class MetadataOperationData(BaseModel):
@ -170,4 +305,8 @@ class MetadataOperationData(BaseModel):
    Metadata operation data
    """

-    operation_data: list[DocumentMetadataOperation]
+    operation_data: list[DocumentMetadataOperation] = Field(
+        description=(
+            "Array of document metadata update operations. Each entry maps a document ID to its metadata values."
+        )
+    )
--- a/api/services/rag_pipeline/entity/pipeline_service_api_entities.py
+++ b/api/services/rag_pipeline/entity/pipeline_service_api_entities.py
@ -1,22 +1,142 @@
 from collections.abc import Mapping
-from typing import Any
+from typing import Annotated, Any

-from pydantic import BaseModel
+from pydantic import BaseModel, Field, WithJsonSchema
+
+DatasourceType = Annotated[
+    str,
+    WithJsonSchema({"enum": ["local_file", "online_document", "website_crawl", "online_drive"], "type": "string"}),
+]
+PipelineResponseMode = Annotated[
+    str,
+    WithJsonSchema({"enum": ["streaming", "blocking"], "type": "string"}),
+]
+DatasourceInfoList = Annotated[
+    list[Mapping[str, Any]],
+    WithJsonSchema(
+        {
+            "items": {
+                "oneOf": [
+                    {
+                        "properties": {
+                            "reference": {
+                                "description": (
+                                    "Use the `id` returned by the "
+                                    "[Upload Pipeline File](/api-reference/knowledge-pipeline/upload-pipeline-file) "
+                                    "endpoint. `related_id` is accepted as an alias."
+                                ),
+                                "type": "string",
+                            },
+                            "name": {"description": "Document title. Defaults to `untitled`.", "type": "string"},
+                        },
+                        "required": ["reference"],
+                        "title": "Local File",
+                        "type": "object",
+                    },
+                    {
+                        "properties": {
+                            "workspace_id": {
+                                "description": "ID of the workspace or database in the external platform.",
+                                "type": "string",
+                            },
+                            "page": {
+                                "description": "Page details.",
+                                "properties": {
+                                    "page_id": {"description": "Page identifier.", "type": "string"},
+                                    "type": {
+                                        "description": "Page type defined by the datasource plugin.",
+                                        "type": "string",
+                                    },
+                                    "page_name": {
+                                        "description": "Display name. Defaults to `untitled`.",
+                                        "type": "string",
+                                    },
+                                },
+                                "required": ["page_id", "type"],
+                                "type": "object",
+                            },
+                            "credential_id": {
+                                "description": (
+                                    "Credential for authenticating with the external platform. If omitted, the "
+                                    "provider's default credential is used."
+                                ),
+                                "type": "string",
+                            },
+                        },
+                        "required": ["workspace_id", "page"],
+                        "title": "Online Document",
+                        "type": "object",
+                    },
+                    {
+                        "properties": {
+                            "url": {"description": "URL to crawl.", "type": "string"},
+                            "title": {
+                                "description": "Used as the document name. Defaults to `untitled`.",
+                                "type": "string",
+                            },
+                        },
+                        "required": ["url"],
+                        "title": "Website Crawl",
+                        "type": "object",
+                    },
+                    {
+                        "properties": {
+                            "id": {"description": "File or folder ID.", "type": "string"},
+                            "type": {
+                                "description": "Whether this entry is a single file or a folder to expand.",
+                                "enum": ["file", "folder"],
+                                "type": "string",
+                            },
+                            "bucket": {
+                                "description": (
+                                    "Storage bucket name. Required by some drive providers, such as S3-compatible "
+                                    "stores; omit if the provider does not use buckets."
+                                ),
+                                "type": "string",
+                            },
+                            "name": {"description": "File name. Defaults to `untitled`.", "type": "string"},
+                        },
+                        "required": ["id", "type"],
+                        "title": "Online Drive",
+                        "type": "object",
+                    },
+                ]
+            },
+            "type": "array",
+        }
+    ),
+]


 class DatasourceNodeRunApiEntity(BaseModel):
    pipeline_id: str
    node_id: str
    inputs: dict[str, Any]
-    datasource_type: str
+    datasource_type: DatasourceType
    credential_id: str | None = None
    is_published: bool


 class PipelineRunApiEntity(BaseModel):
-    inputs: Mapping[str, Any]
-    datasource_type: str
-    datasource_info_list: list[Mapping[str, Any]]
-    start_node_id: str
-    is_published: bool
-    response_mode: str
+    inputs: Mapping[str, Any] = Field(
+        description=(
+            "Key-value pairs for pipeline input variables defined in the workflow. Pass `{}` if the pipeline has "
+            "no input variables."
+        )
+    )
+    datasource_type: DatasourceType = Field(
+        description="Type of the datasource. Determines which fields are expected in `datasource_info_list` items."
+    )
+    datasource_info_list: DatasourceInfoList = Field(
+        description="List of datasource objects to process. The expected item structure depends on `datasource_type`."
+    )
+    start_node_id: str = Field(description="ID of the datasource node where the run starts.")
+    is_published: bool = Field(
+        description=(
+            "Whether to run the published or draft version of the pipeline. `true` runs the latest published "
+            "version; `false` runs the current draft (useful for testing unpublished changes)."
+        )
+    )
+    response_mode: PipelineResponseMode = Field(
+        description="Response mode. Use `streaming` for SSE or `blocking` for JSON."
+    )
--- a/api/tests/unit_tests/controllers/test_swagger.py
+++ b/api/tests/unit_tests/controllers/test_swagger.py
@ -6,6 +6,32 @@ from collections.abc import Iterator
 import pytest
 from flask import Flask

+USER_PROPERTY_SCHEMA = {
+    "description": (
+        "User identifier, unique within the application. This identifier scopes data access; resources created with "
+        "one `user` value are only visible when queried with the same `user` value."
+    ),
+    "type": "string",
+}
+GENERIC_FILE_SCHEMA = {"description": "The file to upload.", "format": "binary", "type": "string"}
+DOCUMENT_CREATE_DATA_SCHEMA = {
+    "description": (
+        "JSON string containing configuration. Accepts the same fields as "
+        "[Create Document by Text](/api-reference/documents/create-document-by-text) (`indexing_technique`, "
+        "`doc_form`, `doc_language`, `process_rule`, `retrieval_model`, `embedding_model`, "
+        "`embedding_model_provider`) except `name` and `text`."
+    ),
+    "type": "string",
+}
+DOCUMENT_UPDATE_DATA_SCHEMA = {
+    "description": (
+        "JSON string containing document update settings such as `doc_form`, `doc_language`, `process_rule`, "
+        "`retrieval_model`, `embedding_model`, and `embedding_model_provider`. `name` and `text` are not used "
+        "for file updates."
+    ),
+    "type": "string",
+}
+

 def _schema_refs(value: object) -> set[str]:
    refs: set[str] = set()
@ -180,11 +206,12 @@ def test_service_document_file_routes_document_multipart_form_data(monkeypatch:
    create_schema = _multipart_form_schema(create_operation)
    create_properties = create_schema["properties"]
    assert isinstance(create_properties, dict)
-    assert create_properties["file"] == {"type": "string", "format": "binary"}
-    assert create_properties["data"] == {
-        "description": "Optional JSON string with document creation settings.",
+    assert create_properties["file"] == {
+        "description": "Document file to upload.",
+        "format": "binary",
        "type": "string",
    }
+    assert create_properties["data"] == DOCUMENT_CREATE_DATA_SCHEMA
    assert create_schema["required"] == ["file"]
    assert create_operation["requestBody"]["required"] is True

@ -197,11 +224,12 @@ def test_service_document_file_routes_document_multipart_form_data(monkeypatch:
        update_schema = _multipart_form_schema(update_operation)
        update_properties = update_schema["properties"]
        assert isinstance(update_properties, dict)
-        assert update_properties["file"] == {"type": "string", "format": "binary"}
-        assert update_properties["data"] == {
-            "description": "Optional JSON string with document update settings.",
+        assert update_properties["file"] == {
+            "description": "Replacement document file to upload.",
+            "format": "binary",
            "type": "string",
        }
+        assert update_properties["data"] == DOCUMENT_UPDATE_DATA_SCHEMA
        assert "required" not in update_schema
        assert update_operation["requestBody"]["required"] is False

@ -228,7 +256,7 @@ def test_service_openapi_merges_public_api_reference_descriptions(monkeypatch: p
    rename_operation = payload["paths"]["/conversations/{c_id}/name"]["post"]
    assert rename_operation["summary"] == "Rename Conversation"
    assert rename_operation["tags"] == ["Conversations"]
-    assert _parameters_by_name(rename_operation)["c_id"]["description"] == "Conversation ID"
+    assert _parameters_by_name(rename_operation)["c_id"]["description"] == "Conversation ID."


 def test_service_document_list_documents_query_params_render(monkeypatch: pytest.MonkeyPatch):
@ -277,7 +305,7 @@ def test_service_openapi_documents_decorator_user_contracts(monkeypatch: pytest.
    )
    for path, method in required_json_user_operations:
        schema = _json_body_schema(payload, paths[path][method])
-        assert schema["properties"]["user"] == {"description": "End user identifier", "type": "string"}
+        assert schema["properties"]["user"] == USER_PROPERTY_SCHEMA
        assert "user" in schema["required"]

    optional_json_user_operations = (
@ -288,7 +316,7 @@ def test_service_openapi_documents_decorator_user_contracts(monkeypatch: pytest.
    )
    for path, method in optional_json_user_operations:
        schema = _json_body_schema(payload, paths[path][method])
-        assert schema["properties"]["user"] == {"description": "End user identifier", "type": "string"}
+        assert schema["properties"]["user"] == USER_PROPERTY_SCHEMA
        assert "user" not in schema.get("required", [])

    messages_params = _parameters_by_name(paths["/messages"]["get"])
@ -316,12 +344,22 @@ def test_service_openapi_documents_app_multipart_contracts(monkeypatch: pytest.M

    for path in ("/files/upload", "/audio-to-text"):
        schema = _multipart_form_schema(paths[path]["post"])
-        assert schema["properties"]["file"] == {"format": "binary", "type": "string"}
-        assert schema["properties"]["user"] == {"description": "End user identifier", "type": "string"}
+        if path == "/audio-to-text":
+            assert schema["properties"]["file"] == {
+                "description": (
+                    "Audio file to transcribe. Supported MIME types: `audio/mp3`, `audio/mpga`, `audio/m4a`, "
+                    "`audio/wav`, and `audio/amr`. File size limit is `30 MB`."
+                ),
+                "format": "binary",
+                "type": "string",
+            }
+        else:
+            assert schema["properties"]["file"] == GENERIC_FILE_SCHEMA
+        assert schema["properties"]["user"] == USER_PROPERTY_SCHEMA
        assert schema["required"] == ["file"]

    pipeline_schema = _multipart_form_schema(paths["/datasets/pipeline/file-upload"]["post"])
-    assert pipeline_schema["properties"]["file"] == {"format": "binary", "type": "string"}
+    assert pipeline_schema["properties"]["file"] == GENERIC_FILE_SCHEMA
    assert pipeline_schema["required"] == ["file"]


@ -385,14 +423,14 @@ def test_service_openapi_documents_uuid_params_and_deprecated_routes(monkeypatch

    dataset_params = _parameters_by_name(paths["/datasets/{dataset_id}"]["get"])
    assert dataset_params["dataset_id"]["schema"] == {
-        "description": "Dataset ID",
+        "description": "Knowledge base ID.",
        "format": "uuid",
        "type": "string",
    }

    conversation_params = _parameters_by_name(paths["/conversations/{c_id}"]["delete"])
    assert conversation_params["c_id"]["schema"] == {
-        "description": "Conversation ID",
+        "description": "Conversation ID.",
        "format": "uuid",
        "type": "string",
    }
@ -447,7 +485,7 @@ def test_service_openapi_documents_conditional_payload_schemas(monkeypatch: pyte
    assert manual_name_branch["properties"]["name"]["pattern"] == r".*\S.*"
    assert manual_name_branch["required"] == ["name"]
    for branch in rename_schema["anyOf"]:
-        assert branch["properties"]["user"] == {"description": "End user identifier", "type": "string"}
+        assert branch["properties"]["user"] == USER_PROPERTY_SCHEMA

    document_update_schema = payload["components"]["schemas"]["DocumentTextUpdate"]
    with_text_branch, without_text_branch = document_update_schema["anyOf"]
--- a/packages/contracts/generated/api/console/datasets/types.gen.ts
+++ b/packages/contracts/generated/api/console/datasets/types.gen.ts
@ -187,7 +187,7 @@ export type IndexingEstimateResponse = {

 export type KnowledgeConfig = {
  data_source?: DataSource | null
-  doc_form?: string
+  doc_form?: 'hierarchical_model' | 'qa_model' | 'text_model'
  doc_language?: string
  duplicate?: boolean
  embedding_model?: string | null
@ -199,7 +199,10 @@ export type KnowledgeConfig = {
  process_rule?: ProcessRule | null
  retrieval_model?: RetrievalModel | null
  summary_index_setting?: {
-    [key: string]: unknown
+    enable?: boolean
+    model_name?: string
+    model_provider_name?: string
+    summary_prompt?: string
  } | null
 }

@ -482,7 +485,9 @@ export type ExternalRetrievalTestResponse
 export type HitTestingPayload = {
  attachment_ids?: Array<string> | null
  external_retrieval_model?: {
-    [key: string]: unknown
+    score_threshold?: number
+    score_threshold_enabled?: boolean
+    top_k?: number
  } | null
  query: string
  retrieval_model?: RetrievalModel | null
@ -707,7 +712,7 @@ export type ProcessRule = {
 export type RetrievalModel = {
  metadata_filtering_conditions?: MetadataFilteringCondition | null
  reranking_enable: boolean
-  reranking_mode?: string | null
+  reranking_mode?: 'reranking_model' | 'weighted_score' | null
  reranking_model?: RerankingModel | null
  score_threshold?: number | null
  score_threshold_enabled: boolean
@ -1035,7 +1040,7 @@ export type WebsiteInfo = {

 export type PreProcessingRule = {
  enabled: boolean
-  id: string
+  id: 'remove_extra_spaces' | 'remove_stopwords' | 'remove_urls_emails'
 }

 export type Segmentation = {
@ -1065,7 +1070,7 @@ export type Condition = {
    | '≤'
    | '≥'
  name: string
-  value?: string | Array<string> | number | number | null
+  value?: string | Array<string> | number | null
 }

 export type WeightKeywordSetting = {
--- a/packages/contracts/generated/api/console/datasets/zod.gen.ts
+++ b/packages/contracts/generated/api/console/datasets/zod.gen.ts
@ -1176,7 +1176,7 @@ export const zWebsiteInfo = z.object({
 */
 export const zPreProcessingRule = z.object({
  enabled: z.boolean(),
-  id: z.string(),
+  id: z.enum(['remove_extra_spaces', 'remove_stopwords', 'remove_urls_emails']),
 })

 /**
@ -1233,7 +1233,7 @@ export const zCondition = z.object({
    '≥',
  ]),
  name: z.string(),
-  value: z.union([z.string(), z.array(z.string()), z.int(), z.number()]).nullish(),
+  value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(),
 })

 /**
@ -1277,7 +1277,7 @@ export const zWeightModel = z.object({
 export const zRetrievalModel = z.object({
  metadata_filtering_conditions: zMetadataFilteringCondition.nullish(),
  reranking_enable: z.boolean(),
-  reranking_mode: z.string().nullish(),
+  reranking_mode: z.enum(['reranking_model', 'weighted_score']).nullish(),
  reranking_model: zRerankingModel.nullish(),
  score_threshold: z.number().nullish(),
  score_threshold_enabled: z.boolean(),
@ -1291,7 +1291,13 @@ export const zRetrievalModel = z.object({
 */
 export const zHitTestingPayload = z.object({
  attachment_ids: z.array(z.string()).nullish(),
-  external_retrieval_model: z.record(z.string(), z.unknown()).nullish(),
+  external_retrieval_model: z
+    .object({
+      score_threshold: z.number().optional(),
+      score_threshold_enabled: z.boolean().optional(),
+      top_k: z.int().optional(),
+    })
+    .nullish(),
  query: z.string().max(250),
  retrieval_model: zRetrievalModel.nullish(),
 })
@ -1451,7 +1457,10 @@ export const zDataSource = z.object({
 */
 export const zKnowledgeConfig = z.object({
  data_source: zDataSource.nullish(),
-  doc_form: z.string().optional().default('text_model'),
+  doc_form: z
+    .enum(['hierarchical_model', 'qa_model', 'text_model'])
+    .optional()
+    .default('text_model'),
  doc_language: z.string().optional().default('English'),
  duplicate: z.boolean().optional().default(true),
  embedding_model: z.string().nullish(),
@ -1462,7 +1471,14 @@ export const zKnowledgeConfig = z.object({
  original_document_id: z.string().nullish(),
  process_rule: zProcessRule.nullish(),
  retrieval_model: zRetrievalModel.nullish(),
-  summary_index_setting: z.record(z.string(), z.unknown()).nullish(),
+  summary_index_setting: z
+    .object({
+      enable: z.boolean().optional(),
+      model_name: z.string().optional(),
+      model_provider_name: z.string().optional(),
+      summary_prompt: z.string().optional(),
+    })
+    .nullish(),
 })

 export const zGetDatasetsQuery = z.object({
--- a/packages/contracts/generated/api/console/installed-apps/types.gen.ts
+++ b/packages/contracts/generated/api/console/installed-apps/types.gen.ts
@ -156,7 +156,10 @@ export type AudioBinaryResponse = Blob | File

 export type WorkflowRunPayload = {
  files?: Array<{
-    [key: string]: unknown
+    transfer_method: 'local_file' | 'remote_url'
+    type: 'audio' | 'custom' | 'document' | 'image' | 'video'
+    upload_file_id?: string
+    url?: string
  }> | null
  inputs: {
    [key: string]: unknown
--- a/packages/contracts/generated/api/console/installed-apps/zod.gen.ts
+++ b/packages/contracts/generated/api/console/installed-apps/zod.gen.ts
@ -128,7 +128,7 @@ export const zSavedMessageCreatePayload = z.object({
 * TextToAudioPayload
 */
 export const zTextToAudioPayload = z.object({
-  message_id: z.string().nullish(),
+  message_id: z.uuid().nullish(),
  streaming: z.boolean().nullish(),
  text: z.string().nullish(),
  voice: z.string().nullish(),
@ -143,7 +143,16 @@ export const zAudioBinaryResponse = z.custom<Blob | File>()
 * WorkflowRunPayload
 */
 export const zWorkflowRunPayload = z.object({
-  files: z.array(z.record(z.string(), z.unknown())).nullish(),
+  files: z
+    .array(
+      z.object({
+        transfer_method: z.enum(['local_file', 'remote_url']),
+        type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
+        upload_file_id: z.string().optional(),
+        url: z.string().optional(),
+      }),
+    )
+    .nullish(),
  inputs: z.record(z.string(), z.unknown()),
 })

--- a/packages/contracts/generated/api/service/types.gen.ts
+++ b/packages/contracts/generated/api/service/types.gen.ts
@ -103,15 +103,16 @@ export type ChatRequestPayload = {
  auto_generate_name?: boolean
  conversation_id?: string | null
  files?: Array<{
-    [key: string]: unknown
+    transfer_method: 'local_file' | 'remote_url'
+    type: 'audio' | 'custom' | 'document' | 'image' | 'video'
+    upload_file_id?: string
+    url?: string
  }> | null
  inputs: {
    [key: string]: unknown
  }
  query: string
  response_mode?: 'blocking' | 'streaming' | null
-  retriever_from?: string
-  trace_session_id?: string | null
  workflow_id?: string | null
 }

@ -119,15 +120,16 @@ export type ChatRequestPayloadWithUser = {
  auto_generate_name?: boolean
  conversation_id?: string | null
  files?: Array<{
-    [key: string]: unknown
+    transfer_method: 'local_file' | 'remote_url'
+    type: 'audio' | 'custom' | 'document' | 'image' | 'video'
+    upload_file_id?: string
+    url?: string
  }> | null
  inputs: {
    [key: string]: unknown
  }
  query: string
  response_mode?: 'blocking' | 'streaming' | null
-  retriever_from?: string
-  trace_session_id?: string | null
  user: string
  workflow_id?: string | null
 }
@ -171,28 +173,30 @@ export type ChildChunkUpdatePayload = {

 export type CompletionRequestPayload = {
  files?: Array<{
-    [key: string]: unknown
+    transfer_method: 'local_file' | 'remote_url'
+    type: 'audio' | 'custom' | 'document' | 'image' | 'video'
+    upload_file_id?: string
+    url?: string
  }> | null
  inputs: {
    [key: string]: unknown
  }
  query?: string
  response_mode?: 'blocking' | 'streaming' | null
-  retriever_from?: string
-  trace_session_id?: string | null
 }

 export type CompletionRequestPayloadWithUser = {
  files?: Array<{
-    [key: string]: unknown
+    transfer_method: 'local_file' | 'remote_url'
+    type: 'audio' | 'custom' | 'document' | 'image' | 'video'
+    upload_file_id?: string
+    url?: string
  }> | null
  inputs: {
    [key: string]: unknown
  }
  query?: string
  response_mode?: 'blocking' | 'streaming' | null
-  retriever_from?: string
-  trace_session_id?: string | null
  user: string
 }

@ -217,7 +221,7 @@ export type Condition = {
    | '≤'
    | '≥'
  name: string
-  value?: string | Array<string> | number | number | null
+  value?: string | Array<string> | number | null
 }

 export type ConversationInfiniteScrollPagination = {
@ -315,10 +319,13 @@ export type DatasetCreatePayload = {
  indexing_technique?: 'economy' | 'high_quality' | null
  name: string
  permission?: PermissionEnum | null
-  provider?: string
+  provider?: 'external' | 'vendor'
  retrieval_model?: RetrievalModel | null
  summary_index_setting?: {
-    [key: string]: unknown
+    enable?: boolean
+    model_name?: string
+    model_provider_name?: string
+    summary_prompt?: string
  } | null
 }

@ -512,12 +519,14 @@ export type DatasetUpdatePayload = {
  external_knowledge_api_id?: string | null
  external_knowledge_id?: string | null
  external_retrieval_model?: {
-    [key: string]: unknown
+    score_threshold?: number
+    score_threshold_enabled?: boolean
+    top_k?: number
  } | null
  indexing_technique?: 'economy' | 'high_quality' | null
  name?: string | null
  partial_member_list?: Array<{
-    [key: string]: string
+    user_id?: string
  }> | null
  permission?: PermissionEnum | null
  retrieval_model?: RetrievalModel | null
@ -544,7 +553,7 @@ export type DatasourceCredentialInfoResponse = {

 export type DatasourceNodeRunPayload = {
  credential_id?: string | null
-  datasource_type: string
+  datasource_type: 'local_file' | 'online_document' | 'online_drive' | 'website_crawl'
  inputs: {
    [key: string]: unknown
  }
@ -626,7 +635,15 @@ export type DocumentListQuery = {
  keyword?: string | null
  limit?: number
  page?: number
-  status?: string | null
+  status?:
+    | 'archived'
+    | 'available'
+    | 'disabled'
+    | 'error'
+    | 'indexing'
+    | 'paused'
+    | 'queuing'
+    | null
 }

 export type DocumentListResponse = {
@ -701,11 +718,11 @@ export type DocumentStatusResponse = {
 }

 export type DocumentTextCreatePayload = {
-  doc_form?: string
+  doc_form?: 'hierarchical_model' | 'qa_model' | 'text_model'
  doc_language?: string
  embedding_model?: string | null
  embedding_model_provider?: string | null
-  indexing_technique?: string | null
+  indexing_technique?: 'economy' | 'high_quality' | null
  name: string
  original_document_id?: string | null
  process_rule?: ProcessRule | null
@ -715,7 +732,7 @@ export type DocumentTextCreatePayload = {

 export type DocumentTextUpdate = (
  | {
-    doc_form?: string
+    doc_form?: 'hierarchical_model' | 'qa_model' | 'text_model'
    doc_language?: string
    name: string
    process_rule?: ProcessRule | null
@ -723,7 +740,7 @@ export type DocumentTextUpdate = (
    text: string
  }
  | {
-    doc_form?: string
+    doc_form?: 'hierarchical_model' | 'qa_model' | 'text_model'
    doc_language?: string
    name?: string | null
    process_rule?: ProcessRule | null
@ -731,7 +748,7 @@ export type DocumentTextUpdate = (
    text?: null
  }
 ) & {
-  doc_form?: string
+  doc_form?: 'hierarchical_model' | 'qa_model' | 'text_model'
  doc_language?: string
  name?: string | null
  process_rule?: ProcessRule | null
@ -849,7 +866,9 @@ export type HitTestingFile = {
 export type HitTestingPayload = {
  attachment_ids?: Array<string> | null
  external_retrieval_model?: {
-    [key: string]: unknown
+    score_threshold?: number
+    score_threshold_enabled?: boolean
+    top_k?: number
  } | null
  query: string
  retrieval_model?: RetrievalModel | null
@ -1146,15 +1165,37 @@ export type Parameters = {
 export type PermissionEnum = 'all_team_members' | 'only_me' | 'partial_members'

 export type PipelineRunApiEntity = {
-  datasource_info_list: Array<{
-    [key: string]: unknown
-  }>
-  datasource_type: string
+  datasource_info_list: Array<
+    | {
+      name?: string
+      reference: string
+    }
+    | {
+      credential_id?: string
+      page: {
+        page_id: string
+        page_name?: string
+        type: string
+      }
+      workspace_id: string
+    }
+    | {
+      title?: string
+      url: string
+    }
+    | {
+      bucket?: string
+      id: string
+      name?: string
+      type: 'file' | 'folder'
+    }
+  >
+  datasource_type: 'local_file' | 'online_document' | 'online_drive' | 'website_crawl'
  inputs: {
    [key: string]: unknown
  }
  is_published: boolean
-  response_mode: string
+  response_mode: 'blocking' | 'streaming'
  start_node_id: string
 }

@ -1170,7 +1211,7 @@ export type PipelineUploadFileResponse = {

 export type PreProcessingRule = {
  enabled: boolean
-  id: string
+  id: 'remove_extra_spaces' | 'remove_stopwords' | 'remove_urls_emails'
 }

 export type ProcessRule = {
@ -1231,7 +1272,7 @@ export type RetrievalMethod
 export type RetrievalModel = {
  metadata_filtering_conditions?: MetadataFilteringCondition | null
  reranking_enable: boolean
-  reranking_mode?: string | null
+  reranking_mode?: 'reranking_model' | 'weighted_score' | null
  reranking_model?: RerankingModel | null
  score_threshold?: number | null
  score_threshold_enabled: boolean
@ -1578,24 +1619,28 @@ export type WorkflowRunForLogResponse = {

 export type WorkflowRunPayload = {
  files?: Array<{
-    [key: string]: unknown
+    transfer_method: 'local_file' | 'remote_url'
+    type: 'audio' | 'custom' | 'document' | 'image' | 'video'
+    upload_file_id?: string
+    url?: string
  }> | null
  inputs: {
    [key: string]: unknown
  }
  response_mode?: 'blocking' | 'streaming' | null
-  trace_session_id?: string | null
 }

 export type WorkflowRunPayloadWithUser = {
  files?: Array<{
-    [key: string]: unknown
+    transfer_method: 'local_file' | 'remote_url'
+    type: 'audio' | 'custom' | 'document' | 'image' | 'video'
+    upload_file_id?: string
+    url?: string
  }> | null
  inputs: {
    [key: string]: unknown
  }
  response_mode?: 'blocking' | 'streaming' | null
-  trace_session_id?: string | null
  user: string
 }

@ -1704,7 +1749,7 @@ export type PostAppsAnnotationReplyByActionResponse
 export type GetAppsAnnotationReplyByActionStatusByJobIdData = {
  body?: never
  path: {
-    action: string
+    action: 'disable' | 'enable'
    job_id: string
  }
  query?: never
@ -2399,7 +2444,7 @@ export type GetDatasetsByDatasetIdDocumentsData = {
    keyword?: string
    limit?: number
    page?: number
-    status?: string
+    status?: 'archived' | 'available' | 'disabled' | 'error' | 'indexing' | 'paused' | 'queuing'
  }
  url: '/datasets/{dataset_id}/documents'
 }
@ -3661,7 +3706,7 @@ export type PostWorkflowsByWorkflowIdRunResponse
 export type GetWorkspacesCurrentModelsModelTypesByModelTypeData = {
  body?: never
  path: {
-    model_type: string
+    model_type: 'llm' | 'moderation' | 'rerank' | 'speech2text' | 'text-embedding' | 'tts'
  }
  query?: never
  url: '/workspaces/current/models/model-types/{model_type}'
--- a/packages/contracts/generated/api/service/zod.gen.ts
+++ b/packages/contracts/generated/api/service/zod.gen.ts
@ -131,12 +131,19 @@ export const zButtonStyle = z.enum(['accent', 'default', 'ghost', 'primary'])
 export const zChatRequestPayload = z.object({
  auto_generate_name: z.boolean().optional().default(true),
  conversation_id: z.string().nullish(),
-  files: z.array(z.record(z.string(), z.unknown())).nullish(),
+  files: z
+    .array(
+      z.object({
+        transfer_method: z.enum(['local_file', 'remote_url']),
+        type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
+        upload_file_id: z.string().optional(),
+        url: z.string().optional(),
+      }),
+    )
+    .nullish(),
  inputs: z.record(z.string(), z.unknown()),
  query: z.string(),
  response_mode: z.enum(['blocking', 'streaming']).nullish(),
-  retriever_from: z.string().optional().default('dev'),
-  trace_session_id: z.string().nullish(),
  workflow_id: z.string().nullish(),
 })

@ -146,12 +153,19 @@ export const zChatRequestPayload = z.object({
 export const zChatRequestPayloadWithUser = z.object({
  auto_generate_name: z.boolean().optional().default(true),
  conversation_id: z.string().nullish(),
-  files: z.array(z.record(z.string(), z.unknown())).nullish(),
+  files: z
+    .array(
+      z.object({
+        transfer_method: z.enum(['local_file', 'remote_url']),
+        type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
+        upload_file_id: z.string().optional(),
+        url: z.string().optional(),
+      }),
+    )
+    .nullish(),
  inputs: z.record(z.string(), z.unknown()),
  query: z.string(),
  response_mode: z.enum(['blocking', 'streaming']).nullish(),
-  retriever_from: z.string().optional().default('dev'),
-  trace_session_id: z.string().nullish(),
  user: z.string(),
  workflow_id: z.string().nullish(),
 })
@ -215,24 +229,38 @@ export const zChildChunkUpdatePayload = z.object({
 * CompletionRequestPayload
 */
 export const zCompletionRequestPayload = z.object({
-  files: z.array(z.record(z.string(), z.unknown())).nullish(),
+  files: z
+    .array(
+      z.object({
+        transfer_method: z.enum(['local_file', 'remote_url']),
+        type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
+        upload_file_id: z.string().optional(),
+        url: z.string().optional(),
+      }),
+    )
+    .nullish(),
  inputs: z.record(z.string(), z.unknown()),
  query: z.string().optional().default(''),
  response_mode: z.enum(['blocking', 'streaming']).nullish(),
-  retriever_from: z.string().optional().default('dev'),
-  trace_session_id: z.string().nullish(),
 })

 /**
 * CompletionRequestPayload
 */
 export const zCompletionRequestPayloadWithUser = z.object({
-  files: z.array(z.record(z.string(), z.unknown())).nullish(),
+  files: z
+    .array(
+      z.object({
+        transfer_method: z.enum(['local_file', 'remote_url']),
+        type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
+        upload_file_id: z.string().optional(),
+        url: z.string().optional(),
+      }),
+    )
+    .nullish(),
  inputs: z.record(z.string(), z.unknown()),
  query: z.string().optional().default(''),
  response_mode: z.enum(['blocking', 'streaming']).nullish(),
-  retriever_from: z.string().optional().default('dev'),
-  trace_session_id: z.string().nullish(),
  user: z.string(),
 })

@ -263,7 +291,7 @@ export const zCondition = z.object({
    '≥',
  ]),
  name: z.string(),
-  value: z.union([z.string(), z.array(z.string()), z.int(), z.number()]).nullish(),
+  value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(),
 })

 /**
@ -657,7 +685,7 @@ export const zDatasourceCredentialInfoResponse = z.object({
 */
 export const zDatasourceNodeRunPayload = z.object({
  credential_id: z.string().nullish(),
-  datasource_type: z.string(),
+  datasource_type: z.enum(['local_file', 'online_document', 'online_drive', 'website_crawl']),
  inputs: z.record(z.string(), z.unknown()),
  is_published: z.boolean(),
 })
@ -710,7 +738,9 @@ export const zDocumentListQuery = z.object({
  keyword: z.string().nullish(),
  limit: z.int().optional().default(20),
  page: z.int().optional().default(1),
-  status: z.string().nullish(),
+  status: z
+    .enum(['archived', 'available', 'disabled', 'error', 'indexing', 'paused', 'queuing'])
+    .nullish(),
 })

 /**
@ -1337,11 +1367,37 @@ export const zPermissionEnum = z.enum(['all_team_members', 'only_me', 'partial_m
 * PipelineRunApiEntity
 */
 export const zPipelineRunApiEntity = z.object({
-  datasource_info_list: z.array(z.record(z.string(), z.unknown())),
-  datasource_type: z.string(),
+  datasource_info_list: z.array(
+    z.union([
+      z.object({
+        name: z.string().optional(),
+        reference: z.string(),
+      }),
+      z.object({
+        credential_id: z.string().optional(),
+        page: z.object({
+          page_id: z.string(),
+          page_name: z.string().optional(),
+          type: z.string(),
+        }),
+        workspace_id: z.string(),
+      }),
+      z.object({
+        title: z.string().optional(),
+        url: z.string(),
+      }),
+      z.object({
+        bucket: z.string().optional(),
+        id: z.string(),
+        name: z.string().optional(),
+        type: z.enum(['file', 'folder']),
+      }),
+    ]),
+  ),
+  datasource_type: z.enum(['local_file', 'online_document', 'online_drive', 'website_crawl']),
  inputs: z.record(z.string(), z.unknown()),
  is_published: z.boolean(),
-  response_mode: z.string(),
+  response_mode: z.enum(['blocking', 'streaming']),
  start_node_id: z.string(),
 })

@ -1363,7 +1419,7 @@ export const zPipelineUploadFileResponse = z.object({
 */
 export const zPreProcessingRule = z.object({
  enabled: z.boolean(),
-  id: z.string(),
+  id: z.enum(['remove_extra_spaces', 'remove_stopwords', 'remove_urls_emails']),
 })

 /**
@ -1779,7 +1835,7 @@ export const zTagUpdatePayload = z.object({
 * TextToAudioPayload
 */
 export const zTextToAudioPayload = z.object({
-  message_id: z.string().nullish(),
+  message_id: z.uuid().nullish(),
  streaming: z.boolean().nullish(),
  text: z.string().nullish(),
  voice: z.string().nullish(),
@ -1789,7 +1845,7 @@ export const zTextToAudioPayload = z.object({
 * TextToAudioPayload
 */
 export const zTextToAudioPayloadWithUser = z.object({
-  message_id: z.string().nullish(),
+  message_id: z.uuid().nullish(),
  streaming: z.boolean().nullish(),
  text: z.string().nullish(),
  user: z.string().optional(),
@ -1956,7 +2012,7 @@ export const zWeightModel = z.object({
 export const zRetrievalModel = z.object({
  metadata_filtering_conditions: zMetadataFilteringCondition.nullish(),
  reranking_enable: z.boolean(),
-  reranking_mode: z.string().nullish(),
+  reranking_mode: z.enum(['reranking_model', 'weighted_score']).nullish(),
  reranking_model: zRerankingModel.nullish(),
  score_threshold: z.number().nullish(),
  score_threshold_enabled: z.boolean(),
@ -1977,9 +2033,16 @@ export const zDatasetCreatePayload = z.object({
  indexing_technique: z.enum(['economy', 'high_quality']).nullish(),
  name: z.string().min(1).max(40),
  permission: zPermissionEnum.nullish().default('only_me'),
-  provider: z.string().optional().default('vendor'),
+  provider: z.enum(['external', 'vendor']).optional().default('vendor'),
  retrieval_model: zRetrievalModel.nullish(),
-  summary_index_setting: z.record(z.string(), z.unknown()).nullish(),
+  summary_index_setting: z
+    .object({
+      enable: z.boolean().optional(),
+      model_name: z.string().optional(),
+      model_provider_name: z.string().optional(),
+      summary_prompt: z.string().optional(),
+    })
+    .nullish(),
 })

 /**
@ -1991,10 +2054,22 @@ export const zDatasetUpdatePayload = z.object({
  embedding_model_provider: z.string().nullish(),
  external_knowledge_api_id: z.string().nullish(),
  external_knowledge_id: z.string().nullish(),
-  external_retrieval_model: z.record(z.string(), z.unknown()).nullish(),
+  external_retrieval_model: z
+    .object({
+      score_threshold: z.number().optional(),
+      score_threshold_enabled: z.boolean().optional(),
+      top_k: z.int().optional(),
+    })
+    .nullish(),
  indexing_technique: z.enum(['economy', 'high_quality']).nullish(),
  name: z.string().min(1).max(40).nullish(),
-  partial_member_list: z.array(z.record(z.string(), z.string())).nullish(),
+  partial_member_list: z
+    .array(
+      z.object({
+        user_id: z.string().optional(),
+      }),
+    )
+    .nullish(),
  permission: zPermissionEnum.nullish(),
  retrieval_model: zRetrievalModel.nullish(),
 })
@ -2003,11 +2078,14 @@ export const zDatasetUpdatePayload = z.object({
 * DocumentTextCreatePayload
 */
 export const zDocumentTextCreatePayload = z.object({
-  doc_form: z.string().optional().default('text_model'),
+  doc_form: z
+    .enum(['hierarchical_model', 'qa_model', 'text_model'])
+    .optional()
+    .default('text_model'),
  doc_language: z.string().optional().default('English'),
  embedding_model: z.string().nullish(),
  embedding_model_provider: z.string().nullish(),
-  indexing_technique: z.string().nullish(),
+  indexing_technique: z.enum(['economy', 'high_quality']).nullish(),
  name: z.string(),
  original_document_id: z.string().nullish(),
  process_rule: zProcessRule.nullish(),
@ -2018,7 +2096,10 @@ export const zDocumentTextCreatePayload = z.object({
 export const zDocumentTextUpdate = z.intersection(
  z.union([
    z.object({
-      doc_form: z.string().optional().default('text_model'),
+      doc_form: z
+        .enum(['hierarchical_model', 'qa_model', 'text_model'])
+        .optional()
+        .default('text_model'),
      doc_language: z.string().optional().default('English'),
      name: z.string(),
      process_rule: zProcessRule.nullish(),
@ -2026,7 +2107,10 @@ export const zDocumentTextUpdate = z.intersection(
      text: z.string(),
    }),
    z.object({
-      doc_form: z.string().optional().default('text_model'),
+      doc_form: z
+        .enum(['hierarchical_model', 'qa_model', 'text_model'])
+        .optional()
+        .default('text_model'),
      doc_language: z.string().optional().default('English'),
      name: z.string().nullish(),
      process_rule: zProcessRule.nullish(),
@ -2035,7 +2119,10 @@ export const zDocumentTextUpdate = z.intersection(
    }),
  ]),
  z.object({
-    doc_form: z.string().optional().default('text_model'),
+    doc_form: z
+      .enum(['hierarchical_model', 'qa_model', 'text_model'])
+      .optional()
+      .default('text_model'),
    doc_language: z.string().optional().default('English'),
    name: z.string().nullish(),
    process_rule: zProcessRule.nullish(),
@ -2049,7 +2136,13 @@ export const zDocumentTextUpdate = z.intersection(
 */
 export const zHitTestingPayload = z.object({
  attachment_ids: z.array(z.string()).nullish(),
-  external_retrieval_model: z.record(z.string(), z.unknown()).nullish(),
+  external_retrieval_model: z
+    .object({
+      score_threshold: z.number().optional(),
+      score_threshold_enabled: z.boolean().optional(),
+      top_k: z.int().optional(),
+    })
+    .nullish(),
  query: z.string().max(250),
  retrieval_model: zRetrievalModel.nullish(),
 })
@ -2132,20 +2225,36 @@ export const zWorkflowAppLogPaginationResponse = z.object({
 * WorkflowRunPayload
 */
 export const zWorkflowRunPayload = z.object({
-  files: z.array(z.record(z.string(), z.unknown())).nullish(),
+  files: z
+    .array(
+      z.object({
+        transfer_method: z.enum(['local_file', 'remote_url']),
+        type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
+        upload_file_id: z.string().optional(),
+        url: z.string().optional(),
+      }),
+    )
+    .nullish(),
  inputs: z.record(z.string(), z.unknown()),
  response_mode: z.enum(['blocking', 'streaming']).nullish(),
-  trace_session_id: z.string().nullish(),
 })

 /**
 * WorkflowRunPayload
 */
 export const zWorkflowRunPayloadWithUser = z.object({
-  files: z.array(z.record(z.string(), z.unknown())).nullish(),
+  files: z
+    .array(
+      z.object({
+        transfer_method: z.enum(['local_file', 'remote_url']),
+        type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
+        upload_file_id: z.string().optional(),
+        url: z.string().optional(),
+      }),
+    )
+    .nullish(),
  inputs: z.record(z.string(), z.unknown()),
  response_mode: z.enum(['blocking', 'streaming']).nullish(),
-  trace_session_id: z.string().nullish(),
  user: z.string(),
 })

@ -2231,7 +2340,7 @@ export const zPostAppsAnnotationReplyByActionPath = z.object({
 export const zPostAppsAnnotationReplyByActionResponse = zAnnotationJobStatusResponse

 export const zGetAppsAnnotationReplyByActionStatusByJobIdPath = z.object({
-  action: z.string(),
+  action: z.enum(['disable', 'enable']),
  job_id: z.uuid(),
 })

@ -2553,7 +2662,9 @@ export const zGetDatasetsByDatasetIdDocumentsQuery = z.object({
  keyword: z.string().optional(),
  limit: z.int().optional().default(20),
  page: z.int().optional().default(1),
-  status: z.string().optional(),
+  status: z
+    .enum(['archived', 'available', 'disabled', 'error', 'indexing', 'paused', 'queuing'])
+    .optional(),
 })

 /**
@ -3112,8 +3223,8 @@ export const zGetWorkflowByTaskIdEventsQuery = z.object({
 export const zGetWorkflowByTaskIdEventsResponse = zEventStreamResponse

 export const zGetWorkflowsLogsQuery = z.object({
-  created_at__after: z.string().optional(),
-  created_at__before: z.string().optional(),
+  created_at__after: z.iso.datetime().optional(),
+  created_at__before: z.iso.datetime().optional(),
  created_by_account: z.string().optional(),
  created_by_end_user_session_id: z.string().optional(),
  keyword: z.string().optional(),
@ -3172,7 +3283,7 @@ export const zPostWorkflowsByWorkflowIdRunPath = z.object({
 export const zPostWorkflowsByWorkflowIdRunResponse = zGeneratedAppResponse

 export const zGetWorkspacesCurrentModelsModelTypesByModelTypePath = z.object({
-  model_type: z.string(),
+  model_type: z.enum(['llm', 'moderation', 'rerank', 'speech2text', 'text-embedding', 'tts']),
 })

 /**
--- a/packages/contracts/generated/api/web/types.gen.ts
+++ b/packages/contracts/generated/api/web/types.gen.ts
@ -640,7 +640,10 @@ export type WebMessageListItem = {

 export type WorkflowRunPayload = {
  files?: Array<{
-    [key: string]: unknown
+    transfer_method: 'local_file' | 'remote_url'
+    type: 'audio' | 'custom' | 'document' | 'image' | 'video'
+    upload_file_id?: string
+    url?: string
  }> | null
  inputs: {
    [key: string]: unknown
--- a/packages/contracts/generated/api/web/zod.gen.ts
+++ b/packages/contracts/generated/api/web/zod.gen.ts
@ -696,7 +696,7 @@ export const zParameters = z.object({
 * TextToAudioPayload
 */
 export const zTextToAudioPayload = z.object({
-  message_id: z.string().nullish(),
+  message_id: z.uuid().nullish(),
  streaming: z.boolean().nullish(),
  text: z.string().nullish(),
  voice: z.string().nullish(),
@ -906,7 +906,16 @@ export const zWebMessageInfiniteScrollPagination = z.object({
 * WorkflowRunPayload
 */
 export const zWorkflowRunPayload = z.object({
-  files: z.array(z.record(z.string(), z.unknown())).nullish(),
+  files: z
+    .array(
+      z.object({
+        transfer_method: z.enum(['local_file', 'remote_url']),
+        type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
+        upload_file_id: z.string().optional(),
+        url: z.string().optional(),
+      }),
+    )
+    .nullish(),
  inputs: z.record(z.string(), z.unknown()),
 })