docs: enrich generated service API descriptions (#37615)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
Stephen Zhou 2026-06-18 16:43:39 +08:00 committed by GitHub
parent 2f72b576f0
commit c52eafe2ca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
37 changed files with 1992 additions and 918 deletions

View File

@ -1,8 +1,8 @@
from copy import deepcopy
from typing import Any, Literal, override
from typing import Annotated, Any, Literal, override
from uuid import UUID
from pydantic import BaseModel, Field, GetJsonSchemaHandler, model_validator
from pydantic import BaseModel, Field, GetJsonSchemaHandler, WithJsonSchema, model_validator
from libs.helper import UUIDStrOrEmpty
@ -10,8 +10,14 @@ from libs.helper import UUIDStrOrEmpty
class ConversationRenamePayload(BaseModel):
name: str | None = None
auto_generate: bool = False
name: str | None = Field(
default=None,
description="Conversation name. Required when `auto_generate` is `false`.",
)
auto_generate: bool = Field(
default=False,
description="Automatically generate the conversation name. When `true`, the `name` field is ignored.",
)
@classmethod
@override
@ -64,14 +70,28 @@ class ConversationRenamePayload(BaseModel):
class MessageListQuery(BaseModel):
conversation_id: UUIDStrOrEmpty = Field(description="Conversation UUID")
first_id: UUIDStrOrEmpty | None = Field(default=None, description="First message ID for pagination")
limit: int = Field(default=20, ge=1, le=100, description="Number of messages to return (1-100)")
conversation_id: UUIDStrOrEmpty = Field(description="Conversation ID.")
first_id: UUIDStrOrEmpty | None = Field(
default=None,
description=(
"The ID of the first chat record on the current page. Omit this value to fetch the latest messages; "
"for subsequent pages, use the first message ID from the current list to fetch older messages."
),
)
limit: int = Field(
default=20,
ge=1,
le=100,
description="Number of chat history messages to return per request.",
)
class MessageFeedbackPayload(BaseModel):
rating: Literal["like", "dislike"] | None = None
content: str | None = None
rating: Literal["like", "dislike"] | None = Field(
default=None,
description="Feedback rating. Set to `null` to revoke previously submitted feedback.",
)
content: str | None = Field(default=None, description="Optional text feedback providing additional detail.")
# --- Saved message schemas ---
@ -88,6 +108,39 @@ class SavedMessageCreatePayload(BaseModel):
# --- Workflow schemas ---
# JSON Schema fragment describing one file object in a workflow file input.
# Only `type` and `transfer_method` are listed as required; `url` and
# `upload_file_id` are optional at the schema level, and their descriptions
# state which `transfer_method` value each one belongs to.
WORKFLOW_INPUT_FILE_ITEM_SCHEMA: dict[str, object] = {
"type": "object",
"required": ["type", "transfer_method"],
"properties": {
"type": {
"description": "File type.",
"enum": ["document", "image", "audio", "video", "custom"],
"type": "string",
},
"transfer_method": {
"description": "Transfer method: `remote_url` for file URL, `local_file` for uploaded file.",
"enum": ["remote_url", "local_file"],
"type": "string",
},
"url": {
"description": "File URL when `transfer_method` is `remote_url`.",
# NOTE(review): the JSON Schema specification names this format `uri`; `url` is
# not among its defined formats — confirm the docs tooling relies on `url`.
"format": "url",
"type": "string",
},
"upload_file_id": {
"description": (
"Uploaded file ID obtained from the [Upload File](/api-reference/files/upload-file) API when "
"`transfer_method` is `local_file`."
),
"type": "string",
},
},
}
# JSON Schema for an optional file list: either an array of file-item objects or null.
# The item schema dict is embedded by reference (not copied), so mutating
# WORKFLOW_INPUT_FILE_ITEM_SCHEMA afterwards would also change this schema.
WORKFLOW_INPUT_FILE_LIST_SCHEMA: dict[str, object] = {
"anyOf": [{"items": WORKFLOW_INPUT_FILE_ITEM_SCHEMA, "type": "array"}, {"type": "null"}]
}
# Field type for optional workflow file lists. The Python type stays
# `list[dict[str, Any]] | None`; the WithJsonSchema annotation supplies
# WORKFLOW_INPUT_FILE_LIST_SCHEMA as the JSON schema generated for the field.
WorkflowInputFileList = Annotated[list[dict[str, Any]] | None, WithJsonSchema(WORKFLOW_INPUT_FILE_LIST_SCHEMA)]
class DefaultBlockConfigQuery(BaseModel):
q: str | None = None
@ -101,8 +154,22 @@ class WorkflowListQuery(BaseModel):
class WorkflowRunPayload(BaseModel):
inputs: dict[str, Any]
files: list[dict[str, Any]] | None = Field(default=None)
inputs: dict[str, Any] = Field(
description=(
"Key-value pairs for workflow input variables. Values for file-type variables should be arrays of "
"file objects with `type`, `transfer_method`, and either `url` or `upload_file_id`. Refer to the "
"`user_input_form` field in the [Get App Parameters](/api-reference/applications/get-app-parameters) "
"response to discover the variable names and types expected by your app."
)
)
files: WorkflowInputFileList = Field(
default=None,
description=(
"File list for workflow system file inputs. Available when file upload is enabled for the workflow. "
"To attach a local file, first upload it via [Upload File](/api-reference/files/upload-file) and use "
"the returned `id` as `upload_file_id` with `transfer_method: local_file`."
),
)
class WorkflowUpdatePayload(BaseModel):
@ -117,30 +184,48 @@ DOCUMENT_BATCH_DOWNLOAD_ZIP_MAX_DOCS = 100
class ChildChunkCreatePayload(BaseModel):
content: str
content: str = Field(description="Child chunk text content.")
class ChildChunkUpdatePayload(BaseModel):
content: str
content: str = Field(description="Child chunk text content.")
class DocumentBatchDownloadZipPayload(BaseModel):
"""Request payload for bulk downloading documents as a zip archive."""
document_ids: list[UUID] = Field(..., min_length=1, max_length=DOCUMENT_BATCH_DOWNLOAD_ZIP_MAX_DOCS)
document_ids: list[UUID] = Field(
...,
min_length=1,
max_length=DOCUMENT_BATCH_DOWNLOAD_ZIP_MAX_DOCS,
description="List of document IDs to include in the ZIP download.",
)
class MetadataUpdatePayload(BaseModel):
name: str
name: str = Field(description="New metadata field name.")
# --- Audio schemas ---
# A plain `str` whose generated JSON schema is `{"type": "string", "format": "uuid"}`.
# The underlying Python type is still `str`, not `uuid.UUID`.
UUIDString = Annotated[str, WithJsonSchema({"format": "uuid", "type": "string"})]
class TextToAudioPayload(BaseModel):
message_id: str | None = Field(default=None, description="Message ID")
voice: str | None = Field(default=None, description="Voice to use for TTS")
text: str | None = Field(default=None, description="Text to convert to audio")
message_id: UUIDString | None = Field(
default=None,
description="Message ID. Takes priority over `text` when both are provided.",
)
voice: str | None = Field(
default=None,
description=(
"Voice to use for text-to-speech. Available voices depend on the TTS provider configured for this app. "
"Omit to use the app's configured voice when available; that value is exposed by "
"[Get App Parameters](/api-reference/applications/get-app-parameters) as `text_to_speech.voice`."
),
)
text: str | None = Field(default=None, description="Speech content to convert.")
streaming: bool | None = Field(
default=None,
description="Reserved for compatibility; TTS response streaming is determined by the provider output.",

View File

@ -35,7 +35,12 @@ class HumanInputFormSubmitPayload(BaseModel):
),
examples=[HUMAN_INPUT_FORM_INPUT_EXAMPLE],
)
action: str
action: str = Field(
description=(
"ID of the action button the recipient selected. Must match one of the `id` values from the form's "
"`user_actions` list."
)
)
def stringify_form_default_values(values: dict[str, object]) -> dict[str, str]:

View File

@ -23,17 +23,26 @@ from libs.login import resolve_account_fallback
from models.account import Account
from models.dataset import Dataset
from services.dataset_service import DatasetService
from services.entities.knowledge_entities.knowledge_entities import RetrievalModel
from services.entities.knowledge_entities.knowledge_entities import ExternalRetrievalModel, RetrievalModel
from services.hit_testing_service import HitTestingService
logger = logging.getLogger(__name__)
class HitTestingPayload(BaseModel):
query: str = Field(max_length=250)
retrieval_model: RetrievalModel | None = None
external_retrieval_model: dict[str, Any] | None = Field(default=None)
attachment_ids: list[str] | None = None
query: str = Field(description="Search query text.", max_length=250)
retrieval_model: RetrievalModel | None = Field(
default=None,
description="Retrieval model configuration. Controls how chunks are searched and ranked.",
)
external_retrieval_model: ExternalRetrievalModel = Field(
default=None,
description="Retrieval settings for external knowledge bases.",
)
attachment_ids: list[str] | None = Field(
default=None,
description="List of attachment IDs to include in the retrieval context.",
)
class DatasetsHitTestingBase:

View File

@ -23,20 +23,25 @@ from services.annotation_service import (
class AnnotationCreatePayload(BaseModel):
question: str = Field(description="Annotation question")
answer: str = Field(description="Annotation answer")
question: str = Field(description="Annotation question.")
answer: str = Field(description="Annotation answer.")
class AnnotationReplyActionPayload(BaseModel):
score_threshold: float = Field(description="Score threshold for annotation matching")
embedding_provider_name: str = Field(description="Embedding provider name")
embedding_model_name: str = Field(description="Embedding model name")
score_threshold: float = Field(
description=(
"Minimum similarity score for an annotation to be considered a match. Higher values require closer matches."
),
json_schema_extra={"format": "float"},
)
embedding_provider_name: str = Field(description="Name of the embedding model provider.")
embedding_model_name: str = Field(description="Name of the embedding model to use for annotation matching.")
class AnnotationListQuery(BaseModel):
page: int = Field(default=1, ge=1, description="Page number")
limit: int = Field(default=20, ge=1, description="Number of annotations per page")
keyword: str = Field(default="", description="Keyword to search annotations")
page: int = Field(default=1, ge=1, description="Page number for pagination.")
limit: int = Field(default=20, ge=1, description="Number of items per page.")
keyword: str = Field(default="", description="Keyword to filter annotations by question or answer content.")
class AnnotationJobStatusResponse(ResponseModel):
@ -46,7 +51,7 @@ class AnnotationJobStatusResponse(ResponseModel):
ANNOTATION_REPLY_ACTION_PARAM = {
"description": "Action to perform: 'enable' or 'disable'",
"description": "Action to perform: `enable` or `disable`.",
"enum": ["enable", "disable"],
"type": "string",
}
@ -125,7 +130,15 @@ class AnnotationReplyActionStatusApi(Resource):
)
@service_api_ns.doc("get_annotation_reply_action_status")
@service_api_ns.doc(description="Get the status of an annotation reply action job")
@service_api_ns.doc(params={"action": "Action type", "job_id": "Job ID"})
@service_api_ns.doc(
params={
"action": ANNOTATION_REPLY_ACTION_PARAM,
"job_id": (
"Job ID returned by "
"[Configure Annotation Reply](/api-reference/annotations/configure-annotation-reply)."
),
}
)
@service_api_ns.doc(
responses={
200: "Job status retrieved successfully",
@ -248,7 +261,7 @@ class AnnotationUpdateDeleteApi(Resource):
@service_api_ns.expect(service_api_ns.models[AnnotationCreatePayload.__name__])
@service_api_ns.doc("update_annotation")
@service_api_ns.doc(description="Update an existing annotation")
@service_api_ns.doc(params={"annotation_id": "Annotation ID"})
@service_api_ns.doc(params={"annotation_id": "The unique identifier of the annotation to update."})
@service_api_ns.doc(
responses={
200: "Annotation updated successfully",
@ -284,7 +297,7 @@ class AnnotationUpdateDeleteApi(Resource):
)
@service_api_ns.doc("delete_annotation")
@service_api_ns.doc(description="Delete an annotation")
@service_api_ns.doc(params={"annotation_id": "Annotation ID"})
@service_api_ns.doc(params={"annotation_id": "The unique identifier of the annotation to delete."})
@service_api_ns.doc(
responses={
204: "Annotation deleted successfully",

View File

@ -64,7 +64,16 @@ class AudioApi(Resource):
)
@service_api_ns.doc("audio_to_text")
@service_api_ns.doc(description="Convert audio to text using speech-to-text")
@service_api_ns.doc(consumes=["multipart/form-data"], params=multipart_file_params(include_user=True))
@service_api_ns.doc(
consumes=["multipart/form-data"],
params=multipart_file_params(
include_user=True,
file_description=(
"Audio file to transcribe. Supported MIME types: `audio/mp3`, `audio/mpga`, `audio/m4a`, "
"`audio/wav`, and `audio/amr`. File size limit is `30 MB`."
),
),
)
@service_api_ns.doc(
responses={
200: "Audio successfully transcribed",

View File

@ -5,6 +5,7 @@ from uuid import UUID
from flask import request
from flask_restx import Resource
from pydantic import BaseModel, Field, field_validator
from pydantic.json_schema import SkipJsonSchema
from werkzeug.exceptions import BadRequest, InternalServerError, NotFound
import services
@ -20,7 +21,12 @@ from controllers.service_api.app.error import (
ProviderNotInitializeError,
ProviderQuotaExceededError,
)
from controllers.service_api.schema import expect_user_json, expect_with_user, json_or_event_stream_response
from controllers.service_api.schema import (
InputFileList,
expect_user_json,
expect_with_user,
json_or_event_stream_response,
)
from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
from core.app.entities.app_invoke_entities import InvokeFrom
@ -52,24 +58,84 @@ def _resolve_agent_app_streaming(*, app_mode: AppMode, response_mode: str | None
class CompletionRequestPayload(BaseModel):
inputs: dict[str, Any]
query: str = Field(default="")
files: list[dict[str, Any]] | None = Field(default=None)
response_mode: Literal["blocking", "streaming"] | None = None
retriever_from: str = Field(default="dev")
trace_session_id: str | None = Field(default=None, description="Trace session ID for observability grouping")
inputs: dict[str, Any] = Field(
description=(
"Values for app-defined variables. Refer to the `user_input_form` field in the "
"[Get App Parameters](/api-reference/applications/get-app-parameters) response to discover expected "
"variable names and types."
)
)
query: str = Field(default="", description="User input or prompt content.")
files: InputFileList = Field(
default=None,
description=(
"File list for multimodal understanding, including images, documents, audio, and video. To attach a "
"local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned "
"`id` as `upload_file_id` with `transfer_method: local_file`."
),
)
response_mode: Literal["blocking", "streaming"] | None = Field(
default=None,
description=(
"Response mode. `streaming` uses Server-Sent Events; `blocking` returns after completion. When omitted, "
"the request runs in blocking mode."
),
)
retriever_from: SkipJsonSchema[str] = Field(default="dev")
trace_session_id: SkipJsonSchema[str | None] = Field(
default=None, description="Trace session ID for observability grouping"
)
class ChatRequestPayload(BaseModel):
inputs: dict[str, Any]
query: str
files: list[dict[str, Any]] | None = Field(default=None)
response_mode: Literal["blocking", "streaming"] | None = None
conversation_id: UUIDStrOrEmpty | None = Field(default=None, description="Conversation UUID")
retriever_from: str = Field(default="dev")
auto_generate_name: bool = Field(default=True, description="Auto generate conversation name")
workflow_id: str | None = Field(default=None, description="Workflow ID for advanced chat")
trace_session_id: str | None = Field(default=None, description="Trace session ID for observability grouping")
inputs: dict[str, Any] = Field(
description=(
"Values for app-defined variables. Refer to the `user_input_form` field in the "
"[Get App Parameters](/api-reference/applications/get-app-parameters) response to discover expected "
"variable names and types."
)
)
query: str = Field(description="User input or question content.")
files: InputFileList = Field(
default=None,
description=(
"File list for multimodal understanding, including images, documents, audio, and video. To attach a "
"local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned "
"`id` as `upload_file_id` with `transfer_method: local_file`."
),
)
response_mode: Literal["blocking", "streaming"] | None = Field(
default=None,
description=(
"Response mode. `streaming` uses Server-Sent Events; `blocking` returns after completion. New Agent app "
"mode supports streaming only. When omitted, non-Agent apps run in blocking mode and new Agent apps stream."
),
)
conversation_id: UUIDStrOrEmpty | None = Field(
default=None,
description=(
"Conversation ID to continue a conversation. Omit this field or pass an empty string to start a new "
"conversation, then pass the returned `conversation_id` in subsequent requests."
),
)
retriever_from: SkipJsonSchema[str] = Field(default="dev")
auto_generate_name: bool = Field(
default=True,
description=(
"Auto-generate the conversation title. If `false`, use the Rename Conversation API with "
"`auto_generate: true` to generate the title asynchronously."
),
)
workflow_id: str | None = Field(
default=None,
description=(
"Published workflow version ID to execute for advanced chat. If omitted, the app's current published "
"workflow is used."
),
)
trace_session_id: SkipJsonSchema[str | None] = Field(
default=None, description="Trace session ID for observability grouping"
)
@field_validator("conversation_id", mode="before")
@classmethod
@ -206,7 +272,9 @@ class CompletionStopApi(Resource):
@expect_user_json(service_api_ns)
@service_api_ns.doc("stop_completion")
@service_api_ns.doc(description="Stop a running completion task")
@service_api_ns.doc(params={"task_id": "The ID of the task to stop"})
@service_api_ns.doc(
params={"task_id": ("Task ID, obtained from a streaming chunk returned by the Send Completion Message API.")}
)
@service_api_ns.doc(
responses={
200: "Task stopped successfully",
@ -355,7 +423,9 @@ class ChatStopApi(Resource):
@expect_user_json(service_api_ns)
@service_api_ns.doc("stop_chat_message")
@service_api_ns.doc(description="Stop a running chat message generation")
@service_api_ns.doc(params={"task_id": "The ID of the task to stop"})
@service_api_ns.doc(
params={"task_id": "Task ID, obtained from a streaming chunk returned by the Send Chat Message API."}
)
@service_api_ns.doc(
responses={
200: "Task stopped successfully",

View File

@ -30,18 +30,28 @@ from services.conversation_service import ConversationService
class ConversationListQuery(BaseModel):
last_id: UUIDStrOrEmpty | None = Field(default=None, description="Last conversation ID for pagination")
limit: int = Field(default=20, ge=1, le=100, description="Number of conversations to return")
last_id: UUIDStrOrEmpty | None = Field(
default=None,
description="The ID of the last record on the current page. Used to fetch the next page.",
)
limit: int = Field(default=20, ge=1, le=100, description="Number of records to return.")
sort_by: Literal["created_at", "-created_at", "updated_at", "-updated_at"] = Field(
default="-updated_at", description="Sort order for conversations"
default="-updated_at",
description="Sorting field. Use the `-` prefix for descending order.",
)
class ConversationVariablesQuery(BaseModel):
last_id: UUIDStrOrEmpty | None = Field(default=None, description="Last variable ID for pagination")
limit: int = Field(default=20, ge=1, le=100, description="Number of variables to return")
last_id: UUIDStrOrEmpty | None = Field(
default=None,
description="The ID of the last record on the current page. Used to fetch the next page.",
)
limit: int = Field(default=20, ge=1, le=100, description="Number of records to return.")
variable_name: str | None = Field(
default=None, description="Filter variables by name", min_length=1, max_length=255
default=None,
description="Filter variables by a specific name.",
min_length=1,
max_length=255,
)
@field_validator("variable_name", mode="before")
@ -69,7 +79,7 @@ class ConversationVariablesQuery(BaseModel):
class ConversationVariableUpdatePayload(BaseModel):
value: Any
value: Any = Field(description="The new value for the variable. Must match the variable's expected type.")
class ConversationVariableResponse(ResponseModel):
@ -221,7 +231,7 @@ class ConversationDetailApi(Resource):
@expect_user_json(service_api_ns)
@service_api_ns.doc("delete_conversation")
@service_api_ns.doc(description="Delete a specific conversation")
@service_api_ns.doc(params={"c_id": "Conversation ID"})
@service_api_ns.doc(params={"c_id": "Conversation ID."})
@service_api_ns.doc(
responses={
204: "Conversation deleted successfully",
@ -263,7 +273,7 @@ class ConversationRenameApi(Resource):
@expect_with_user(service_api_ns, ConversationRenamePayload)
@service_api_ns.doc("rename_conversation")
@service_api_ns.doc(description="Rename a conversation or auto-generate a name")
@service_api_ns.doc(params={"c_id": "Conversation ID"})
@service_api_ns.doc(params={"c_id": "Conversation ID."})
@service_api_ns.doc(
responses={
200: "Conversation renamed successfully",
@ -315,7 +325,7 @@ class ConversationVariablesApi(Resource):
@service_api_ns.doc(params=query_params_from_model(ConversationVariablesQuery))
@service_api_ns.doc("list_conversation_variables")
@service_api_ns.doc(description="List all variables for a conversation")
@service_api_ns.doc(params={"c_id": "Conversation ID"})
@service_api_ns.doc(params={"c_id": "Conversation ID."})
@service_api_ns.doc(
responses={
200: "Variables retrieved successfully",
@ -375,7 +385,7 @@ class ConversationVariableDetailApi(Resource):
@expect_with_user(service_api_ns, ConversationVariableUpdatePayload)
@service_api_ns.doc("update_conversation_variable")
@service_api_ns.doc(description="Update a conversation variable's value")
@service_api_ns.doc(params={"c_id": "Conversation ID", "variable_id": "Variable ID"})
@service_api_ns.doc(params={"c_id": "Conversation ID.", "variable_id": "Variable ID."})
@service_api_ns.doc(
responses={
200: "Variable updated successfully",

View File

@ -25,7 +25,10 @@ logger = logging.getLogger(__name__)
class FilePreviewQuery(BaseModel):
as_attachment: bool = Field(default=False, description="Download as attachment")
as_attachment: bool = Field(
default=False,
description="If `true`, forces the file to download as an attachment instead of previewing in browser.",
)
register_schema_model(service_api_ns, FilePreviewQuery)
@ -83,7 +86,14 @@ class FilePreviewApi(Resource):
@binary_response(service_api_ns, FILE_PREVIEW_RESPONSE_MEDIA_TYPES)
@service_api_ns.doc("preview_file")
@service_api_ns.doc(description="Preview or download a file uploaded via Service API")
@service_api_ns.doc(params={"file_id": "UUID of the file to preview"})
@service_api_ns.doc(
params={
"file_id": (
"The unique identifier of the file to preview, obtained from the "
"[Upload File](/api-reference/files/upload-file) API response."
)
}
)
@service_api_ns.doc(
responses={
200: "File retrieved successfully",

View File

@ -31,8 +31,8 @@ logger = logging.getLogger(__name__)
class FeedbackListQuery(BaseModel):
page: int = Field(default=1, ge=1, description="Page number")
limit: int = Field(default=20, ge=1, le=101, description="Number of feedbacks per page")
page: int = Field(default=1, ge=1, description="Page number for pagination.")
limit: int = Field(default=20, ge=1, le=101, description="Number of records per page.")
class AppFeedbackResponse(ResponseModel):
@ -142,7 +142,7 @@ class MessageFeedbackApi(Resource):
@service_api_ns.response(200, "Feedback submitted successfully", service_api_ns.models[ResultResponse.__name__])
@service_api_ns.doc("create_message_feedback")
@service_api_ns.doc(description="Submit feedback for a message")
@service_api_ns.doc(params={"message_id": "Message ID"})
@service_api_ns.doc(params={"message_id": "Message ID."})
@service_api_ns.doc(
responses={
200: "Feedback submitted successfully",

View File

@ -7,6 +7,7 @@ from dateutil.parser import isoparse
from flask import request
from flask_restx import Resource, fields
from pydantic import BaseModel, Field, field_validator
from pydantic.json_schema import SkipJsonSchema
from sqlalchemy.orm import sessionmaker
from werkzeug.exceptions import BadRequest, InternalServerError, NotFound
@ -58,19 +59,41 @@ logger = logging.getLogger(__name__)
class WorkflowRunPayload(WorkflowRunPayloadBase):
response_mode: Literal["blocking", "streaming"] | None = None
trace_session_id: str | None = Field(default=None, description="Trace session ID for observability grouping")
response_mode: Literal["blocking", "streaming"] | None = Field(
default=None,
description=(
"Response mode. Use `blocking` for synchronous responses or `streaming` for Server-Sent Events. "
"When omitted, the request runs in blocking mode."
),
)
trace_session_id: SkipJsonSchema[str | None] = Field(
default=None, description="Trace session ID for observability grouping"
)
class WorkflowLogQuery(BaseModel):
keyword: str | None = None
status: Literal["succeeded", "failed", "stopped"] | None = None
created_at__before: str | None = None
created_at__after: str | None = None
created_by_end_user_session_id: str | None = None
created_by_account: str | None = None
page: int = Field(default=1, ge=1, le=99999)
limit: int = Field(default=20, ge=1, le=100)
keyword: str | None = Field(default=None, description="Keyword to search in logs.")
status: Literal["succeeded", "failed", "stopped"] | None = Field(
default=None,
description="Filter by execution status.",
)
created_at__before: str | None = Field(
default=None,
description="Filter logs created before this ISO 8601 timestamp.",
json_schema_extra={"format": "date-time"},
)
created_at__after: str | None = Field(
default=None,
description="Filter logs created after this ISO 8601 timestamp.",
json_schema_extra={"format": "date-time"},
)
created_by_end_user_session_id: str | None = Field(
default=None,
description="Filter by end user session ID.",
)
created_by_account: str | None = Field(default=None, description="Filter by account ID.")
page: int = Field(default=1, ge=1, le=99999, description="Page number for pagination.")
limit: int = Field(default=20, ge=1, le=100, description="Number of items per page.")
register_schema_models(service_api_ns, WorkflowRunPayload, WorkflowLogQuery)
@ -226,7 +249,11 @@ class WorkflowRunDetailApi(Resource):
)
@service_api_ns.doc("get_workflow_run_detail")
@service_api_ns.doc(description="Get workflow run details")
@service_api_ns.doc(params={"workflow_run_id": "Workflow run ID"})
@service_api_ns.doc(
params={
"workflow_run_id": "Workflow run ID, obtained from the workflow execution response or streaming events."
}
)
@service_api_ns.doc(
responses={
200: "Workflow run details retrieved successfully",
@ -397,7 +424,14 @@ class WorkflowRunByIdApi(Resource):
@json_or_event_stream_response(service_api_ns)
@service_api_ns.doc("run_workflow_by_id")
@service_api_ns.doc(description="Execute a specific workflow by ID")
@service_api_ns.doc(params={"workflow_id": "Workflow ID to execute"})
@service_api_ns.doc(
params={
"workflow_id": (
"Workflow ID of the specific version to execute. This value is returned in the `workflow_id` field "
"of workflow run responses."
)
}
)
@service_api_ns.doc(
responses={
200: "Workflow executed successfully",
@ -482,7 +516,9 @@ class WorkflowTaskStopApi(Resource):
@expect_user_json(service_api_ns)
@service_api_ns.doc("stop_workflow_task")
@service_api_ns.doc(description="Stop a running workflow task")
@service_api_ns.doc(params={"task_id": "Task ID to stop"})
@service_api_ns.doc(
params={"task_id": "Task ID, obtained from the streaming chunk returned by the Run Workflow API."}
)
@service_api_ns.doc(
responses={
200: "Task stopped successfully",

View File

@ -32,9 +32,25 @@ from services.workflow_event_snapshot_service import build_workflow_event_stream
class WorkflowEventsQuery(BaseModel):
user: str = Field(..., description="End user identifier")
include_state_snapshot: bool = Field(default=False, description="Replay from persisted state snapshot")
continue_on_pause: bool = Field(default=False, description="Keep the stream open across workflow_paused events")
user: str = Field(
...,
description="End-user identifier that originally triggered the run. Must match the creator of the run.",
)
include_state_snapshot: bool = Field(
default=False,
description=(
"When `true`, replay from the persisted state snapshot to include a status summary of already-executed "
"nodes before streaming new events."
),
)
continue_on_pause: bool = Field(
default=False,
description=(
"Set to `true` to keep the stream open across multiple `workflow_paused` events, which is useful when "
"the workflow has more than one Human Input node in sequence. By default, the stream closes after the "
"first pause."
),
)
register_schema_models(service_api_ns, WorkflowEventsQuery)
@ -65,7 +81,7 @@ class WorkflowEventsApi(Resource):
@event_stream_response(service_api_ns)
@service_api_ns.doc("get_workflow_events")
@service_api_ns.doc(description="Get workflow execution events stream after resume")
@service_api_ns.doc(params={"task_id": "Workflow run ID"})
@service_api_ns.doc(params={"task_id": "Workflow run ID returned by the original workflow run request."})
@service_api_ns.doc(params=query_params_from_model(WorkflowEventsQuery))
@service_api_ns.doc(
responses={

View File

@ -1,8 +1,17 @@
from typing import Any, Literal, override
from typing import Annotated, Literal, override
from uuid import UUID
from flask import request
from pydantic import BaseModel, ConfigDict, Field, GetJsonSchemaHandler, RootModel, field_validator, model_validator
from pydantic import (
BaseModel,
ConfigDict,
Field,
GetJsonSchemaHandler,
RootModel,
WithJsonSchema,
field_validator,
model_validator,
)
from werkzeug.exceptions import Forbidden, NotFound
import services
@ -33,7 +42,12 @@ from models.dataset import DatasetPermissionEnum
from models.enums import TagType
from models.provider_ids import ModelProviderID
from services.dataset_service import DatasetPermissionService, DatasetService, DocumentService
from services.entities.knowledge_entities.knowledge_entities import RetrievalModel
from services.entities.knowledge_entities.knowledge_entities import (
ExternalRetrievalModel,
KnowledgeProvider,
RetrievalModel,
SummaryIndexSetting,
)
from services.tag_service import (
SaveTagPayload,
TagBindingCreatePayload,
@ -46,37 +60,122 @@ from services.tag_service import (
register_enum_models(service_api_ns, DatasetPermissionEnum)
# Field type for an optional list of member entries. The Python type is
# `list[dict[str, str]] | None`; the WithJsonSchema annotation supplies an explicit
# JSON schema: either an array of objects exposing a string `user_id` property, or null.
# The schema does not list `user_id` as required.
PartialMemberList = Annotated[
list[dict[str, str]] | None,
WithJsonSchema(
{
"anyOf": [
{
"items": {
"properties": {
"user_id": {
"description": "ID of the team member to grant access.",
"type": "string",
}
},
"type": "object",
},
"type": "array",
},
{"type": "null"},
]
}
),
]
class DatasetCreatePayload(BaseModel):
name: str = Field(..., min_length=1, max_length=40)
description: str = Field(default="", description="Dataset description (max 400 chars)", max_length=400)
indexing_technique: Literal["high_quality", "economy"] | None = None
permission: DatasetPermissionEnum | None = DatasetPermissionEnum.ONLY_ME
external_knowledge_api_id: str | None = None
provider: str = "vendor"
external_knowledge_id: str | None = None
retrieval_model: RetrievalModel | None = None
embedding_model: str | None = None
embedding_model_provider: str | None = None
summary_index_setting: dict | None = Field(default=None)
name: str = Field(..., min_length=1, max_length=40, description="Name of the knowledge base.")
description: str = Field(default="", description="Description of the knowledge base.", max_length=400)
indexing_technique: Literal["high_quality", "economy"] | None = Field(
default=None,
description="`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing.",
)
permission: DatasetPermissionEnum | None = Field(
default=DatasetPermissionEnum.ONLY_ME,
description=(
"Controls who can access this knowledge base. `only_me` restricts access to the creator, "
"`all_team_members` grants workspace-wide access, and `partial_members` grants access to specified "
"members."
),
)
external_knowledge_api_id: str | None = Field(default=None, description="ID of the external knowledge API.")
provider: KnowledgeProvider = Field(
default="vendor",
description="Knowledge base provider: `vendor` for internal knowledge bases, `external` for external ones.",
)
external_knowledge_id: str | None = Field(default=None, description="ID of the external knowledge base.")
retrieval_model: RetrievalModel | None = Field(
default=None,
description="Retrieval model configuration. Controls how chunks are searched and ranked.",
)
embedding_model: str | None = Field(
default=None,
description=(
"Embedding model name. Use the `model` field from "
"[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
),
)
embedding_model_provider: str | None = Field(
default=None,
description=(
"Embedding model provider. Use the `provider` field from "
"[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
),
)
summary_index_setting: SummaryIndexSetting = Field(
default=None,
description="Summary index configuration.",
)
# Request body model for updating a knowledge base (dataset). Every field defaults to
# None, so a client may send any subset of them.
# NOTE(review): each field is declared twice in this span — first as a bare declaration,
# then again with a `Field(..., description=...)` that carries the API description. This
# looks like the before/after lines of a diff rendered without +/- markers; confirm which
# set is current before relying on it.
class DatasetUpdatePayload(BaseModel):
name: str | None = Field(default=None, min_length=1, max_length=40)
description: str | None = Field(default=None, description="Dataset description (max 400 chars)", max_length=400)
indexing_technique: Literal["high_quality", "economy"] | None = None
permission: DatasetPermissionEnum | None = None
embedding_model: str | None = None
embedding_model_provider: str | None = None
retrieval_model: RetrievalModel | None = None
partial_member_list: list[dict[str, str]] | None = None
external_retrieval_model: dict[str, Any] | None = Field(default=None)
external_knowledge_id: str | None = None
external_knowledge_api_id: str | None = None
# Described variants of the same fields. Length limits on `name` (1-40) and
# `description` (max 400) are unchanged from the declarations above.
name: str | None = Field(default=None, min_length=1, max_length=40, description="Name of the knowledge base.")
description: str | None = Field(default=None, description="Description of the knowledge base.", max_length=400)
indexing_technique: Literal["high_quality", "economy"] | None = Field(
default=None,
description="`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing.",
)
permission: DatasetPermissionEnum | None = Field(
default=None,
description=(
"Controls who can access this knowledge base. `only_me` restricts access to the creator, "
"`all_team_members` grants workspace-wide access, and `partial_members` grants access to specified "
"members."
),
)
embedding_model: str | None = Field(
default=None,
description=(
"Embedding model name. Use the `model` field from "
"[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
),
)
embedding_model_provider: str | None = Field(
default=None,
description=(
"Embedding model provider. Use the `provider` field from "
"[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
),
)
retrieval_model: RetrievalModel | None = Field(
default=None,
description="Retrieval model configuration. Controls how chunks are searched and ranked.",
)
# NOTE(review): `PartialMemberList` and `ExternalRetrievalModel` are defined outside this
# view. They replace `list[dict[str, str]] | None` and `dict[str, Any] | None` and are given
# default=None, so they are presumably nullable aliases — verify.
partial_member_list: PartialMemberList = Field(
default=None,
description="List of team members with access when `permission` is `partial_members`.",
)
external_retrieval_model: ExternalRetrievalModel = Field(
default=None,
description="Retrieval settings for external knowledge bases.",
)
external_knowledge_id: str | None = Field(default=None, description="ID of the external knowledge base.")
external_knowledge_api_id: str | None = Field(default=None, description="ID of the external knowledge API.")
# Request body listing the documents to act on. `document_ids` defaults to a fresh empty
# list (default_factory), so the field may be omitted.
# NOTE(review): the two declarations differ only in description text (looks like old/new diff lines).
class DocumentStatusPayload(BaseModel):
document_ids: list[str] = Field(default_factory=list, description="Document IDs to update")
document_ids: list[str] = Field(default_factory=list, description="List of document IDs to update.")
DOCUMENT_STATUS_ACTION_PARAM = {
@ -87,7 +186,7 @@ DOCUMENT_STATUS_ACTION_PARAM = {
# Base payload carrying a required tag name limited to 1-50 characters.
# TagCreatePayload and TagUpdatePayload below subclass it.
# NOTE(review): the second `name` line only adds a description (looks like old/new diff lines).
class TagNamePayload(BaseModel):
name: str = Field(..., min_length=1, max_length=50)
name: str = Field(..., min_length=1, max_length=50, description="Tag name.")
class TagCreatePayload(TagNamePayload):
@ -95,16 +194,16 @@ class TagCreatePayload(TagNamePayload):
# Tag update payload: inherits `name` from TagNamePayload and adds a required `tag_id`
# (no default is declared).
# NOTE(review): the second `tag_id` line only adds a description (looks like old/new diff lines).
class TagUpdatePayload(TagNamePayload):
tag_id: str
tag_id: str = Field(description="Tag ID to update.")
# Tag deletion payload with a single required `tag_id` (no default is declared).
# NOTE(review): the second `tag_id` line only adds a description (looks like old/new diff lines).
class TagDeletePayload(BaseModel):
tag_id: str
tag_id: str = Field(description="Tag ID to delete.")
class TagBindingPayload(BaseModel):
tag_ids: list[str]
target_id: str
tag_ids: list[str] = Field(description="Tag IDs to bind.")
target_id: str = Field(description="Knowledge base ID to bind the tags to.")
@field_validator("tag_ids")
@classmethod
@ -119,7 +218,7 @@ class TagUnbindingPayload(BaseModel):
tag_ids: list[str] = Field(default_factory=list)
tag_id: str | None = None
target_id: str
target_id: str = Field(description="Knowledge base ID.")
@classmethod
@override
@ -134,7 +233,7 @@ class TagUnbindingPayload(BaseModel):
"minItems": 1,
"type": "array",
}
target_id_property = {"title": "Target Id", "type": "string"}
target_id_property = {"description": "Knowledge base ID.", "title": "Target Id", "type": "string"}
return {
"anyOf": [
{
@ -192,11 +291,14 @@ class KnowledgeTagListResponse(RootModel[list[KnowledgeTagResponse]]):
# Query-string model for listing knowledge bases: pagination (`page`, `limit`), an optional
# `keyword`, an `include_all` switch and a `tag_ids` filter that defaults to an empty list.
# NOTE(review): each field appears twice (terse description, then expanded description);
# this looks like old/new diff lines rendered without +/- markers.
class DatasetListQuery(BaseModel):
page: int = Field(default=1, description="Page number")
limit: int = Field(default=20, description="Number of items per page")
keyword: str | None = Field(default=None, description="Search keyword")
include_all: bool = Field(default=False, description="Include all datasets")
tag_ids: list[str] = Field(default_factory=list, description="Filter by tag IDs")
page: int = Field(default=1, description="Page number to retrieve.")
# NOTE(review): no ge/le bound is declared on `limit` here; the cap of 100 mentioned in the
# description must be enforced elsewhere — verify.
limit: int = Field(default=20, description="Number of items per page. Server caps at `100`.")
keyword: str | None = Field(default=None, description="Search keyword to filter by name.")
include_all: bool = Field(
default=False,
description="Whether to include all knowledge bases regardless of permissions.",
)
tag_ids: list[str] = Field(default_factory=list, description="Tag IDs to filter by.")
class DatasetDetailWithPartialMembersResponse(DatasetDetailResponse):
@ -409,7 +511,7 @@ class DatasetApi(DatasetApiResource):
)
@service_api_ns.doc("get_dataset")
@service_api_ns.doc(description="Get a specific dataset by ID")
@service_api_ns.doc(params={"dataset_id": "Dataset ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
@service_api_ns.doc(
responses={
200: "Dataset retrieved successfully",
@ -488,7 +590,7 @@ class DatasetApi(DatasetApiResource):
@service_api_ns.expect(service_api_ns.models[DatasetUpdatePayload.__name__])
@service_api_ns.doc("update_dataset")
@service_api_ns.doc(description="Update an existing dataset")
@service_api_ns.doc(params={"dataset_id": "Dataset ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
@service_api_ns.doc(
responses={
200: "Dataset updated successfully",
@ -585,7 +687,7 @@ class DatasetApi(DatasetApiResource):
)
@service_api_ns.doc("delete_dataset")
@service_api_ns.doc(description="Delete a dataset")
@service_api_ns.doc(params={"dataset_id": "Dataset ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
@service_api_ns.doc(
responses={
204: "Dataset deleted successfully",
@ -648,7 +750,7 @@ class DocumentStatusApi(DatasetApiResource):
@service_api_ns.doc(description="Batch update document status")
@service_api_ns.doc(
params={
"dataset_id": "Dataset ID",
"dataset_id": "Knowledge base ID.",
"action": DOCUMENT_STATUS_ACTION_PARAM,
}
)
@ -927,7 +1029,7 @@ class DatasetTagsBindingStatusApi(DatasetApiResource):
)
@service_api_ns.doc("get_dataset_tags_binding_status")
@service_api_ns.doc(description="Get tags bound to a specific dataset")
@service_api_ns.doc(params={"dataset_id": "Dataset ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
@service_api_ns.doc(
responses={
200: "Tags retrieved successfully",

View File

@ -9,11 +9,11 @@ import json
from collections.abc import Mapping
from contextlib import ExitStack
from copy import deepcopy
from typing import Any, Literal, Self, override
from typing import Annotated, Any, Literal, Self, override
from uuid import UUID
from flask import request, send_file
from pydantic import BaseModel, Field, GetJsonSchemaHandler, field_validator, model_validator
from pydantic import BaseModel, Field, GetJsonSchemaHandler, WithJsonSchema, field_validator, model_validator
from sqlalchemy import desc, func, select
from werkzeug.exceptions import Forbidden, NotFound
@ -63,6 +63,8 @@ from models.dataset import Dataset, Document, DocumentSegment
from models.enums import SegmentStatus
from services.dataset_service import DatasetService, DocumentService
from services.entities.knowledge_entities.knowledge_entities import (
DocForm,
IndexingTechnique,
KnowledgeConfig,
ProcessRule,
RetrievalModel,
@ -72,16 +74,44 @@ from services.summary_index_service import SummaryIndexService
class DocumentTextCreatePayload(BaseModel):
name: str
text: str
process_rule: ProcessRule | None = None
original_document_id: str | None = None
doc_form: str = Field(default="text_model")
doc_language: str = Field(default="English")
indexing_technique: str | None = None
retrieval_model: RetrievalModel | None = None
embedding_model: str | None = None
embedding_model_provider: str | None = None
name: str = Field(description="Document name.")
text: str = Field(description="Document text content.")
process_rule: ProcessRule | None = Field(default=None, description="Processing rules for chunking.")
original_document_id: str | None = Field(default=None, description="Original document ID for replacement.")
doc_form: DocForm = Field(
default="text_model",
description=(
"`text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, "
"`qa_model` for question-answer pair extraction."
),
)
doc_language: str = Field(default="English", description="Language of the document for processing optimization.")
indexing_technique: IndexingTechnique = Field(
default=None,
description=(
"`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing. "
"Required when adding the first document to a knowledge base; subsequent documents inherit the "
"knowledge base's indexing technique if omitted."
),
)
retrieval_model: RetrievalModel | None = Field(
default=None,
description="Retrieval model configuration. Controls how chunks are searched and ranked.",
)
embedding_model: str | None = Field(
default=None,
description=(
"Embedding model name. Use the `model` field from "
"[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
),
)
embedding_model_provider: str | None = Field(
default=None,
description=(
"Embedding model provider. Use the `provider` field from "
"[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
),
)
@field_validator("doc_form")
@classmethod
@ -92,12 +122,21 @@ class DocumentTextCreatePayload(BaseModel):
class DocumentTextUpdate(BaseModel):
name: str | None = None
text: str | None = None
process_rule: ProcessRule | None = None
doc_form: str = "text_model"
doc_language: str = "English"
retrieval_model: RetrievalModel | None = None
name: str | None = Field(default=None, description="Document name. Required when `text` is provided.")
text: str | None = Field(default=None, description="Document text content.")
process_rule: ProcessRule | None = Field(default=None, description="Processing rules for chunking.")
doc_form: DocForm = Field(
default="text_model",
description=(
"`text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, "
"`qa_model` for question-answer pair extraction."
),
)
doc_language: str = Field(default="English", description="Language of the document for processing optimization.")
retrieval_model: RetrievalModel | None = Field(
default=None,
description="Retrieval model configuration. Controls how chunks are searched and ranked.",
)
@field_validator("doc_form")
@classmethod
@ -119,7 +158,7 @@ class DocumentTextUpdate(BaseModel):
text_branch_properties["name"] = _non_null_property_schema(properties.get("name"))
no_text_branch_properties = deepcopy(properties)
no_text_branch_properties["text"] = {"type": "null"}
no_text_branch_properties["text"] = {"description": "Document text content.", "type": "null"}
return {
**schema,
@ -161,19 +200,41 @@ def _non_null_property_schema(property_schema: object) -> dict[str, Any]:
return deepcopy(property_schema)
# Annotated alias for an optional document display-status filter.
# The Python type stays `str | None`; the WithJsonSchema metadata replaces the generated
# JSON schema with "one of the listed status strings, or null", so the enum is advertised
# in the API docs. The enum is schema metadata only — any validation of the value against
# these statuses would have to happen elsewhere (not shown here).
DocumentDisplayStatus = Annotated[
str | None,
WithJsonSchema(
{
"anyOf": [
{
"enum": ["queuing", "indexing", "paused", "error", "available", "disabled", "archived"],
"type": "string",
},
{"type": "null"},
]
}
),
]
# Query-string model for listing documents: pagination, optional name keyword and an
# optional display-status filter.
# NOTE(review): each field appears twice (looks like old/new diff lines). In the second set
# `status` switches from `str | None` to the `DocumentDisplayStatus` alias.
class DocumentListQuery(BaseModel):
page: int = Field(default=1, description="Page number")
limit: int = Field(default=20, description="Number of items per page")
keyword: str | None = Field(default=None, description="Search keyword")
status: str | None = Field(default=None, description="Document status filter")
page: int = Field(default=1, description="Page number to retrieve.")
# NOTE(review): no le=100 constraint is declared; the cap in the description must be enforced elsewhere — verify.
limit: int = Field(default=20, description="Number of items per page. Server caps at `100`.")
keyword: str | None = Field(default=None, description="Search keyword to filter by document name.")
status: DocumentDisplayStatus = Field(default=None, description="Filter by display status.")
# Query-string model for fetching a single document. `metadata` is restricted to the three
# literal modes and defaults to "all".
# NOTE(review): the field is declared twice; the second declaration only expands the
# description (looks like old/new diff lines).
class DocumentGetQuery(BaseModel):
metadata: Literal["all", "only", "without"] = Field(default="all", description="Metadata response mode")
metadata: Literal["all", "only", "without"] = Field(
default="all",
description=(
"`all` returns all fields including metadata. `only` returns only `id`, `doc_type`, and "
"`doc_metadata`. `without` returns all fields except `doc_metadata`."
),
)
DOCUMENT_CREATE_BY_FILE_PARAMS = {
"dataset_id": "Dataset ID",
"dataset_id": "Knowledge base ID.",
"file": {
"in": "formData",
"type": "file",
@ -184,23 +245,32 @@ DOCUMENT_CREATE_BY_FILE_PARAMS = {
"in": "formData",
"type": "string",
"required": False,
"description": "Optional JSON string with document creation settings.",
"description": (
"JSON string containing configuration. Accepts the same fields as "
"[Create Document by Text](/api-reference/documents/create-document-by-text) (`indexing_technique`, "
"`doc_form`, `doc_language`, `process_rule`, `retrieval_model`, `embedding_model`, "
"`embedding_model_provider`) except `name` and `text`."
),
},
}
# Parameter documentation for the "update document by file" endpoint: two ID parameters
# described by plain strings, plus two optional multipart form fields (`file`, `data`).
# NOTE(review): several keys appear twice in a row ("dataset_id", "description"). In a Python
# dict literal the later duplicate wins; here the pairs look like old/new diff lines.
DOCUMENT_UPDATE_BY_FILE_PARAMS = {
"dataset_id": "Dataset ID",
"document_id": "Document ID",
"dataset_id": "Knowledge base ID.",
"document_id": "Document ID.",
# Optional replacement upload, sent as a multipart form field.
"file": {
"in": "formData",
"type": "file",
"required": False,
"description": "Replacement document file.",
"description": "Replacement document file to upload.",
},
# Optional settings, passed as a JSON-encoded string form field.
"data": {
"in": "formData",
"type": "string",
"required": False,
"description": "Optional JSON string with document update settings.",
"description": (
"JSON string containing document update settings such as `doc_form`, `doc_language`, `process_rule`, "
"`retrieval_model`, `embedding_model`, and `embedding_model_provider`. `name` and `text` are not used "
"for file updates."
),
},
}
@ -422,7 +492,7 @@ class DocumentAddByTextApi(DatasetApiResource):
@service_api_ns.expect(service_api_ns.models[DocumentTextCreatePayload.__name__])
@service_api_ns.doc("create_document_by_text")
@service_api_ns.doc(description="Create a new document by providing text content")
@service_api_ns.doc(params={"dataset_id": "Dataset ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
@service_api_ns.doc(
responses={
200: "Document created successfully",
@ -454,7 +524,7 @@ class DeprecatedDocumentAddByTextApi(DatasetApiResource):
"Use /datasets/{dataset_id}/document/create-by-text instead."
)
)
@service_api_ns.doc(params={"dataset_id": "Dataset ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
@service_api_ns.doc(
responses={
200: "Document created successfully",
@ -499,7 +569,7 @@ class DocumentUpdateByTextApi(DatasetApiResource):
@service_api_ns.expect(service_api_ns.models[DocumentTextUpdate.__name__])
@service_api_ns.doc("update_document_by_text")
@service_api_ns.doc(description="Update an existing document by providing text content")
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."})
@service_api_ns.doc(
responses={
200: "Document updated successfully",
@ -530,7 +600,7 @@ class DeprecatedDocumentUpdateByTextApi(DatasetApiResource):
"Use /datasets/{dataset_id}/documents/{document_id}/update-by-text instead."
)
)
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."})
@service_api_ns.doc(
responses={
200: "Document updated successfully",
@ -839,7 +909,7 @@ class DocumentListApi(DatasetApiResource):
)
@service_api_ns.doc("list_documents")
@service_api_ns.doc(description="List all documents in a dataset")
@service_api_ns.doc(params={"dataset_id": "Dataset ID", **query_params_from_model(DocumentListQuery)})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", **query_params_from_model(DocumentListQuery)})
@service_api_ns.doc(
responses={
200: "Documents retrieved successfully",
@ -913,7 +983,7 @@ class DocumentBatchDownloadZipApi(DatasetApiResource):
@service_api_ns.expect(service_api_ns.models[DocumentBatchDownloadZipPayload.__name__])
@service_api_ns.doc("download_documents_as_zip")
@service_api_ns.doc(description="Download selected uploaded documents as a single ZIP archive")
@service_api_ns.doc(params={"dataset_id": "Dataset ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
@service_api_ns.doc(
responses={
200: "ZIP archive generated successfully",
@ -965,7 +1035,7 @@ class DocumentIndexingStatusApi(DatasetApiResource):
)
@service_api_ns.doc("get_document_indexing_status")
@service_api_ns.doc(description="Get indexing status for documents in a batch")
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "batch": "Batch ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "batch": "Batch ID."})
@service_api_ns.doc(
responses={
200: "Indexing status retrieved successfully",
@ -1047,7 +1117,7 @@ class DocumentDownloadApi(DatasetApiResource):
)
@service_api_ns.doc("get_document_download_url")
@service_api_ns.doc(description="Get a signed download URL for a document's original uploaded file")
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."})
@service_api_ns.doc(
responses={
200: "Download URL generated successfully",
@ -1099,7 +1169,7 @@ class DocumentApi(DatasetApiResource):
)
@service_api_ns.doc("get_document")
@service_api_ns.doc(description="Get a specific document by ID")
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."})
@service_api_ns.doc(params=query_params_from_model(DocumentGetQuery))
@service_api_ns.doc(
responses={
@ -1251,7 +1321,7 @@ class DocumentApi(DatasetApiResource):
)
@service_api_ns.doc("delete_document")
@service_api_ns.doc(description="Delete a document")
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "document_id": "Document ID."})
@service_api_ns.doc(
responses={
204: "Document deleted successfully",

View File

@ -41,7 +41,7 @@ class HitTestingApi(DatasetApiResource, DatasetsHitTestingBase):
)
@service_api_ns.doc("dataset_hit_testing")
@service_api_ns.doc(description="Perform hit testing on a dataset")
@service_api_ns.doc(params={"dataset_id": "Dataset ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
@service_api_ns.response(
200,
"Hit testing results",

View File

@ -25,7 +25,7 @@ from services.entities.knowledge_entities.knowledge_entities import (
from services.metadata_service import MetadataService
# Parameter schema for the `action` parameter of the built-in metadata toggle endpoint:
# a string restricted to "enable" or "disable".
# NOTE(review): "description" appears twice; the later value wins in a dict literal
# (the pair looks like old/new diff lines).
BUILT_IN_METADATA_ACTION_PARAM = {
"description": "Action to perform: 'enable' or 'disable'",
"description": "`enable` to activate built-in metadata fields, `disable` to deactivate them.",
"enum": ["enable", "disable"],
"type": "string",
}
@ -63,7 +63,7 @@ class DatasetMetadataCreateServiceApi(DatasetApiResource):
@service_api_ns.expect(service_api_ns.models[MetadataArgs.__name__])
@service_api_ns.doc("create_dataset_metadata")
@service_api_ns.doc(description="Create metadata for a dataset")
@service_api_ns.doc(params={"dataset_id": "Dataset ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
@service_api_ns.doc(
responses={
201: "Metadata created successfully",
@ -101,7 +101,7 @@ class DatasetMetadataCreateServiceApi(DatasetApiResource):
)
@service_api_ns.doc("get_dataset_metadata")
@service_api_ns.doc(description="Get all metadata for a dataset")
@service_api_ns.doc(params={"dataset_id": "Dataset ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
@service_api_ns.doc(
responses={
200: "Metadata retrieved successfully",
@ -135,7 +135,7 @@ class DatasetMetadataServiceApi(DatasetApiResource):
@service_api_ns.expect(service_api_ns.models[MetadataUpdatePayload.__name__])
@service_api_ns.doc("update_dataset_metadata")
@service_api_ns.doc(description="Update metadata name")
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "metadata_id": "Metadata ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "metadata_id": "Metadata field ID."})
@service_api_ns.doc(
responses={
200: "Metadata updated successfully",
@ -174,7 +174,7 @@ class DatasetMetadataServiceApi(DatasetApiResource):
)
@service_api_ns.doc("delete_dataset_metadata")
@service_api_ns.doc(description="Delete metadata")
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "metadata_id": "Metadata ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "metadata_id": "Metadata field ID."})
@service_api_ns.doc(
responses={
204: "Metadata deleted successfully",
@ -211,6 +211,7 @@ class DatasetMetadataBuiltInFieldServiceApi(DatasetApiResource):
)
@service_api_ns.doc("get_built_in_fields")
@service_api_ns.doc(description="Get all built-in metadata fields")
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
@service_api_ns.doc(
responses={
200: "Built-in fields retrieved successfully",
@ -240,7 +241,7 @@ class DatasetMetadataBuiltInFieldActionServiceApi(DatasetApiResource):
)
@service_api_ns.doc("toggle_built_in_field")
@service_api_ns.doc(description="Enable or disable built-in metadata field")
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "action": BUILT_IN_METADATA_ACTION_PARAM})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "action": BUILT_IN_METADATA_ACTION_PARAM})
@service_api_ns.doc(
responses={
200: "Action completed successfully",
@ -284,7 +285,7 @@ class DocumentMetadataEditServiceApi(DatasetApiResource):
@service_api_ns.expect(service_api_ns.models[MetadataOperationData.__name__])
@service_api_ns.doc("update_documents_metadata")
@service_api_ns.doc(description="Update metadata for multiple documents")
@service_api_ns.doc(params={"dataset_id": "Dataset ID"})
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
@service_api_ns.doc(
responses={
200: "Documents metadata updated successfully",

View File

@ -37,6 +37,7 @@ from services.errors.file import FileTooLargeError, UnsupportedFileTypeError
from services.file_service import FileService
from services.rag_pipeline.entity.pipeline_service_api_entities import (
DatasourceNodeRunApiEntity,
DatasourceType,
PipelineRunApiEntity,
)
from services.rag_pipeline.pipeline_generate_service import PipelineGenerateService
@ -44,14 +45,27 @@ from services.rag_pipeline.rag_pipeline import RagPipelineService
# Request body for running a datasource node. `inputs`, `datasource_type` and `is_published`
# are required (no defaults); `credential_id` is optional and defaults to None.
# NOTE(review): each field appears twice (bare, then with Field description) — looks like
# old/new diff lines. In the second set `datasource_type` changes from `str` to the imported
# `DatasourceType`, whose definition is outside this view.
class DatasourceNodeRunPayload(BaseModel):
inputs: dict[str, Any]
datasource_type: str
credential_id: str | None = None
is_published: bool
inputs: dict[str, Any] = Field(description="Input variables for the datasource node.")
datasource_type: DatasourceType = Field(description="Type of the datasource.")
credential_id: str | None = Field(
default=None, description="Datasource credential ID. Uses the default if omitted."
)
is_published: bool = Field(
description=(
"Whether to run the published or draft version of the node. `true` runs the published version, "
"`false` runs the draft."
)
)
# Query-string model for listing datasource plugins. `is_published` defaults to True.
# NOTE(review): the field is declared twice with the same default; the second declaration
# only adds a description (looks like old/new diff lines).
class DatasourcePluginsQuery(BaseModel):
is_published: bool = True
is_published: bool = Field(
default=True,
description=(
"Whether to retrieve nodes from the published or draft pipeline. `true` returns nodes from the published "
"version, `false` returns nodes from the draft."
),
)
class DatasourceCredentialInfoResponse(ResponseModel):
@ -114,11 +128,7 @@ class DatasourcePluginsApi(DatasetApiResource):
)
@service_api_ns.doc(shortcut="list_rag_pipeline_datasource_plugins")
@service_api_ns.doc(description="List all datasource plugins for a rag pipeline")
@service_api_ns.doc(
path={
"dataset_id": "Dataset ID",
}
)
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
@service_api_ns.doc(params=query_params_from_model(DatasourcePluginsQuery))
@service_api_ns.doc(
responses={
@ -169,11 +179,7 @@ class DatasourceNodeRunApi(DatasetApiResource):
@event_stream_response(service_api_ns)
@service_api_ns.doc(shortcut="pipeline_datasource_node_run")
@service_api_ns.doc(description="Run a datasource node for a rag pipeline")
@service_api_ns.doc(
path={
"dataset_id": "Dataset ID",
}
)
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID.", "node_id": "ID of the datasource node to execute."})
@service_api_ns.doc(
responses={
200: "Datasource node run successfully",
@ -245,11 +251,7 @@ class PipelineRunApi(DatasetApiResource):
@json_or_event_stream_response(service_api_ns)
@service_api_ns.doc(shortcut="pipeline_datasource_node_run")
@service_api_ns.doc(description="Run a datasource node for a rag pipeline")
@service_api_ns.doc(
path={
"dataset_id": "Dataset ID",
}
)
@service_api_ns.doc(params={"dataset_id": "Knowledge base ID."})
@service_api_ns.doc(
responses={
200: "Pipeline run successfully",

View File

@ -47,10 +47,10 @@ from services.summary_index_service import SummaryIndexService
class SegmentCreateItemPayload(BaseModel):
content: str = Field(min_length=1)
answer: str | None = None
keywords: list[str] | None = None
attachment_ids: list[str] | None = None
content: str = Field(min_length=1, description="Chunk text content.")
answer: str | None = Field(default=None, description="Answer content for QA mode.")
keywords: list[str] | None = Field(default=None, description="Keywords for the chunk.")
attachment_ids: list[str] | None = Field(default=None, description="Attachment file IDs.")
@field_validator("content")
@classmethod
@ -61,31 +61,34 @@ class SegmentCreateItemPayload(BaseModel):
# Request body for creating chunks: a required list holding at least one
# SegmentCreateItemPayload (min_length=1).
# NOTE(review): the second `segments` line only adds a description (looks like old/new diff lines).
class SegmentCreatePayload(BaseModel):
segments: list[SegmentCreateItemPayload] = Field(min_length=1)
segments: list[SegmentCreateItemPayload] = Field(min_length=1, description="Array of chunk objects to create.")
# Query-string model for listing chunks: `limit` and `page` must be >= 1, `status` is a list
# filter defaulting to empty, and `keyword` is optional.
# NOTE(review): each field appears twice (bare, then described) — looks like old/new diff lines.
class SegmentListQuery(BaseModel):
limit: int = Field(default=20, ge=1)
page: int = Field(default=1, ge=1)
status: list[str] = Field(default_factory=list)
keyword: str | None = None
# NOTE(review): only a lower bound (ge=1) is declared; the cap of 100 in the description
# must be enforced elsewhere — verify.
limit: int = Field(default=20, ge=1, description="Number of items per page. Server caps at `100`.")
page: int = Field(default=1, ge=1, description="Page number to retrieve.")
status: list[str] = Field(
default_factory=list,
description="Filter chunks by indexing status, such as `completed`, `indexing`, or `error`.",
)
keyword: str | None = Field(default=None, description="Search keyword.")
# Request body for updating a chunk: wraps the required `segment` object, whose
# SegmentUpdateArgs type is defined outside this view.
# NOTE(review): the second `segment` line only adds a description (looks like old/new diff lines).
class SegmentUpdatePayload(BaseModel):
segment: SegmentUpdateArgs
segment: SegmentUpdateArgs = Field(description="Chunk update payload.")
# Query-string model for listing child chunks: `limit` and `page` must be >= 1, `keyword` is optional.
# NOTE(review): each field appears twice (bare, then described) — looks like old/new diff lines.
class ChildChunkListQuery(BaseModel):
limit: int = Field(default=20, ge=1)
keyword: str | None = None
page: int = Field(default=1, ge=1)
# NOTE(review): only ge=1 is declared; the cap of 100 in the description must be enforced elsewhere — verify.
limit: int = Field(default=20, ge=1, description="Number of items per page. Server caps at `100`.")
keyword: str | None = Field(default=None, description="Search keyword.")
page: int = Field(default=1, ge=1, description="Page number to retrieve.")
# Namespace of reusable path-parameter description dicts for the chunk endpoints.
# Each dict extends the previous one by unpacking it and adding one more parameter.
# NOTE(review): every attribute is assigned twice; the later assignment wins (the pairs
# look like old/new diff lines).
class SegmentDocParams:
DATASET_DOCUMENT = {"dataset_id": "Dataset ID", "document_id": "Document ID"}
DATASET_DOCUMENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Segment ID"}
DATASET_DOCUMENT_PARENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Parent segment ID"}
DATASET_DOCUMENT_CHILD_CHUNK = {**DATASET_DOCUMENT_PARENT_SEGMENT, "child_chunk_id": "Child chunk ID"}
DATASET_DOCUMENT = {"dataset_id": "Knowledge base ID.", "document_id": "Document ID."}
DATASET_DOCUMENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Chunk ID."}
# NOTE(review): in this second set the parent-segment variant has the same `segment_id` text
# as DATASET_DOCUMENT_SEGMENT ("Chunk ID."), so the "parent" distinction made by the earlier
# text ("Parent segment ID") is no longer visible in the docs — confirm this is intended.
DATASET_DOCUMENT_PARENT_SEGMENT = {**DATASET_DOCUMENT, "segment_id": "Chunk ID."}
DATASET_DOCUMENT_CHILD_CHUNK = {**DATASET_DOCUMENT_PARENT_SEGMENT, "child_chunk_id": "Child chunk ID."}
class SegmentCreateListResponse(ResponseModel):

View File

@ -8,19 +8,69 @@ from __future__ import annotations
from collections.abc import Sequence
from copy import deepcopy
from typing import cast
from typing import Annotated, Any, cast
from flask_restx import Namespace
from pydantic import BaseModel
from pydantic import BaseModel, WithJsonSchema
# Shared OpenAPI fragments describing the Service API `user` value (as a JSON body property,
# a query parameter and a multipart form field) and the multipart `file` upload field.
# NOTE(review): the four one-line constants directly below are re-assigned further down with
# expanded descriptions; the later assignments win (the pairs look like old/new diff lines).
USER_PROPERTY_SCHEMA: dict[str, object] = {"description": "End user identifier", "type": "string"}
USER_QUERY_PARAM: dict[str, object] = {"description": "End user identifier", "in": "query", "type": "string"}
USER_FORM_PARAM: dict[str, object] = {"description": "End user identifier", "in": "formData", "type": "string"}
FILE_FORM_PARAM: dict[str, object] = {"in": "formData", "required": True, "type": "file"}
# Canonical long-form description of `user`, reused by the body-property and form-field fragments.
USER_DESCRIPTION = (
"User identifier, unique within the application. This identifier scopes data access; resources created with "
"one `user` value are only visible when queried with the same `user` value."
)
USER_PROPERTY_SCHEMA: dict[str, object] = {"description": USER_DESCRIPTION, "type": "string"}
# NOTE(review): unlike the other two `user` fragments, the query-parameter variant carries its
# own shorter description instead of USER_DESCRIPTION — confirm the difference is deliberate.
USER_QUERY_PARAM: dict[str, object] = {
"description": "User identifier, used for end-user context.",
"in": "query",
"type": "string",
}
USER_FORM_PARAM: dict[str, object] = {
"description": USER_DESCRIPTION,
"in": "formData",
"type": "string",
}
# Required multipart upload field with a generic default description.
FILE_FORM_PARAM: dict[str, object] = {
"description": "The file to upload.",
"in": "formData",
"required": True,
"type": "file",
}
# String constants: two attribute names (prefixed `_dify_service_api_user_`) and the "JSON"
# fetch-from marker. Where they are read or set is outside this view.
USER_FETCH_FROM_ATTR = "_dify_service_api_user_fetch_from"
USER_REQUIRED_ATTR = "_dify_service_api_user_required"
JSON_USER_FETCH_FROM = "JSON"
# JSON schema for one input-file object. `type` and `transfer_method` are required;
# `url` and `upload_file_id` are optional at schema level, and their descriptions say which
# one applies to which transfer method.
INPUT_FILE_ITEM_SCHEMA: dict[str, object] = {
"type": "object",
"required": ["type", "transfer_method"],
"properties": {
"type": {
"description": "File type.",
"enum": ["document", "image", "audio", "video", "custom"],
"type": "string",
},
"transfer_method": {
"description": "Transfer method: `remote_url` for file URL, `local_file` for uploaded file.",
"enum": ["remote_url", "local_file"],
"type": "string",
},
"url": {
"description": "File URL when `transfer_method` is `remote_url`.",
"format": "url",
"type": "string",
},
"upload_file_id": {
"description": (
"Uploaded file ID obtained from the [Upload File](/api-reference/files/upload-file) API when "
"`transfer_method` is `local_file`."
),
"type": "string",
},
},
}
# Schema for a nullable array of the item schema above. The item dict is embedded by
# reference, not copied.
INPUT_FILE_LIST_SCHEMA: dict[str, object] = {
"anyOf": [{"items": INPUT_FILE_ITEM_SCHEMA, "type": "array"}, {"type": "null"}]
}
# Annotated alias: the Python type is `list[dict[str, Any]] | None`, while the generated JSON
# schema is replaced by INPUT_FILE_LIST_SCHEMA so the docs show the structured file item.
InputFileList = Annotated[list[dict[str, Any]] | None, WithJsonSchema(INPUT_FILE_LIST_SCHEMA)]
def expect_with_user(namespace: Namespace, model: type[BaseModel]):
"""Document a JSON request body as ``model`` plus Service API ``user``."""
@ -54,8 +104,12 @@ def expect_user_json(namespace: Namespace):
return decorator
# Build the parameter-documentation mapping for a multipart upload endpoint.
#   include_user:     when true, adds the `user` form field (USER_FORM_PARAM) next to `file`.
#   file_description: optional replacement for the `file` field's description text
#                     (only present on the second signature).
# Returns a dict keyed by parameter name. The result is deep-copied, so callers cannot
# mutate the shared module-level constants through it.
# NOTE(review): two `def` lines and two `params` assignments appear in this span — the first
# pair looks like the pre-change signature/body and the second the post-change one (diff
# rendered without +/- markers).
def multipart_file_params(*, include_user: bool) -> dict[str, dict[str, object]]:
params: dict[str, dict[str, object]] = {"file": FILE_FORM_PARAM}
def multipart_file_params(*, include_user: bool, file_description: str | None = None) -> dict[str, dict[str, object]]:
# Copy first so the per-call description override never leaks into FILE_FORM_PARAM.
file_param = deepcopy(FILE_FORM_PARAM)
if file_description is not None:
file_param["description"] = file_description
params: dict[str, dict[str, object]] = {"file": file_param}
if include_user:
params["user"] = USER_FORM_PARAM
# The final deepcopy also detaches the `user` entry, which is inserted by reference above.
return deepcopy(params)

View File

@ -9,6 +9,12 @@ from graphon.model_runtime.utils.encoders import jsonable_encoder
from services.entities.model_provider_entities import ProviderWithModelsResponse
from services.model_provider_service import ModelProviderService
# Parameter schema for the `model_type` parameter of the available-models endpoint:
# a string restricted to the listed model types.
MODEL_TYPE_PARAM = {
"description": "Type of model to retrieve.",
"enum": ["text-embedding", "rerank", "llm", "tts", "speech2text", "moderation"],
"type": "string",
}
class ProviderWithModelsListResponse(ResponseModel):
data: list[ProviderWithModelsResponse]
@ -32,7 +38,7 @@ class ModelProviderAvailableModelApi(Resource):
)
@service_api_ns.doc("get_available_models")
@service_api_ns.doc(description="Get available models by model type")
@service_api_ns.doc(params={"model_type": "Type of model to retrieve"})
@service_api_ns.doc(params={"model_type": MODEL_TYPE_PARAM})
@service_api_ns.doc(
responses={
200: "Models retrieved successfully",

View File

@ -1,7 +1,7 @@
from collections.abc import Sequence
from typing import Literal
from typing import Annotated, Literal
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, WithJsonSchema
SupportedComparisonOperator = Literal[
# for string or array
@ -26,6 +26,19 @@ SupportedComparisonOperator = Literal[
"before",
"after",
]
# Type of the value a metadata condition compares against.
# The Python union keeps `int` and `float` separate, while the attached JSON
# schema documents both as a single `number` alternative and lists `null`
# explicitly alongside string and array-of-strings.
ConditionValue = Annotated[
str | Sequence[str] | None | int | float,
WithJsonSchema(
{
"anyOf": [
{"type": "string"},
{"items": {"type": "string"}, "type": "array"},
{"type": "number"},
{"type": "null"},
]
}
),
]
class Condition(BaseModel):
@ -33,9 +46,23 @@ class Condition(BaseModel):
Condition detail
"""
name: str
comparison_operator: SupportedComparisonOperator
value: str | Sequence[str] | None | int | float = None
name: str = Field(description="Metadata field name to compare against.")
comparison_operator: SupportedComparisonOperator = Field(
description=(
"Comparison to apply. String operators (`contains`, `not contains`, `start with`, `end with`, `is`, "
"`is not`, `empty`, `not empty`, `in`, `not in`) act on string or array metadata; numeric operators "
"(`=`, `≠`, `>`, `<`, `≥`, `≤`) act on numeric metadata; time operators (`before`, `after`) act on "
"time metadata."
)
)
value: ConditionValue = Field(
default=None,
description=(
"Value to compare against. Type depends on `comparison_operator`: string for most string operators, "
"array of strings for `in` and `not in`, number for numeric operators, and omit or use `null` for "
"`empty` and `not empty`."
),
)
class MetadataFilteringCondition(BaseModel):
@ -43,5 +70,12 @@ class MetadataFilteringCondition(BaseModel):
Metadata Filtering Condition.
"""
logical_operator: Literal["and", "or"] | None = "and"
conditions: list[Condition] | None = Field(default=None, deprecated=True)
logical_operator: Literal["and", "or"] | None = Field(
default="and",
description="How to combine multiple conditions.",
)
conditions: list[Condition] | None = Field(
default=None,
deprecated=True,
description="List of metadata conditions to evaluate.",
)

View File

@ -1,7 +1,7 @@
from enum import StrEnum
from typing import Literal
from typing import Annotated, Literal
from pydantic import BaseModel
from pydantic import BaseModel, Field, WithJsonSchema
class ParentMode(StrEnum):
@ -9,19 +9,39 @@ class ParentMode(StrEnum):
PARAGRAPH = "paragraph"
# Identifier of a preprocessing rule. The annotated type is plain `str`; the
# attached JSON schema documents the three known rule IDs as an enum.
# NOTE(review): WithJsonSchema presumably only changes the generated schema, so
# other strings would still pass validation — confirm before relying on it.
PreProcessingRuleID = Annotated[
str,
WithJsonSchema(
{
"enum": ["remove_stopwords", "remove_extra_spaces", "remove_urls_emails"],
"type": "string",
}
),
]
class PreProcessingRule(BaseModel):
id: str
enabled: bool
id: PreProcessingRuleID = Field(description="Rule identifier.")
enabled: bool = Field(description="Whether this preprocessing rule is enabled.")
class Segmentation(BaseModel):
separator: str = "\n"
max_tokens: int
chunk_overlap: int = 0
separator: str = Field(default="\n", description="Custom separator for splitting text.")
max_tokens: int = Field(description="Maximum token count per chunk.")
chunk_overlap: int = Field(default=0, description="Token overlap between chunks.")
class Rule(BaseModel):
pre_processing_rules: list[PreProcessingRule] | None = None
segmentation: Segmentation | None = None
parent_mode: Literal["full-doc", "paragraph"] | None = None
subchunk_segmentation: Segmentation | None = None
pre_processing_rules: list[PreProcessingRule] | None = Field(
default=None,
description="Pre-processing rules to apply before segmentation.",
)
segmentation: Segmentation | None = Field(default=None, description="Parent chunk segmentation settings.")
parent_mode: Literal["full-doc", "paragraph"] | None = Field(
default=None,
description="Parent-child segmentation mode.",
)
subchunk_segmentation: Segmentation | None = Field(
default=None,
description="Child chunk segmentation settings.",
)

View File

@ -122,6 +122,7 @@ def install_swagger_compatibility() -> None:
original_description_for = Swagger.description_for
original_serialize_operation = Swagger.serialize_operation
original_parameters_and_request_body_for = Swagger.parameters_and_request_body_for
original_request_body_from_form_params = Swagger.request_body_from_form_params
original_as_dict = Swagger.as_dict
def get_or_create_inline_model(self: Swagger, nested_fields: dict[object, object]) -> object:
@ -203,6 +204,35 @@ def install_swagger_compatibility() -> None:
path[method] = operation
return not_none(path)
def request_body_from_form_params_with_file_description(self: Swagger, params: list[dict[str, object]]):
    """Build the form request body, then copy file-parameter descriptions into it.

    Delegates to the original ``request_body_from_form_params`` and, for every
    parameter of type ``"file"`` that has a string ``name`` and ``description``,
    writes that description onto the matching property of the
    ``multipart/form-data`` schema. Parameters whose schema path is missing or
    not made of dicts are left alone.

    Returns:
        The request body produced by the original implementation, with file
        property descriptions filled in where possible.
    """
    request_body = original_request_body_from_form_params(self, params)
    for form_param in params:
        if form_param.get("type") != "file":
            continue
        field_name = form_param.get("name")
        field_description = form_param.get("description")
        if not (isinstance(field_name, str) and isinstance(field_description, str)):
            continue

        # Walk request_body -> content -> multipart/form-data -> schema ->
        # properties -> <field_name>, giving up as soon as a level is not a dict.
        node = request_body
        for key in ("content", "multipart/form-data", "schema", "properties", field_name):
            node = node.get(key)
            if not isinstance(node, dict):
                break
        else:
            # Only reached when every level resolved to a dict; `node` is now
            # the schema of the file property itself.
            node["description"] = field_description
    return request_body
def as_dict_with_inline_dict_support(self: Swagger):
# Temporarily set RESTX_INCLUDE_ALL_MODELS = false to prevent the "length changed while iterating" error
include_all_models = current_app.config.get("RESTX_INCLUDE_ALL_MODELS", False)
@ -219,5 +249,6 @@ def install_swagger_compatibility() -> None:
Swagger.description_for = description_for_with_explicit_summary
Swagger.serialize_operation = serialize_operation_with_explicit_summary_tags
Swagger.serialize_resource = serialize_resource_with_explicit_operation_tags
Swagger.request_body_from_form_params = request_body_from_form_params_with_file_description
Swagger.as_dict = as_dict_with_inline_dict_support
Swagger._dify_swagger_compatibility_installed = True

View File

@ -6390,9 +6390,9 @@ Request body:
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| conversation_id | query | Conversation UUID | Yes | string |
| first_id | query | First message ID for pagination | No | string |
| limit | query | Number of messages to return (1-100) | No | integer, <br>**Default:** 20 |
| conversation_id | query | Conversation ID. | Yes | string |
| first_id | query | The ID of the first chat record on the current page. Omit this value to fetch the latest messages; for subsequent pages, use the first message ID from the current list to fetch older messages. | No | string |
| limit | query | Number of chat history messages to return per request. | No | integer, <br>**Default:** 20 |
| installed_app_id | path | | Yes | string (uuid) |
#### Responses
@ -13352,7 +13352,7 @@ Button styles for user actions.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| content | string | | Yes |
| content | string | Child chunk text content. | Yes |
#### ChildChunkDetailResponse
@ -13395,14 +13395,14 @@ Button styles for user actions.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| content | string | | Yes |
| id | string | | No |
| content | string | Child chunk text content. | Yes |
| id | string | Existing child chunk ID. Omit to create a new child chunk. | No |
#### ChildChunkUpdatePayload
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| content | string | | Yes |
| content | string | Child chunk text content. | Yes |
#### CliToolSuggestion
@ -13560,9 +13560,9 @@ Condition detail
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| comparison_operator | string, <br>**Available values:** "<", "=", ">", "after", "before", "contains", "empty", "end with", "in", "is", "is not", "not contains", "not empty", "not in", "start with", "≠", "≤", "≥" | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes |
| name | string | | Yes |
| value | string<br>[ string ]<br>integer<br>number | | No |
| comparison_operator | string, <br>**Available values:** "<", "=", ">", "after", "before", "contains", "empty", "end with", "in", "is", "is not", "not contains", "not empty", "not in", "start with", "≠", "≤", "≥" | Comparison to apply. String operators (`contains`, `not contains`, `start with`, `end with`, `is`, `is not`, `empty`, `not empty`, `in`, `not in`) act on string or array metadata; numeric operators (`=`, `≠`, `>`, `<`, `≥`, `≤`) act on numeric metadata; time operators (`before`, `after`) act on time metadata.<br>*Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes |
| name | string | Metadata field name to compare against. | Yes |
| value | string<br>[ string ]<br>number | Value to compare against. Type depends on `comparison_operator`: string for most string operators, array of strings for `in` and `not in`, number for numeric operators, and omit or use `null` for `empty` and `not empty`. | No |
#### ConfigurateMethod
@ -13704,8 +13704,8 @@ Enum class for configurate method of provider model.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| auto_generate | boolean | | No |
| name | string | | No |
| auto_generate | boolean | Automatically generate the conversation name. When `true`, the `name` field is ignored. | No |
| name | string | Conversation name. Required when `auto_generate` is `false`. | No |
#### ConversationVariableResponse
@ -14682,15 +14682,15 @@ Request payload for bulk downloading documents as a zip archive.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| document_ids | [ string (uuid) ] | | Yes |
| document_ids | [ string (uuid) ] | List of document IDs to include in the ZIP download. | Yes |
#### DocumentMetadataOperation
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| document_id | string | | Yes |
| metadata_list | [ [MetadataDetail](#metadatadetail) ] | | Yes |
| partial_update | boolean | | No |
| document_id | string | Document ID whose metadata should be updated. | Yes |
| metadata_list | [ [MetadataDetail](#metadatadetail) ] | Metadata fields to update. | Yes |
| partial_update | boolean | Whether to partially update metadata, keeping existing values for unspecified fields. | No |
#### DocumentMetadataResponse
@ -15472,10 +15472,10 @@ Enum class for form type.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| attachment_ids | [ string ] | | No |
| external_retrieval_model | object | | No |
| query | string | | Yes |
| retrieval_model | [RetrievalModel](#retrievalmodel) | | No |
| attachment_ids | [ string ] | List of attachment IDs to include in the retrieval context. | No |
| external_retrieval_model | object | Retrieval settings for external knowledge bases. | No |
| query | string | Search query text. | Yes |
| retrieval_model | [RetrievalModel](#retrievalmodel) | Retrieval model configuration. Controls how chunks are searched and ranked. | No |
#### HitTestingQuery
@ -15857,19 +15857,19 @@ Input field definition for snippet parameters.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| data_source | [DataSource](#datasource) | | No |
| doc_form | string, <br>**Default:** text_model | | No |
| doc_language | string, <br>**Default:** English | | No |
| duplicate | boolean, <br>**Default:** true | | No |
| embedding_model | string | | No |
| embedding_model_provider | string | | No |
| indexing_technique | string, <br>**Available values:** "economy", "high_quality" | *Enum:* `"economy"`, `"high_quality"` | Yes |
| is_multimodal | boolean | | No |
| name | string | | No |
| original_document_id | string | | No |
| process_rule | [ProcessRule](#processrule) | | No |
| retrieval_model | [RetrievalModel](#retrievalmodel) | | No |
| summary_index_setting | object | | No |
| data_source | [DataSource](#datasource) | Document data source configuration. | No |
| doc_form | string, <br>**Available values:** "hierarchical_model", "qa_model", "text_model", <br>**Default:** text_model | `text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, `qa_model` for question-answer pair extraction.<br>*Enum:* `"hierarchical_model"`, `"qa_model"`, `"text_model"` | No |
| doc_language | string, <br>**Default:** English | Language of the document for processing optimization. | No |
| duplicate | boolean, <br>**Default:** true | Whether duplicate document content is allowed. | No |
| embedding_model | string | Embedding model name. Use the `model` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`. | No |
| embedding_model_provider | string | Embedding model provider. Use the `provider` field from [Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`. | No |
| indexing_technique | string, <br>**Available values:** "economy", "high_quality" | `high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing. Required when adding the first document to a knowledge base; subsequent documents inherit the knowledge base's indexing technique if omitted.<br>*Enum:* `"economy"`, `"high_quality"` | Yes |
| is_multimodal | boolean | Whether the document uses multimodal indexing. | No |
| name | string | Document name. | No |
| original_document_id | string | Original document ID for replacement updates. | No |
| process_rule | [ProcessRule](#processrule) | Processing rules for chunking. | No |
| retrieval_model | [RetrievalModel](#retrievalmodel) | Retrieval model configuration. Controls how chunks are searched and ranked in this knowledge base. | No |
| summary_index_setting | object | Summary index configuration. | No |
#### KnowledgePipeline
@ -16142,9 +16142,9 @@ Enum class for large language model mode.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| content | string | | No |
| content | string | Optional text feedback providing additional detail. | No |
| message_id | string | Message ID | Yes |
| rating | string | | No |
| rating | string | Feedback rating. Set to `null` to revoke previously submitted feedback. | No |
#### MessageFile
@ -16199,24 +16199,24 @@ Enum class for large language model mode.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| conversation_id | string | Conversation UUID | Yes |
| first_id | string | First message ID for pagination | No |
| limit | integer, <br>**Default:** 20 | Number of messages to return (1-100) | No |
| conversation_id | string | Conversation ID. | Yes |
| first_id | string | The ID of the first chat record on the current page. Omit this value to fetch the latest messages; for subsequent pages, use the first message ID from the current list to fetch older messages. | No |
| limit | integer, <br>**Default:** 20 | Number of chat history messages to return per request. | No |
#### MetadataArgs
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| name | string | | Yes |
| type | string, <br>**Available values:** "number", "string", "time" | *Enum:* `"number"`, `"string"`, `"time"` | Yes |
| name | string | Metadata field name. | Yes |
| type | string, <br>**Available values:** "number", "string", "time" | `string` for text values, `number` for numeric values, `time` for date/time values.<br>*Enum:* `"number"`, `"string"`, `"time"` | Yes |
#### MetadataDetail
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| id | string | | Yes |
| name | string | | Yes |
| value | string<br>integer<br>number | | No |
| id | string | Metadata field ID. | Yes |
| name | string | Metadata field name. | Yes |
| value | string<br>integer<br>number | Metadata value. Can be a string, number, or `null`. | No |
#### MetadataFilteringCondition
@ -16224,8 +16224,8 @@ Metadata Filtering Condition.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| conditions | [ [Condition](#condition) ] | | No |
| logical_operator | string | | No |
| conditions | [ [Condition](#condition) ] | List of metadata conditions to evaluate. | No |
| logical_operator | string | How to combine multiple conditions. | No |
#### MetadataOperationData
@ -16233,13 +16233,13 @@ Metadata operation data
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| operation_data | [ [DocumentMetadataOperation](#documentmetadataoperation) ] | | Yes |
| operation_data | [ [DocumentMetadataOperation](#documentmetadataoperation) ] | Array of document metadata update operations. Each entry maps a document ID to its metadata values. | Yes |
#### MetadataUpdatePayload
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| name | string | | Yes |
| name | string | New metadata field name. | Yes |
#### ModelConfig
@ -17415,8 +17415,8 @@ Shared permission levels for resources (datasets, credentials, etc.)
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| enabled | boolean | | Yes |
| id | string | | Yes |
| enabled | boolean | Whether this preprocessing rule is enabled. | Yes |
| id | string, <br>**Available values:** "remove_extra_spaces", "remove_stopwords", "remove_urls_emails" | Rule identifier.<br>*Enum:* `"remove_extra_spaces"`, `"remove_stopwords"`, `"remove_urls_emails"` | Yes |
#### PreviewDetail
@ -17441,8 +17441,8 @@ Serialized pricing info with codegen-safe decimal string patterns.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| mode | [ProcessRuleMode](#processrulemode) | | Yes |
| rules | [Rule](#rule) | | No |
| mode | [ProcessRuleMode](#processrulemode) | Processing mode. `automatic` uses built-in rules, `custom` allows manual configuration, and `hierarchical` enables parent-child chunk structure for `doc_form: hierarchical_model`. | Yes |
| rules | [Rule](#rule) | Custom processing rules. | No |
#### ProcessRuleMode
@ -17799,8 +17799,8 @@ Model class for provider quota configuration.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| reranking_model_name | string | | No |
| reranking_provider_name | string | | No |
| reranking_model_name | string | Name of the reranking model. | No |
| reranking_provider_name | string | Provider name of the reranking model. | No |
#### RestrictModel
@ -17826,15 +17826,15 @@ Model class for provider quota configuration.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| metadata_filtering_conditions | [MetadataFilteringCondition](#metadatafilteringcondition) | | No |
| reranking_enable | boolean | | Yes |
| reranking_mode | string | | No |
| reranking_model | [RerankingModel](#rerankingmodel) | | No |
| score_threshold | number | | No |
| score_threshold_enabled | boolean | | Yes |
| search_method | [RetrievalMethod](#retrievalmethod) | | Yes |
| top_k | integer | | Yes |
| weights | [WeightModel](#weightmodel) | | No |
| metadata_filtering_conditions | [MetadataFilteringCondition](#metadatafilteringcondition) | Restrict retrieval to chunks whose document metadata matches the given conditions. Conditions are evaluated server-side against document metadata fields. | No |
| reranking_enable | boolean | Whether reranking is enabled. | Yes |
| reranking_mode | string | Reranking mode. Required when `reranking_enable` is `true`. | No |
| reranking_model | [RerankingModel](#rerankingmodel) | Reranking model configuration. | No |
| score_threshold | number | Minimum similarity score for results. Only effective when score threshold filtering is enabled. | No |
| score_threshold_enabled | boolean | Whether score threshold filtering is enabled. | Yes |
| search_method | [RetrievalMethod](#retrievalmethod) | Search method used for retrieval. | Yes |
| top_k | integer | Maximum number of results to return. | Yes |
| weights | [WeightModel](#weightmodel) | Weight configuration for hybrid search. | No |
#### RetrievalSettingResponse
@ -17876,10 +17876,10 @@ Model class for provider quota configuration.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| parent_mode | string | | No |
| pre_processing_rules | [ [PreProcessingRule](#preprocessingrule) ] | | No |
| segmentation | [Segmentation](#segmentation) | | No |
| subchunk_segmentation | [Segmentation](#segmentation) | | No |
| parent_mode | string | Parent-child segmentation mode. | No |
| pre_processing_rules | [ [PreProcessingRule](#preprocessingrule) ] | Pre-processing rules to apply before segmentation. | No |
| segmentation | [Segmentation](#segmentation) | Parent chunk segmentation settings. | No |
| subchunk_segmentation | [Segmentation](#segmentation) | Child chunk segmentation settings. | No |
#### RuleCodeGeneratePayload
@ -18083,10 +18083,10 @@ Model class for provider quota configuration.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| chunk_overlap | integer | | No |
| max_tokens | integer | | Yes |
| chunk_overlap | integer | Token overlap between chunks. | No |
| max_tokens | integer | Maximum token count per chunk. | Yes |
| separator | string, <br>**Default:**
| | No |
| Custom separator for splitting text. | No |
#### SelectInputConfig
@ -18690,10 +18690,10 @@ Tag type
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| message_id | string | Message ID | No |
| message_id | string | Message ID. Takes priority over `text` when both are provided. | No |
| streaming | boolean | Reserved for compatibility; TTS response streaming is determined by the provider output. | No |
| text | string | Text to convert to audio | No |
| voice | string | Voice to use for TTS | No |
| text | string | Speech content to convert. | No |
| voice | string | Voice to use for text-to-speech. Available voices depend on the TTS provider configured for this app. Omit to use the app's configured voice when available; that value is exposed by [Get App Parameters](/api-reference/applications/get-app-parameters) as `text_to_speech.voice`. | No |
#### TextToSpeechPayload
@ -19263,23 +19263,23 @@ in form definition, or a variable while the workflow is running.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| keyword_weight | number | | Yes |
| keyword_weight | number | Weight assigned to keyword search results. | Yes |
#### WeightModel
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| keyword_setting | [WeightKeywordSetting](#weightkeywordsetting) | | No |
| vector_setting | [WeightVectorSetting](#weightvectorsetting) | | No |
| weight_type | string | | No |
| keyword_setting | [WeightKeywordSetting](#weightkeywordsetting) | Keyword search weight settings. | No |
| vector_setting | [WeightVectorSetting](#weightvectorsetting) | Semantic search weight settings. | No |
| weight_type | string | Strategy for balancing semantic and keyword search weights. | No |
#### WeightVectorSetting
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| embedding_model_name | string | | Yes |
| embedding_provider_name | string | | Yes |
| vector_weight | number | | Yes |
| embedding_model_name | string | Name of the embedding model used for vector search. | Yes |
| embedding_provider_name | string | Provider of the embedding model used for vector search. | Yes |
| vector_weight | number | Weight assigned to semantic vector search results. | Yes |
#### WorkflowAgentBindingType
@ -19975,8 +19975,8 @@ can reuse its existing handler.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| files | [ object ] | | No |
| inputs | object | | Yes |
| files | [ object ] | File list for workflow system file inputs. Available when file upload is enabled for the workflow. To attach a local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned `id` as `upload_file_id` with `transfer_method: local_file`. | No |
| inputs | object | Key-value pairs for workflow input variables. Values for file-type variables should be arrays of file objects with `type`, `transfer_method`, and either `url` or `upload_file_id`. Refer to the `user_input_form` field in the [Get App Parameters](/api-reference/applications/get-app-parameters) response to discover the variable names and types expected by your app. | Yes |
#### WorkflowRunQuery

View File

@ -792,7 +792,7 @@ Liveness payload for `GET /openapi/v1/_health` — no auth required.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| action | string | | Yes |
| action | string | ID of the action button the recipient selected. Must match one of the `id` values from the form's `user_actions` list. | Yes |
| inputs | object | Submitted human input values keyed by output variable name. Use a string for paragraph or select input values, a file mapping for file inputs, and a list of file mappings for file-list inputs. Local file mappings use `transfer_method=local_file` with `upload_file_id`; remote file mappings use `transfer_method=remote_url` with `url` or `remote_url`. | Yes |
#### Import

File diff suppressed because it is too large Load Diff

View File

@ -471,9 +471,9 @@ Retrieve paginated list of messages from a conversation in a chat application.
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| conversation_id | query | Conversation UUID | Yes | string |
| first_id | query | First message ID for pagination | No | string |
| limit | query | Number of messages to return (1-100) | No | integer, <br>**Default:** 20 |
| conversation_id | query | Conversation ID. | Yes | string |
| first_id | query | The ID of the first chat record on the current page. Omit this value to fetch the latest messages; for subsequent pages, use the first message ID from the current list to fetch older messages. | No | string |
| limit | query | Number of chat history messages to return per request. | No | integer, <br>**Default:** 20 |
#### Responses
@ -1091,8 +1091,8 @@ Button styles for user actions.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| auto_generate | boolean | | No |
| name | string | | No |
| auto_generate | boolean | Automatically generate the conversation name. When `true`, the `name` field is ignored. | No |
| name | string | Conversation name. Required when `auto_generate` is `false`. | No |
#### EmailCodeLoginSendPayload
@ -1281,7 +1281,7 @@ Parsed multipart form fields for HITL uploads.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| action | string | | Yes |
| action | string | ID of the action button the recipient selected. Must match one of the `id` values from the form's `user_actions` list. | Yes |
| inputs | object | Submitted human input values keyed by output variable name. Use a string for paragraph or select input values, a file mapping for file inputs, and a list of file mappings for file-list inputs. Local file mappings use `transfer_method=local_file` with `upload_file_id`; remote file mappings use `transfer_method=remote_url` with `url` or `remote_url`. | Yes |
#### HumanInputFormSubmitResponse
@ -1371,8 +1371,8 @@ Parsed multipart form fields for HITL uploads.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| content | string | | No |
| rating | string | | No |
| content | string | Optional text feedback providing additional detail. | No |
| rating | string | Feedback rating. Set to `null` to revoke previously submitted feedback. | No |
#### MessageFile
@ -1392,9 +1392,9 @@ Parsed multipart form fields for HITL uploads.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| conversation_id | string | Conversation UUID | Yes |
| first_id | string | First message ID for pagination | No |
| limit | integer, <br>**Default:** 20 | Number of messages to return (1-100) | No |
| conversation_id | string | Conversation ID. | Yes |
| first_id | string | The ID of the first chat record on the current page. Omit this value to fetch the latest messages; for subsequent pages, use the first message ID from the current list to fetch older messages. | No |
| limit | integer, <br>**Default:** 20 | Number of chat history messages to return per request. | No |
#### MessageMoreLikeThisQuery
@ -1631,10 +1631,10 @@ Default configuration for form inputs.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| message_id | string | Message ID | No |
| message_id | string | Message ID. Takes priority over `text` when both are provided. | No |
| streaming | boolean | Reserved for compatibility; TTS response streaming is determined by the provider output. | No |
| text | string | Text to convert to audio | No |
| voice | string | Voice to use for TTS | No |
| text | string | Speech content to convert. | No |
| voice | string | Voice to use for text-to-speech. Available voices depend on the TTS provider configured for this app. Omit to use the app's configured voice when available; that value is exposed by [Get App Parameters](/api-reference/applications/get-app-parameters) as `text_to_speech.voice`. | No |
#### UserActionConfig
@ -1711,5 +1711,5 @@ in form definition, or a variable while the workflow is running.
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| files | [ object ] | | No |
| inputs | object | | Yes |
| files | [ object ] | File list for workflow system file inputs. Available when file upload is enabled for the workflow. To attach a local file, first upload it via [Upload File](/api-reference/files/upload-file) and use the returned `id` as `upload_file_id` with `transfer_method: local_file`. | No |
| inputs | object | Key-value pairs for workflow input variables. Values for file-type variables should be arrays of file objects with `type`, `transfer_method`, and either `url` or `upload_file_id`. Refer to the `user_input_form` field in the [Get App Parameters](/api-reference/applications/get-app-parameters) response to discover the variable names and types expected by your app. | Yes |

View File

@ -1,6 +1,6 @@
from typing import Any, Literal
from typing import Annotated, Any, Literal
from pydantic import BaseModel, Field, field_validator
from pydantic import BaseModel, Field, WithJsonSchema, field_validator
from core.rag.entities import Rule
from core.rag.entities.metadata_entities import MetadataFilteringCondition
@ -8,10 +8,79 @@ from core.rag.index_processor.constant.index_type import IndexStructureType
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from models.enums import ProcessRuleMode
# Document chunking form. The Python type stays a plain `str`; the WithJsonSchema annotation
# overrides the generated JSON schema with a string enum of the three listed values.
# Used as the type of `KnowledgeConfig.doc_form` further down in this module.
DocForm = Annotated[
str,
WithJsonSchema({"enum": ["text_model", "hierarchical_model", "qa_model"], "type": "string"}),
]
# Optional indexing technique. The Python type is `str | None`; the schema override advertises
# either the `high_quality` / `economy` string enum or null.
# NOTE(review): no use of this alias is visible in this chunk — presumably imported elsewhere; confirm.
IndexingTechnique = Annotated[
str | None,
WithJsonSchema({"anyOf": [{"enum": ["high_quality", "economy"], "type": "string"}, {"type": "null"}]}),
]
# Knowledge provider kind. The Python type is `str`; the schema override restricts the documented
# values to `vendor` or `external`.
# NOTE(review): no use of this alias is visible in this chunk — presumably imported elsewhere; confirm.
KnowledgeProvider = Annotated[
str,
WithJsonSchema({"enum": ["vendor", "external"], "type": "string"}),
]
# Optional reranking mode. The Python type is `str | None`; the schema override advertises either
# the `reranking_model` / `weighted_score` string enum or null.
# Used as the type of `RetrievalModel.reranking_mode` further down in this module.
RerankingMode = Annotated[
str | None,
WithJsonSchema({"anyOf": [{"enum": ["reranking_model", "weighted_score"], "type": "string"}, {"type": "null"}]}),
]
# Summary-index configuration. The Python type remains an untyped `dict[str, Any] | None`; the
# schema override documents it as either null or an object with four described properties
# (`enable`, `model_name`, `model_provider_name`, `summary_prompt`).
# The object branch has no `required` list, so every property is optional in the published schema.
# Used as the type of `KnowledgeConfig.summary_index_setting` further down in this module.
SummaryIndexSetting = Annotated[
dict[str, Any] | None,
WithJsonSchema(
{
"anyOf": [
{
"properties": {
"enable": {"description": "Whether to enable summary indexing.", "type": "boolean"},
"model_name": {
"description": "Name of the model used for generating summaries.",
"type": "string",
},
"model_provider_name": {
"description": "Provider of the summary generation model.",
"type": "string",
},
"summary_prompt": {
"description": "Custom prompt template for summary generation.",
"type": "string",
},
},
"type": "object",
},
{"type": "null"},
]
}
),
]
# Retrieval settings for an external knowledge source. The Python type remains
# `dict[str, Any] | None`; the schema override documents it as either null or an object with
# `top_k` (integer), `score_threshold` (number) and `score_threshold_enabled` (boolean).
# The object branch has no `required` list, so every property is optional in the published schema.
# NOTE(review): no use of this alias is visible in this chunk — presumably imported elsewhere; confirm.
ExternalRetrievalModel = Annotated[
dict[str, Any] | None,
WithJsonSchema(
{
"anyOf": [
{
"properties": {
"top_k": {"description": "Maximum number of results to return.", "type": "integer"},
"score_threshold": {
"description": "Minimum similarity score threshold for filtering results.",
"type": "number",
},
"score_threshold_enabled": {
"description": "Whether score threshold filtering is enabled.",
"type": "boolean",
},
},
"type": "object",
},
{"type": "null"},
]
}
),
]
# Identifies a reranking model by provider name and model name. Both fields are optional and
# default to None.
# NOTE(review): each field appears twice (bare `= None`, then `Field(...)` with a description); this
# looks like the removed/added lines of a diff rendered without +/- markers — confirm which are current.
class RerankingModel(BaseModel):
reranking_provider_name: str | None = None
reranking_model_name: str | None = None
reranking_provider_name: str | None = Field(default=None, description="Provider name of the reranking model.")
reranking_model_name: str | None = Field(default=None, description="Name of the reranking model.")
class NotionIcon(BaseModel):
@ -56,36 +125,56 @@ class DataSource(BaseModel):
# Document processing rule: a required `mode` (a ProcessRuleMode) plus optional custom `rules`
# that default to None.
# NOTE(review): each field appears twice (bare annotation, then `Field(...)` with a description); this
# looks like the removed/added lines of a diff rendered without +/- markers — confirm which are current.
class ProcessRule(BaseModel):
mode: ProcessRuleMode
rules: Rule | None = None
mode: ProcessRuleMode = Field(
description=(
"Processing mode. `automatic` uses built-in rules, `custom` allows manual configuration, and "
"`hierarchical` enables parent-child chunk structure for `doc_form: hierarchical_model`."
)
)
rules: Rule | None = Field(default=None, description="Custom processing rules.")
# Semantic (vector) side of a weighted retrieval configuration: the weight itself plus the
# embedding provider and model names. All three fields are required (no defaults).
# NOTE(review): each field appears twice (bare annotation, then `Field(...)` with a description); this
# looks like the removed/added lines of a diff rendered without +/- markers — confirm which are current.
class WeightVectorSetting(BaseModel):
vector_weight: float
embedding_provider_name: str
embedding_model_name: str
vector_weight: float = Field(description="Weight assigned to semantic vector search results.")
embedding_provider_name: str = Field(description="Provider of the embedding model used for vector search.")
embedding_model_name: str = Field(description="Name of the embedding model used for vector search.")
# Keyword side of a weighted retrieval configuration: a single required `keyword_weight` float.
# NOTE(review): the field appears twice (bare annotation, then `Field(...)` with a description); this
# looks like the removed/added lines of a diff rendered without +/- markers — confirm which is current.
class WeightKeywordSetting(BaseModel):
keyword_weight: float
keyword_weight: float = Field(description="Weight assigned to keyword search results.")
# Weight configuration combining an optional `weight_type` strategy (one of three literals) with
# optional vector and keyword settings. Every field defaults to None.
# NOTE(review): each field appears twice (bare `= None`, then `Field(...)` with a description); this
# looks like the removed/added lines of a diff rendered without +/- markers — confirm which are current.
class WeightModel(BaseModel):
weight_type: Literal["semantic_first", "keyword_first", "customized"] | None = None
vector_setting: WeightVectorSetting | None = None
keyword_setting: WeightKeywordSetting | None = None
weight_type: Literal["semantic_first", "keyword_first", "customized"] | None = Field(
default=None,
description="Strategy for balancing semantic and keyword search weights.",
)
vector_setting: WeightVectorSetting | None = Field(default=None, description="Semantic search weight settings.")
keyword_setting: WeightKeywordSetting | None = Field(default=None, description="Keyword search weight settings.")
# Retrieval configuration. `search_method`, `reranking_enable`, `top_k` and
# `score_threshold_enabled` are required; the remaining fields are optional and default to None.
# NOTE(review): the `reranking_mode` description says it is required when `reranking_enable` is
# true, but the field defaults to None and no validator enforcing that is visible in this chunk.
# NOTE(review): each field appears twice (bare form, then `Field(...)` with a description); this
# looks like the removed/added lines of a diff rendered without +/- markers — confirm which are current.
class RetrievalModel(BaseModel):
search_method: RetrievalMethod
reranking_enable: bool
reranking_model: RerankingModel | None = None
reranking_mode: str | None = None
top_k: int
score_threshold_enabled: bool
score_threshold: float | None = None
weights: WeightModel | None = None
metadata_filtering_conditions: MetadataFilteringCondition | None = None
search_method: RetrievalMethod = Field(description="Search method used for retrieval.")
reranking_enable: bool = Field(description="Whether reranking is enabled.")
reranking_model: RerankingModel | None = Field(default=None, description="Reranking model configuration.")
reranking_mode: RerankingMode = Field(
default=None,
description="Reranking mode. Required when `reranking_enable` is `true`.",
)
top_k: int = Field(description="Maximum number of results to return.")
score_threshold_enabled: bool = Field(description="Whether score threshold filtering is enabled.")
score_threshold: float | None = Field(
default=None,
description="Minimum similarity score for results. Only effective when score threshold filtering is enabled.",
)
weights: WeightModel | None = Field(default=None, description="Weight configuration for hybrid search.")
metadata_filtering_conditions: MetadataFilteringCondition | None = Field(
default=None,
description=(
"Restrict retrieval to chunks whose document metadata matches the given conditions. Conditions are "
"evaluated server-side against document metadata fields."
),
)
class MetaDataConfig(BaseModel):
@ -94,19 +183,51 @@ class MetaDataConfig(BaseModel):
class KnowledgeConfig(BaseModel):
original_document_id: str | None = None
duplicate: bool = True
indexing_technique: Literal["high_quality", "economy"]
data_source: DataSource | None = None
process_rule: ProcessRule | None = None
retrieval_model: RetrievalModel | None = None
summary_index_setting: dict[str, Any] | None = Field(default=None)
doc_form: str = "text_model"
doc_language: str = "English"
embedding_model: str | None = None
embedding_model_provider: str | None = None
name: str | None = None
is_multimodal: bool = False
original_document_id: str | None = Field(default=None, description="Original document ID for replacement updates.")
duplicate: bool = Field(default=True, description="Whether duplicate document content is allowed.")
indexing_technique: Literal["high_quality", "economy"] = Field(
description=(
"`high_quality` uses embedding models for precise search; `economy` uses keyword-based indexing. "
"Required when adding the first document to a knowledge base; subsequent documents inherit the "
"knowledge base's indexing technique if omitted."
)
)
data_source: DataSource | None = Field(default=None, description="Document data source configuration.")
process_rule: ProcessRule | None = Field(default=None, description="Processing rules for chunking.")
retrieval_model: RetrievalModel | None = Field(
default=None,
description=(
"Retrieval model configuration. Controls how chunks are searched and ranked in this knowledge base."
),
)
summary_index_setting: SummaryIndexSetting = Field(
default=None,
description="Summary index configuration.",
)
doc_form: DocForm = Field(
default="text_model",
description=(
"`text_model` for standard text chunking, `hierarchical_model` for parent-child chunk structure, "
"`qa_model` for question-answer pair extraction."
),
)
doc_language: str = Field(default="English", description="Language of the document for processing optimization.")
embedding_model: str | None = Field(
default=None,
description=(
"Embedding model name. Use the `model` field from "
"[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
),
)
embedding_model_provider: str | None = Field(
default=None,
description=(
"Embedding model provider. Use the `provider` field from "
"[Get Available Models](/api-reference/models/get-available-models) with `model_type=text-embedding`."
),
)
name: str | None = Field(default=None, description="Document name.")
is_multimodal: bool = Field(default=False, description="Whether the document uses multimodal indexing.")
@field_validator("doc_form")
@classmethod
@ -122,47 +243,61 @@ class KnowledgeConfig(BaseModel):
# Arguments for creating a chunk (segment): content, QA-mode answer, keywords and attachment IDs.
# Every field is optional and defaults to None.
# NOTE(review): each field appears twice (bare `= None`, then `Field(...)` with a description); this
# looks like the removed/added lines of a diff rendered without +/- markers — confirm which are current.
class SegmentCreateArgs(BaseModel):
content: str | None = None
answer: str | None = None
keywords: list[str] | None = None
attachment_ids: list[str] | None = None
content: str | None = Field(default=None, description="Chunk text content.")
answer: str | None = Field(default=None, description="Answer content for QA mode.")
keywords: list[str] | None = Field(default=None, description="Keywords for the chunk.")
attachment_ids: list[str] | None = Field(default=None, description="Attachment file IDs.")
# Arguments for updating a chunk (segment). Every field is optional: `regenerate_child_chunks`
# defaults to False and all other fields default to None.
# NOTE(review): each field appears twice (bare form, then `Field(...)` with a description); this
# looks like the removed/added lines of a diff rendered without +/- markers — confirm which are current.
class SegmentUpdateArgs(BaseModel):
content: str | None = None
answer: str | None = None
keywords: list[str] | None = None
regenerate_child_chunks: bool = False
enabled: bool | None = None
attachment_ids: list[str] | None = None
summary: str | None = None # Summary content for summary index
content: str | None = Field(default=None, description="Updated chunk text content.")
answer: str | None = Field(default=None, description="Updated answer content for QA mode.")
keywords: list[str] | None = Field(default=None, description="Updated keywords for the chunk.")
regenerate_child_chunks: bool = Field(
default=False,
description="Whether to regenerate child chunks after updating a parent chunk.",
)
enabled: bool | None = Field(default=None, description="Whether the chunk is enabled.")
attachment_ids: list[str] | None = Field(default=None, description="Attachment file IDs.")
summary: str | None = Field(default=None, description="Summary content for summary index.")
# One child-chunk entry in an update request: required `content` plus an optional `id`
# (defaults to None; per its description, omitting it creates a new child chunk).
# NOTE(review): each field appears twice (bare form, then `Field(...)` with a description); this
# looks like the removed/added lines of a diff rendered without +/- markers — confirm which are current.
class ChildChunkUpdateArgs(BaseModel):
id: str | None = None
content: str
id: str | None = Field(default=None, description="Existing child chunk ID. Omit to create a new child chunk.")
content: str = Field(description="Child chunk text content.")
# Definition of a metadata field: a required `type` limited to `string`, `number` or `time`,
# and a required `name`.
# NOTE(review): each field appears twice (bare annotation, then `Field(...)` with a description); this
# looks like the removed/added lines of a diff rendered without +/- markers — confirm which are current.
class MetadataArgs(BaseModel):
type: Literal["string", "number", "time"]
name: str
type: Literal["string", "number", "time"] = Field(
description="`string` for text values, `number` for numeric values, `time` for date/time values."
)
name: str = Field(description="Metadata field name.")
# Name/value pair for a metadata update: required `name`, optional `value` (str, int, float or
# None; defaults to None).
# NOTE(review): each field appears twice (bare form, then `Field(...)` with a description); this
# looks like the removed/added lines of a diff rendered without +/- markers — confirm which are current.
class MetadataUpdateArgs(BaseModel):
name: str
value: str | int | float | None = None
name: str = Field(description="Metadata field name.")
value: str | int | float | None = Field(
default=None,
description="Metadata value. Can be a string, number, or `null`.",
)
# A metadata field with its identity and value: required `id` and `name`, optional `value`
# (str, int, float or None; defaults to None).
# NOTE(review): each field appears twice (bare form, then `Field(...)` with a description); this
# looks like the removed/added lines of a diff rendered without +/- markers — confirm which are current.
class MetadataDetail(BaseModel):
id: str
name: str
value: str | int | float | None = None
id: str = Field(description="Metadata field ID.")
name: str = Field(description="Metadata field name.")
value: str | int | float | None = Field(
default=None,
description="Metadata value. Can be a string, number, or `null`.",
)
# Metadata update for one document: the target `document_id`, the list of MetadataDetail entries
# to apply, and a `partial_update` flag that defaults to False.
# NOTE(review): each field appears twice (bare form, then `Field(...)` with a description); this
# looks like the removed/added lines of a diff rendered without +/- markers — confirm which are current.
class DocumentMetadataOperation(BaseModel):
document_id: str
metadata_list: list[MetadataDetail]
partial_update: bool = False
document_id: str = Field(description="Document ID whose metadata should be updated.")
metadata_list: list[MetadataDetail] = Field(description="Metadata fields to update.")
partial_update: bool = Field(
default=False,
description="Whether to partially update metadata, keeping existing values for unspecified fields.",
)
class MetadataOperationData(BaseModel):
@ -170,4 +305,8 @@ class MetadataOperationData(BaseModel):
Metadata operation data
"""
operation_data: list[DocumentMetadataOperation]
operation_data: list[DocumentMetadataOperation] = Field(
description=(
"Array of document metadata update operations. Each entry maps a document ID to its metadata values."
)
)

View File

@ -1,22 +1,142 @@
from collections.abc import Mapping
from typing import Any
from typing import Annotated, Any
from pydantic import BaseModel
from pydantic import BaseModel, Field, WithJsonSchema
# Datasource kind. The Python type stays a plain `str`; the schema override documents it as a
# string enum of the four listed datasource types.
# Used by both `DatasourceNodeRunApiEntity` and `PipelineRunApiEntity` below.
DatasourceType = Annotated[
str,
WithJsonSchema({"enum": ["local_file", "online_document", "website_crawl", "online_drive"], "type": "string"}),
]
# Pipeline response mode. The Python type stays a plain `str`; the schema override documents it
# as the string enum `streaming` / `blocking`.
# Used as the type of `PipelineRunApiEntity.response_mode` below.
PipelineResponseMode = Annotated[
str,
WithJsonSchema({"enum": ["streaming", "blocking"], "type": "string"}),
]
# List of datasource descriptors. The Python type remains `list[Mapping[str, Any]]`; the schema
# override documents each item as one of four object shapes, titled "Local File",
# "Online Document", "Website Crawl" and "Online Drive", each with its own `required` list.
# NOTE(review): the branches carry no discriminator and do not forbid extra properties, so an
# item such as {"id": ..., "type": "file", "url": ...} satisfies more than one branch. Under
# strict JSON Schema `oneOf` semantics that item is invalid — confirm whether `anyOf` is intended.
DatasourceInfoList = Annotated[
list[Mapping[str, Any]],
WithJsonSchema(
{
"items": {
"oneOf": [
{
"properties": {
"reference": {
"description": (
"Use the `id` returned by the "
"[Upload Pipeline File](/api-reference/knowledge-pipeline/upload-pipeline-file) "
"endpoint. `related_id` is accepted as an alias."
),
"type": "string",
},
"name": {"description": "Document title. Defaults to `untitled`.", "type": "string"},
},
"required": ["reference"],
"title": "Local File",
"type": "object",
},
{
"properties": {
"workspace_id": {
"description": "ID of the workspace or database in the external platform.",
"type": "string",
},
"page": {
"description": "Page details.",
"properties": {
"page_id": {"description": "Page identifier.", "type": "string"},
"type": {
"description": "Page type defined by the datasource plugin.",
"type": "string",
},
"page_name": {
"description": "Display name. Defaults to `untitled`.",
"type": "string",
},
},
"required": ["page_id", "type"],
"type": "object",
},
"credential_id": {
"description": (
"Credential for authenticating with the external platform. If omitted, the "
"provider's default credential is used."
),
"type": "string",
},
},
"required": ["workspace_id", "page"],
"title": "Online Document",
"type": "object",
},
{
"properties": {
"url": {"description": "URL to crawl.", "type": "string"},
"title": {
"description": "Used as the document name. Defaults to `untitled`.",
"type": "string",
},
},
"required": ["url"],
"title": "Website Crawl",
"type": "object",
},
{
"properties": {
"id": {"description": "File or folder ID.", "type": "string"},
"type": {
"description": "Whether this entry is a single file or a folder to expand.",
"enum": ["file", "folder"],
"type": "string",
},
"bucket": {
"description": (
"Storage bucket name. Required by some drive providers, such as S3-compatible "
"stores; omit if the provider does not use buckets."
),
"type": "string",
},
"name": {"description": "File name. Defaults to `untitled`.", "type": "string"},
},
"required": ["id", "type"],
"title": "Online Drive",
"type": "object",
},
]
},
"type": "array",
}
),
]
# Request entity for running a single datasource node: pipeline and node IDs, the node inputs,
# the datasource type, an optional credential ID (defaults to None) and the `is_published` flag.
# NOTE(review): `datasource_type` appears twice (`str`, then `DatasourceType`); this looks like the
# removed/added lines of a diff rendered without +/- markers — confirm which is current.
class DatasourceNodeRunApiEntity(BaseModel):
pipeline_id: str
node_id: str
inputs: dict[str, Any]
datasource_type: str
datasource_type: DatasourceType
credential_id: str | None = None
is_published: bool
# Request entity for running a pipeline: input variables, datasource type and descriptor list,
# the start node ID, the published/draft selector and the response mode. All fields are required.
# NOTE(review): each field appears twice (bare annotation, then `Field(...)` with a description); this
# looks like the removed/added lines of a diff rendered without +/- markers — confirm which are current.
class PipelineRunApiEntity(BaseModel):
inputs: Mapping[str, Any]
datasource_type: str
datasource_info_list: list[Mapping[str, Any]]
start_node_id: str
is_published: bool
response_mode: str
inputs: Mapping[str, Any] = Field(
description=(
"Key-value pairs for pipeline input variables defined in the workflow. Pass `{}` if the pipeline has "
"no input variables."
)
)
datasource_type: DatasourceType = Field(
description="Type of the datasource. Determines which fields are expected in `datasource_info_list` items."
)
datasource_info_list: DatasourceInfoList = Field(
description="List of datasource objects to process. The expected item structure depends on `datasource_type`."
)
start_node_id: str = Field(description="ID of the datasource node where the run starts.")
is_published: bool = Field(
description=(
"Whether to run the published or draft version of the pipeline. `true` runs the latest published "
"version; `false` runs the current draft (useful for testing unpublished changes)."
)
)
response_mode: PipelineResponseMode = Field(
description="Response mode. Use `streaming` for SSE or `blocking` for JSON."
)

View File

@ -6,6 +6,32 @@ from collections.abc import Iterator
import pytest
from flask import Flask
# Expected OpenAPI schema of the `user` property. The tests in this module compare it with `==`
# against the generated schema, so the description text must match the generated output exactly.
USER_PROPERTY_SCHEMA = {
"description": (
"User identifier, unique within the application. This identifier scopes data access; resources created with "
"one `user` value are only visible when queried with the same `user` value."
),
"type": "string",
}
# Expected schema of a generic binary `file` upload property; asserted for `/files/upload` and the
# pipeline file-upload route in the multipart tests of this module.
GENERIC_FILE_SCHEMA = {"description": "The file to upload.", "format": "binary", "type": "string"}
# Expected schema of the `data` form field on the create-document-by-file route; compared with
# `==` in the document multipart test, so the description must match the generated text exactly.
DOCUMENT_CREATE_DATA_SCHEMA = {
"description": (
"JSON string containing configuration. Accepts the same fields as "
"[Create Document by Text](/api-reference/documents/create-document-by-text) (`indexing_technique`, "
"`doc_form`, `doc_language`, `process_rule`, `retrieval_model`, `embedding_model`, "
"`embedding_model_provider`) except `name` and `text`."
),
"type": "string",
}
# Expected schema of the `data` form field on the update-document-by-file route; compared with
# `==` in the document multipart test, so the description must match the generated text exactly.
DOCUMENT_UPDATE_DATA_SCHEMA = {
"description": (
"JSON string containing document update settings such as `doc_form`, `doc_language`, `process_rule`, "
"`retrieval_model`, `embedding_model`, and `embedding_model_provider`. `name` and `text` are not used "
"for file updates."
),
"type": "string",
}
def _schema_refs(value: object) -> set[str]:
refs: set[str] = set()
@ -180,11 +206,12 @@ def test_service_document_file_routes_document_multipart_form_data(monkeypatch:
create_schema = _multipart_form_schema(create_operation)
create_properties = create_schema["properties"]
assert isinstance(create_properties, dict)
assert create_properties["file"] == {"type": "string", "format": "binary"}
assert create_properties["data"] == {
"description": "Optional JSON string with document creation settings.",
assert create_properties["file"] == {
"description": "Document file to upload.",
"format": "binary",
"type": "string",
}
assert create_properties["data"] == DOCUMENT_CREATE_DATA_SCHEMA
assert create_schema["required"] == ["file"]
assert create_operation["requestBody"]["required"] is True
@ -197,11 +224,12 @@ def test_service_document_file_routes_document_multipart_form_data(monkeypatch:
update_schema = _multipart_form_schema(update_operation)
update_properties = update_schema["properties"]
assert isinstance(update_properties, dict)
assert update_properties["file"] == {"type": "string", "format": "binary"}
assert update_properties["data"] == {
"description": "Optional JSON string with document update settings.",
assert update_properties["file"] == {
"description": "Replacement document file to upload.",
"format": "binary",
"type": "string",
}
assert update_properties["data"] == DOCUMENT_UPDATE_DATA_SCHEMA
assert "required" not in update_schema
assert update_operation["requestBody"]["required"] is False
@ -228,7 +256,7 @@ def test_service_openapi_merges_public_api_reference_descriptions(monkeypatch: p
rename_operation = payload["paths"]["/conversations/{c_id}/name"]["post"]
assert rename_operation["summary"] == "Rename Conversation"
assert rename_operation["tags"] == ["Conversations"]
assert _parameters_by_name(rename_operation)["c_id"]["description"] == "Conversation ID"
assert _parameters_by_name(rename_operation)["c_id"]["description"] == "Conversation ID."
def test_service_document_list_documents_query_params_render(monkeypatch: pytest.MonkeyPatch):
@ -277,7 +305,7 @@ def test_service_openapi_documents_decorator_user_contracts(monkeypatch: pytest.
)
for path, method in required_json_user_operations:
schema = _json_body_schema(payload, paths[path][method])
assert schema["properties"]["user"] == {"description": "End user identifier", "type": "string"}
assert schema["properties"]["user"] == USER_PROPERTY_SCHEMA
assert "user" in schema["required"]
optional_json_user_operations = (
@ -288,7 +316,7 @@ def test_service_openapi_documents_decorator_user_contracts(monkeypatch: pytest.
)
for path, method in optional_json_user_operations:
schema = _json_body_schema(payload, paths[path][method])
assert schema["properties"]["user"] == {"description": "End user identifier", "type": "string"}
assert schema["properties"]["user"] == USER_PROPERTY_SCHEMA
assert "user" not in schema.get("required", [])
messages_params = _parameters_by_name(paths["/messages"]["get"])
@ -316,12 +344,22 @@ def test_service_openapi_documents_app_multipart_contracts(monkeypatch: pytest.M
for path in ("/files/upload", "/audio-to-text"):
schema = _multipart_form_schema(paths[path]["post"])
assert schema["properties"]["file"] == {"format": "binary", "type": "string"}
assert schema["properties"]["user"] == {"description": "End user identifier", "type": "string"}
if path == "/audio-to-text":
assert schema["properties"]["file"] == {
"description": (
"Audio file to transcribe. Supported MIME types: `audio/mp3`, `audio/mpga`, `audio/m4a`, "
"`audio/wav`, and `audio/amr`. File size limit is `30 MB`."
),
"format": "binary",
"type": "string",
}
else:
assert schema["properties"]["file"] == GENERIC_FILE_SCHEMA
assert schema["properties"]["user"] == USER_PROPERTY_SCHEMA
assert schema["required"] == ["file"]
pipeline_schema = _multipart_form_schema(paths["/datasets/pipeline/file-upload"]["post"])
assert pipeline_schema["properties"]["file"] == {"format": "binary", "type": "string"}
assert pipeline_schema["properties"]["file"] == GENERIC_FILE_SCHEMA
assert pipeline_schema["required"] == ["file"]
@ -385,14 +423,14 @@ def test_service_openapi_documents_uuid_params_and_deprecated_routes(monkeypatch
dataset_params = _parameters_by_name(paths["/datasets/{dataset_id}"]["get"])
assert dataset_params["dataset_id"]["schema"] == {
"description": "Dataset ID",
"description": "Knowledge base ID.",
"format": "uuid",
"type": "string",
}
conversation_params = _parameters_by_name(paths["/conversations/{c_id}"]["delete"])
assert conversation_params["c_id"]["schema"] == {
"description": "Conversation ID",
"description": "Conversation ID.",
"format": "uuid",
"type": "string",
}
@ -447,7 +485,7 @@ def test_service_openapi_documents_conditional_payload_schemas(monkeypatch: pyte
assert manual_name_branch["properties"]["name"]["pattern"] == r".*\S.*"
assert manual_name_branch["required"] == ["name"]
for branch in rename_schema["anyOf"]:
assert branch["properties"]["user"] == {"description": "End user identifier", "type": "string"}
assert branch["properties"]["user"] == USER_PROPERTY_SCHEMA
document_update_schema = payload["components"]["schemas"]["DocumentTextUpdate"]
with_text_branch, without_text_branch = document_update_schema["anyOf"]

View File

@ -187,7 +187,7 @@ export type IndexingEstimateResponse = {
export type KnowledgeConfig = {
data_source?: DataSource | null
doc_form?: string
doc_form?: 'hierarchical_model' | 'qa_model' | 'text_model'
doc_language?: string
duplicate?: boolean
embedding_model?: string | null
@ -199,7 +199,10 @@ export type KnowledgeConfig = {
process_rule?: ProcessRule | null
retrieval_model?: RetrievalModel | null
summary_index_setting?: {
[key: string]: unknown
enable?: boolean
model_name?: string
model_provider_name?: string
summary_prompt?: string
} | null
}
@ -482,7 +485,9 @@ export type ExternalRetrievalTestResponse
export type HitTestingPayload = {
attachment_ids?: Array<string> | null
external_retrieval_model?: {
[key: string]: unknown
score_threshold?: number
score_threshold_enabled?: boolean
top_k?: number
} | null
query: string
retrieval_model?: RetrievalModel | null
@ -707,7 +712,7 @@ export type ProcessRule = {
export type RetrievalModel = {
metadata_filtering_conditions?: MetadataFilteringCondition | null
reranking_enable: boolean
reranking_mode?: string | null
reranking_mode?: 'reranking_model' | 'weighted_score' | null
reranking_model?: RerankingModel | null
score_threshold?: number | null
score_threshold_enabled: boolean
@ -1035,7 +1040,7 @@ export type WebsiteInfo = {
// One pre-processing rule toggle: `id` names the rule and `enabled` switches it on or off.
// NOTE(review): `id` is declared twice (`string`, then a three-value literal union), which is a
// duplicate identifier in TypeScript. This looks like the removed/added lines of a diff rendered
// without +/- markers — confirm which line is current.
export type PreProcessingRule = {
enabled: boolean
id: string
id: 'remove_extra_spaces' | 'remove_stopwords' | 'remove_urls_emails'
}
export type Segmentation = {
@ -1065,7 +1070,7 @@ export type Condition = {
| '≤'
| '≥'
name: string
value?: string | Array<string> | number | number | null
value?: string | Array<string> | number | null
}
export type WeightKeywordSetting = {

View File

@ -1176,7 +1176,7 @@ export const zWebsiteInfo = z.object({
*/
// Zod schema for a pre-processing rule: a boolean `enabled` flag and an `id`.
// NOTE(review): `id` is listed twice (`z.string()`, then a three-value `z.enum`); with duplicate
// keys in an object literal only the last one takes effect. This looks like the removed/added
// lines of a diff rendered without +/- markers — confirm which line is current.
export const zPreProcessingRule = z.object({
enabled: z.boolean(),
id: z.string(),
id: z.enum(['remove_extra_spaces', 'remove_stopwords', 'remove_urls_emails']),
})
/**
@ -1233,7 +1233,7 @@ export const zCondition = z.object({
'≥',
]),
name: z.string(),
value: z.union([z.string(), z.array(z.string()), z.int(), z.number()]).nullish(),
value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(),
})
/**
@ -1277,7 +1277,7 @@ export const zWeightModel = z.object({
export const zRetrievalModel = z.object({
metadata_filtering_conditions: zMetadataFilteringCondition.nullish(),
reranking_enable: z.boolean(),
reranking_mode: z.string().nullish(),
reranking_mode: z.enum(['reranking_model', 'weighted_score']).nullish(),
reranking_model: zRerankingModel.nullish(),
score_threshold: z.number().nullish(),
score_threshold_enabled: z.boolean(),
@ -1291,7 +1291,13 @@ export const zRetrievalModel = z.object({
*/
// Zod schema for a hit-testing request: a required `query` of at most 250 characters, plus
// optional/nullable attachment IDs, external retrieval settings and a retrieval model.
// NOTE(review): `external_retrieval_model` is listed twice (a free-form record, then a typed
// object with optional `score_threshold`, `score_threshold_enabled` and integer `top_k`); with
// duplicate keys only the last one takes effect. This looks like the removed/added lines of a
// diff rendered without +/- markers — confirm which definition is current.
export const zHitTestingPayload = z.object({
attachment_ids: z.array(z.string()).nullish(),
external_retrieval_model: z.record(z.string(), z.unknown()).nullish(),
external_retrieval_model: z
.object({
score_threshold: z.number().optional(),
score_threshold_enabled: z.boolean().optional(),
top_k: z.int().optional(),
})
.nullish(),
query: z.string().max(250),
retrieval_model: zRetrievalModel.nullish(),
})
@ -1451,7 +1457,10 @@ export const zDataSource = z.object({
*/
export const zKnowledgeConfig = z.object({
data_source: zDataSource.nullish(),
doc_form: z.string().optional().default('text_model'),
doc_form: z
.enum(['hierarchical_model', 'qa_model', 'text_model'])
.optional()
.default('text_model'),
doc_language: z.string().optional().default('English'),
duplicate: z.boolean().optional().default(true),
embedding_model: z.string().nullish(),
@ -1462,7 +1471,14 @@ export const zKnowledgeConfig = z.object({
original_document_id: z.string().nullish(),
process_rule: zProcessRule.nullish(),
retrieval_model: zRetrievalModel.nullish(),
summary_index_setting: z.record(z.string(), z.unknown()).nullish(),
summary_index_setting: z
.object({
enable: z.boolean().optional(),
model_name: z.string().optional(),
model_provider_name: z.string().optional(),
summary_prompt: z.string().optional(),
})
.nullish(),
})
export const zGetDatasetsQuery = z.object({

View File

@ -156,7 +156,10 @@ export type AudioBinaryResponse = Blob | File
export type WorkflowRunPayload = {
files?: Array<{
[key: string]: unknown
transfer_method: 'local_file' | 'remote_url'
type: 'audio' | 'custom' | 'document' | 'image' | 'video'
upload_file_id?: string
url?: string
}> | null
inputs: {
[key: string]: unknown

View File

@ -128,7 +128,7 @@ export const zSavedMessageCreatePayload = z.object({
* TextToAudioPayload
*/
export const zTextToAudioPayload = z.object({
message_id: z.string().nullish(),
message_id: z.uuid().nullish(),
streaming: z.boolean().nullish(),
text: z.string().nullish(),
voice: z.string().nullish(),
@ -143,7 +143,16 @@ export const zAudioBinaryResponse = z.custom<Blob | File>()
* WorkflowRunPayload
*/
// Zod schema for a workflow run request: a required free-form `inputs` record and an
// optional/nullable `files` array.
// NOTE(review): `files` is listed twice (an array of free-form records, then an array of typed
// objects with required `transfer_method` and `type` enums and optional `upload_file_id` / `url`);
// with duplicate keys only the last one takes effect. This looks like the removed/added lines of a
// diff rendered without +/- markers — confirm which definition is current.
export const zWorkflowRunPayload = z.object({
files: z.array(z.record(z.string(), z.unknown())).nullish(),
files: z
.array(
z.object({
transfer_method: z.enum(['local_file', 'remote_url']),
type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
upload_file_id: z.string().optional(),
url: z.string().optional(),
}),
)
.nullish(),
inputs: z.record(z.string(), z.unknown()),
})

View File

@ -103,15 +103,16 @@ export type ChatRequestPayload = {
auto_generate_name?: boolean
conversation_id?: string | null
files?: Array<{
[key: string]: unknown
transfer_method: 'local_file' | 'remote_url'
type: 'audio' | 'custom' | 'document' | 'image' | 'video'
upload_file_id?: string
url?: string
}> | null
inputs: {
[key: string]: unknown
}
query: string
response_mode?: 'blocking' | 'streaming' | null
retriever_from?: string
trace_session_id?: string | null
workflow_id?: string | null
}
@ -119,15 +120,16 @@ export type ChatRequestPayloadWithUser = {
auto_generate_name?: boolean
conversation_id?: string | null
files?: Array<{
[key: string]: unknown
transfer_method: 'local_file' | 'remote_url'
type: 'audio' | 'custom' | 'document' | 'image' | 'video'
upload_file_id?: string
url?: string
}> | null
inputs: {
[key: string]: unknown
}
query: string
response_mode?: 'blocking' | 'streaming' | null
retriever_from?: string
trace_session_id?: string | null
user: string
workflow_id?: string | null
}
@ -171,28 +173,30 @@ export type ChildChunkUpdatePayload = {
// Request body for a completion call: required free-form `inputs`, with optional `files`, `query`
// and `response_mode` (`blocking` or `streaming`).
// NOTE(review): this block comes from a diff rendered without +/- markers, so removed and added
// lines are mixed. The index signature inside `files` and some trailing fields may be removed-side
// lines — confirm against the generated file before relying on them.
export type CompletionRequestPayload = {
files?: Array<{
[key: string]: unknown
transfer_method: 'local_file' | 'remote_url'
type: 'audio' | 'custom' | 'document' | 'image' | 'video'
upload_file_id?: string
url?: string
}> | null
inputs: {
[key: string]: unknown
}
query?: string
response_mode?: 'blocking' | 'streaming' | null
retriever_from?: string
trace_session_id?: string | null
}
// Same shape as the completion request payload, with an additional required `user` string.
// NOTE(review): this block comes from a diff rendered without +/- markers, so removed and added
// lines are mixed. The index signature inside `files` and some trailing fields may be removed-side
// lines — confirm against the generated file before relying on them.
export type CompletionRequestPayloadWithUser = {
files?: Array<{
[key: string]: unknown
transfer_method: 'local_file' | 'remote_url'
type: 'audio' | 'custom' | 'document' | 'image' | 'video'
upload_file_id?: string
url?: string
}> | null
inputs: {
[key: string]: unknown
}
query?: string
response_mode?: 'blocking' | 'streaming' | null
retriever_from?: string
trace_session_id?: string | null
user: string
}
@ -217,7 +221,7 @@ export type Condition = {
| '≤'
| '≥'
name: string
value?: string | Array<string> | number | number | null
value?: string | Array<string> | number | null
}
export type ConversationInfiniteScrollPagination = {
@ -315,10 +319,13 @@ export type DatasetCreatePayload = {
indexing_technique?: 'economy' | 'high_quality' | null
name: string
permission?: PermissionEnum | null
provider?: string
provider?: 'external' | 'vendor'
retrieval_model?: RetrievalModel | null
summary_index_setting?: {
[key: string]: unknown
enable?: boolean
model_name?: string
model_provider_name?: string
summary_prompt?: string
} | null
}
@ -512,12 +519,14 @@ export type DatasetUpdatePayload = {
external_knowledge_api_id?: string | null
external_knowledge_id?: string | null
external_retrieval_model?: {
[key: string]: unknown
score_threshold?: number
score_threshold_enabled?: boolean
top_k?: number
} | null
indexing_technique?: 'economy' | 'high_quality' | null
name?: string | null
partial_member_list?: Array<{
[key: string]: string
user_id?: string
}> | null
permission?: PermissionEnum | null
retrieval_model?: RetrievalModel | null
@ -544,7 +553,7 @@ export type DatasourceCredentialInfoResponse = {
export type DatasourceNodeRunPayload = {
credential_id?: string | null
datasource_type: string
datasource_type: 'local_file' | 'online_document' | 'online_drive' | 'website_crawl'
inputs: {
[key: string]: unknown
}
@ -626,7 +635,15 @@ export type DocumentListQuery = {
keyword?: string | null
limit?: number
page?: number
status?: string | null
status?:
| 'archived'
| 'available'
| 'disabled'
| 'error'
| 'indexing'
| 'paused'
| 'queuing'
| null
}
export type DocumentListResponse = {
@ -701,11 +718,11 @@ export type DocumentStatusResponse = {
}
export type DocumentTextCreatePayload = {
doc_form?: string
doc_form?: 'hierarchical_model' | 'qa_model' | 'text_model'
doc_language?: string
embedding_model?: string | null
embedding_model_provider?: string | null
indexing_technique?: string | null
indexing_technique?: 'economy' | 'high_quality' | null
name: string
original_document_id?: string | null
process_rule?: ProcessRule | null
@ -715,7 +732,7 @@ export type DocumentTextCreatePayload = {
export type DocumentTextUpdate = (
| {
doc_form?: string
doc_form?: 'hierarchical_model' | 'qa_model' | 'text_model'
doc_language?: string
name: string
process_rule?: ProcessRule | null
@ -723,7 +740,7 @@ export type DocumentTextUpdate = (
text: string
}
| {
doc_form?: string
doc_form?: 'hierarchical_model' | 'qa_model' | 'text_model'
doc_language?: string
name?: string | null
process_rule?: ProcessRule | null
@ -731,7 +748,7 @@ export type DocumentTextUpdate = (
text?: null
}
) & {
doc_form?: string
doc_form?: 'hierarchical_model' | 'qa_model' | 'text_model'
doc_language?: string
name?: string | null
process_rule?: ProcessRule | null
@ -849,7 +866,9 @@ export type HitTestingFile = {
export type HitTestingPayload = {
attachment_ids?: Array<string> | null
external_retrieval_model?: {
[key: string]: unknown
score_threshold?: number
score_threshold_enabled?: boolean
top_k?: number
} | null
query: string
retrieval_model?: RetrievalModel | null
@ -1146,15 +1165,37 @@ export type Parameters = {
// String-literal union of the three permission values; referenced by the `permission` field of
// the dataset payload types in this file.
export type PermissionEnum = 'all_team_members' | 'only_me' | 'partial_members'
export type PipelineRunApiEntity = {
datasource_info_list: Array<{
[key: string]: unknown
}>
datasource_type: string
datasource_info_list: Array<
| {
name?: string
reference: string
}
| {
credential_id?: string
page: {
page_id: string
page_name?: string
type: string
}
workspace_id: string
}
| {
title?: string
url: string
}
| {
bucket?: string
id: string
name?: string
type: 'file' | 'folder'
}
>
datasource_type: 'local_file' | 'online_document' | 'online_drive' | 'website_crawl'
inputs: {
[key: string]: unknown
}
is_published: boolean
response_mode: string
response_mode: 'blocking' | 'streaming'
start_node_id: string
}
@ -1170,7 +1211,7 @@ export type PipelineUploadFileResponse = {
export type PreProcessingRule = {
enabled: boolean
id: string
id: 'remove_extra_spaces' | 'remove_stopwords' | 'remove_urls_emails'
}
export type ProcessRule = {
@ -1231,7 +1272,7 @@ export type RetrievalMethod
export type RetrievalModel = {
metadata_filtering_conditions?: MetadataFilteringCondition | null
reranking_enable: boolean
reranking_mode?: string | null
reranking_mode?: 'reranking_model' | 'weighted_score' | null
reranking_model?: RerankingModel | null
score_threshold?: number | null
score_threshold_enabled: boolean
@ -1578,24 +1619,28 @@ export type WorkflowRunForLogResponse = {
export type WorkflowRunPayload = {
files?: Array<{
[key: string]: unknown
transfer_method: 'local_file' | 'remote_url'
type: 'audio' | 'custom' | 'document' | 'image' | 'video'
upload_file_id?: string
url?: string
}> | null
inputs: {
[key: string]: unknown
}
response_mode?: 'blocking' | 'streaming' | null
trace_session_id?: string | null
}
export type WorkflowRunPayloadWithUser = {
files?: Array<{
[key: string]: unknown
transfer_method: 'local_file' | 'remote_url'
type: 'audio' | 'custom' | 'document' | 'image' | 'video'
upload_file_id?: string
url?: string
}> | null
inputs: {
[key: string]: unknown
}
response_mode?: 'blocking' | 'streaming' | null
trace_session_id?: string | null
user: string
}
@ -1704,7 +1749,7 @@ export type PostAppsAnnotationReplyByActionResponse
export type GetAppsAnnotationReplyByActionStatusByJobIdData = {
body?: never
path: {
action: string
action: 'disable' | 'enable'
job_id: string
}
query?: never
@ -2399,7 +2444,7 @@ export type GetDatasetsByDatasetIdDocumentsData = {
keyword?: string
limit?: number
page?: number
status?: string
status?: 'archived' | 'available' | 'disabled' | 'error' | 'indexing' | 'paused' | 'queuing'
}
url: '/datasets/{dataset_id}/documents'
}
@ -3661,7 +3706,7 @@ export type PostWorkflowsByWorkflowIdRunResponse
export type GetWorkspacesCurrentModelsModelTypesByModelTypeData = {
body?: never
path: {
model_type: string
model_type: 'llm' | 'moderation' | 'rerank' | 'speech2text' | 'text-embedding' | 'tts'
}
query?: never
url: '/workspaces/current/models/model-types/{model_type}'

View File

@ -131,12 +131,19 @@ export const zButtonStyle = z.enum(['accent', 'default', 'ghost', 'primary'])
export const zChatRequestPayload = z.object({
auto_generate_name: z.boolean().optional().default(true),
conversation_id: z.string().nullish(),
files: z.array(z.record(z.string(), z.unknown())).nullish(),
files: z
.array(
z.object({
transfer_method: z.enum(['local_file', 'remote_url']),
type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
upload_file_id: z.string().optional(),
url: z.string().optional(),
}),
)
.nullish(),
inputs: z.record(z.string(), z.unknown()),
query: z.string(),
response_mode: z.enum(['blocking', 'streaming']).nullish(),
retriever_from: z.string().optional().default('dev'),
trace_session_id: z.string().nullish(),
workflow_id: z.string().nullish(),
})
@ -146,12 +153,19 @@ export const zChatRequestPayload = z.object({
export const zChatRequestPayloadWithUser = z.object({
auto_generate_name: z.boolean().optional().default(true),
conversation_id: z.string().nullish(),
files: z.array(z.record(z.string(), z.unknown())).nullish(),
files: z
.array(
z.object({
transfer_method: z.enum(['local_file', 'remote_url']),
type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
upload_file_id: z.string().optional(),
url: z.string().optional(),
}),
)
.nullish(),
inputs: z.record(z.string(), z.unknown()),
query: z.string(),
response_mode: z.enum(['blocking', 'streaming']).nullish(),
retriever_from: z.string().optional().default('dev'),
trace_session_id: z.string().nullish(),
user: z.string(),
workflow_id: z.string().nullish(),
})
@ -215,24 +229,38 @@ export const zChildChunkUpdatePayload = z.object({
* CompletionRequestPayload
*/
export const zCompletionRequestPayload = z.object({
files: z.array(z.record(z.string(), z.unknown())).nullish(),
files: z
.array(
z.object({
transfer_method: z.enum(['local_file', 'remote_url']),
type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
upload_file_id: z.string().optional(),
url: z.string().optional(),
}),
)
.nullish(),
inputs: z.record(z.string(), z.unknown()),
query: z.string().optional().default(''),
response_mode: z.enum(['blocking', 'streaming']).nullish(),
retriever_from: z.string().optional().default('dev'),
trace_session_id: z.string().nullish(),
})
/**
* CompletionRequestPayload
*/
export const zCompletionRequestPayloadWithUser = z.object({
files: z.array(z.record(z.string(), z.unknown())).nullish(),
files: z
.array(
z.object({
transfer_method: z.enum(['local_file', 'remote_url']),
type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
upload_file_id: z.string().optional(),
url: z.string().optional(),
}),
)
.nullish(),
inputs: z.record(z.string(), z.unknown()),
query: z.string().optional().default(''),
response_mode: z.enum(['blocking', 'streaming']).nullish(),
retriever_from: z.string().optional().default('dev'),
trace_session_id: z.string().nullish(),
user: z.string(),
})
@ -263,7 +291,7 @@ export const zCondition = z.object({
'≥',
]),
name: z.string(),
value: z.union([z.string(), z.array(z.string()), z.int(), z.number()]).nullish(),
value: z.union([z.string(), z.array(z.string()), z.number()]).nullish(),
})
/**
@ -657,7 +685,7 @@ export const zDatasourceCredentialInfoResponse = z.object({
*/
export const zDatasourceNodeRunPayload = z.object({
credential_id: z.string().nullish(),
datasource_type: z.string(),
datasource_type: z.enum(['local_file', 'online_document', 'online_drive', 'website_crawl']),
inputs: z.record(z.string(), z.unknown()),
is_published: z.boolean(),
})
@ -710,7 +738,9 @@ export const zDocumentListQuery = z.object({
keyword: z.string().nullish(),
limit: z.int().optional().default(20),
page: z.int().optional().default(1),
status: z.string().nullish(),
status: z
.enum(['archived', 'available', 'disabled', 'error', 'indexing', 'paused', 'queuing'])
.nullish(),
})
/**
@ -1337,11 +1367,37 @@ export const zPermissionEnum = z.enum(['all_team_members', 'only_me', 'partial_m
* PipelineRunApiEntity
*/
export const zPipelineRunApiEntity = z.object({
datasource_info_list: z.array(z.record(z.string(), z.unknown())),
datasource_type: z.string(),
datasource_info_list: z.array(
z.union([
z.object({
name: z.string().optional(),
reference: z.string(),
}),
z.object({
credential_id: z.string().optional(),
page: z.object({
page_id: z.string(),
page_name: z.string().optional(),
type: z.string(),
}),
workspace_id: z.string(),
}),
z.object({
title: z.string().optional(),
url: z.string(),
}),
z.object({
bucket: z.string().optional(),
id: z.string(),
name: z.string().optional(),
type: z.enum(['file', 'folder']),
}),
]),
),
datasource_type: z.enum(['local_file', 'online_document', 'online_drive', 'website_crawl']),
inputs: z.record(z.string(), z.unknown()),
is_published: z.boolean(),
response_mode: z.string(),
response_mode: z.enum(['blocking', 'streaming']),
start_node_id: z.string(),
})
@ -1363,7 +1419,7 @@ export const zPipelineUploadFileResponse = z.object({
*/
export const zPreProcessingRule = z.object({
enabled: z.boolean(),
id: z.string(),
id: z.enum(['remove_extra_spaces', 'remove_stopwords', 'remove_urls_emails']),
})
/**
@ -1779,7 +1835,7 @@ export const zTagUpdatePayload = z.object({
* TextToAudioPayload
*/
export const zTextToAudioPayload = z.object({
message_id: z.string().nullish(),
message_id: z.uuid().nullish(),
streaming: z.boolean().nullish(),
text: z.string().nullish(),
voice: z.string().nullish(),
@ -1789,7 +1845,7 @@ export const zTextToAudioPayload = z.object({
* TextToAudioPayload
*/
export const zTextToAudioPayloadWithUser = z.object({
message_id: z.string().nullish(),
message_id: z.uuid().nullish(),
streaming: z.boolean().nullish(),
text: z.string().nullish(),
user: z.string().optional(),
@ -1956,7 +2012,7 @@ export const zWeightModel = z.object({
export const zRetrievalModel = z.object({
metadata_filtering_conditions: zMetadataFilteringCondition.nullish(),
reranking_enable: z.boolean(),
reranking_mode: z.string().nullish(),
reranking_mode: z.enum(['reranking_model', 'weighted_score']).nullish(),
reranking_model: zRerankingModel.nullish(),
score_threshold: z.number().nullish(),
score_threshold_enabled: z.boolean(),
@ -1977,9 +2033,16 @@ export const zDatasetCreatePayload = z.object({
indexing_technique: z.enum(['economy', 'high_quality']).nullish(),
name: z.string().min(1).max(40),
permission: zPermissionEnum.nullish().default('only_me'),
provider: z.string().optional().default('vendor'),
provider: z.enum(['external', 'vendor']).optional().default('vendor'),
retrieval_model: zRetrievalModel.nullish(),
summary_index_setting: z.record(z.string(), z.unknown()).nullish(),
summary_index_setting: z
.object({
enable: z.boolean().optional(),
model_name: z.string().optional(),
model_provider_name: z.string().optional(),
summary_prompt: z.string().optional(),
})
.nullish(),
})
/**
@ -1991,10 +2054,22 @@ export const zDatasetUpdatePayload = z.object({
embedding_model_provider: z.string().nullish(),
external_knowledge_api_id: z.string().nullish(),
external_knowledge_id: z.string().nullish(),
external_retrieval_model: z.record(z.string(), z.unknown()).nullish(),
external_retrieval_model: z
.object({
score_threshold: z.number().optional(),
score_threshold_enabled: z.boolean().optional(),
top_k: z.int().optional(),
})
.nullish(),
indexing_technique: z.enum(['economy', 'high_quality']).nullish(),
name: z.string().min(1).max(40).nullish(),
partial_member_list: z.array(z.record(z.string(), z.string())).nullish(),
partial_member_list: z
.array(
z.object({
user_id: z.string().optional(),
}),
)
.nullish(),
permission: zPermissionEnum.nullish(),
retrieval_model: zRetrievalModel.nullish(),
})
@ -2003,11 +2078,14 @@ export const zDatasetUpdatePayload = z.object({
* DocumentTextCreatePayload
*/
export const zDocumentTextCreatePayload = z.object({
doc_form: z.string().optional().default('text_model'),
doc_form: z
.enum(['hierarchical_model', 'qa_model', 'text_model'])
.optional()
.default('text_model'),
doc_language: z.string().optional().default('English'),
embedding_model: z.string().nullish(),
embedding_model_provider: z.string().nullish(),
indexing_technique: z.string().nullish(),
indexing_technique: z.enum(['economy', 'high_quality']).nullish(),
name: z.string(),
original_document_id: z.string().nullish(),
process_rule: zProcessRule.nullish(),
@ -2018,7 +2096,10 @@ export const zDocumentTextCreatePayload = z.object({
export const zDocumentTextUpdate = z.intersection(
z.union([
z.object({
doc_form: z.string().optional().default('text_model'),
doc_form: z
.enum(['hierarchical_model', 'qa_model', 'text_model'])
.optional()
.default('text_model'),
doc_language: z.string().optional().default('English'),
name: z.string(),
process_rule: zProcessRule.nullish(),
@ -2026,7 +2107,10 @@ export const zDocumentTextUpdate = z.intersection(
text: z.string(),
}),
z.object({
doc_form: z.string().optional().default('text_model'),
doc_form: z
.enum(['hierarchical_model', 'qa_model', 'text_model'])
.optional()
.default('text_model'),
doc_language: z.string().optional().default('English'),
name: z.string().nullish(),
process_rule: zProcessRule.nullish(),
@ -2035,7 +2119,10 @@ export const zDocumentTextUpdate = z.intersection(
}),
]),
z.object({
doc_form: z.string().optional().default('text_model'),
doc_form: z
.enum(['hierarchical_model', 'qa_model', 'text_model'])
.optional()
.default('text_model'),
doc_language: z.string().optional().default('English'),
name: z.string().nullish(),
process_rule: zProcessRule.nullish(),
@ -2049,7 +2136,13 @@ export const zDocumentTextUpdate = z.intersection(
*/
export const zHitTestingPayload = z.object({
attachment_ids: z.array(z.string()).nullish(),
external_retrieval_model: z.record(z.string(), z.unknown()).nullish(),
external_retrieval_model: z
.object({
score_threshold: z.number().optional(),
score_threshold_enabled: z.boolean().optional(),
top_k: z.int().optional(),
})
.nullish(),
query: z.string().max(250),
retrieval_model: zRetrievalModel.nullish(),
})
@ -2132,20 +2225,36 @@ export const zWorkflowAppLogPaginationResponse = z.object({
* WorkflowRunPayload
*/
export const zWorkflowRunPayload = z.object({
files: z.array(z.record(z.string(), z.unknown())).nullish(),
files: z
.array(
z.object({
transfer_method: z.enum(['local_file', 'remote_url']),
type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
upload_file_id: z.string().optional(),
url: z.string().optional(),
}),
)
.nullish(),
inputs: z.record(z.string(), z.unknown()),
response_mode: z.enum(['blocking', 'streaming']).nullish(),
trace_session_id: z.string().nullish(),
})
/**
* WorkflowRunPayload
*/
export const zWorkflowRunPayloadWithUser = z.object({
files: z.array(z.record(z.string(), z.unknown())).nullish(),
files: z
.array(
z.object({
transfer_method: z.enum(['local_file', 'remote_url']),
type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
upload_file_id: z.string().optional(),
url: z.string().optional(),
}),
)
.nullish(),
inputs: z.record(z.string(), z.unknown()),
response_mode: z.enum(['blocking', 'streaming']).nullish(),
trace_session_id: z.string().nullish(),
user: z.string(),
})
@ -2231,7 +2340,7 @@ export const zPostAppsAnnotationReplyByActionPath = z.object({
export const zPostAppsAnnotationReplyByActionResponse = zAnnotationJobStatusResponse
export const zGetAppsAnnotationReplyByActionStatusByJobIdPath = z.object({
action: z.string(),
action: z.enum(['disable', 'enable']),
job_id: z.uuid(),
})
@ -2553,7 +2662,9 @@ export const zGetDatasetsByDatasetIdDocumentsQuery = z.object({
keyword: z.string().optional(),
limit: z.int().optional().default(20),
page: z.int().optional().default(1),
status: z.string().optional(),
status: z
.enum(['archived', 'available', 'disabled', 'error', 'indexing', 'paused', 'queuing'])
.optional(),
})
/**
@ -3112,8 +3223,8 @@ export const zGetWorkflowByTaskIdEventsQuery = z.object({
export const zGetWorkflowByTaskIdEventsResponse = zEventStreamResponse
export const zGetWorkflowsLogsQuery = z.object({
created_at__after: z.string().optional(),
created_at__before: z.string().optional(),
created_at__after: z.iso.datetime().optional(),
created_at__before: z.iso.datetime().optional(),
created_by_account: z.string().optional(),
created_by_end_user_session_id: z.string().optional(),
keyword: z.string().optional(),
@ -3172,7 +3283,7 @@ export const zPostWorkflowsByWorkflowIdRunPath = z.object({
export const zPostWorkflowsByWorkflowIdRunResponse = zGeneratedAppResponse
export const zGetWorkspacesCurrentModelsModelTypesByModelTypePath = z.object({
model_type: z.string(),
model_type: z.enum(['llm', 'moderation', 'rerank', 'speech2text', 'text-embedding', 'tts']),
})
/**

View File

@ -640,7 +640,10 @@ export type WebMessageListItem = {
export type WorkflowRunPayload = {
files?: Array<{
[key: string]: unknown
transfer_method: 'local_file' | 'remote_url'
type: 'audio' | 'custom' | 'document' | 'image' | 'video'
upload_file_id?: string
url?: string
}> | null
inputs: {
[key: string]: unknown

View File

@ -696,7 +696,7 @@ export const zParameters = z.object({
* TextToAudioPayload
*/
export const zTextToAudioPayload = z.object({
message_id: z.string().nullish(),
message_id: z.uuid().nullish(),
streaming: z.boolean().nullish(),
text: z.string().nullish(),
voice: z.string().nullish(),
@ -906,7 +906,16 @@ export const zWebMessageInfiniteScrollPagination = z.object({
* WorkflowRunPayload
*/
export const zWorkflowRunPayload = z.object({
files: z.array(z.record(z.string(), z.unknown())).nullish(),
files: z
.array(
z.object({
transfer_method: z.enum(['local_file', 'remote_url']),
type: z.enum(['audio', 'custom', 'document', 'image', 'video']),
upload_file_id: z.string().optional(),
url: z.string().optional(),
}),
)
.nullish(),
inputs: z.record(z.string(), z.unknown()),
})