From 269bf883c2a400565e5146fc6ade6b8385905869 Mon Sep 17 00:00:00 2001 From: FFXN Date: Tue, 3 Mar 2026 14:31:51 +0800 Subject: [PATCH] fix: Add the validation of doc_form in the Document-related service APIs. --- api/controllers/console/datasets/datasets.py | 8 ++++++++ api/controllers/service_api/dataset/document.py | 16 +++++++++++++++- api/models/dataset.py | 1 + .../knowledge_entities/knowledge_entities.py | 15 ++++++++++++++- 4 files changed, 38 insertions(+), 2 deletions(-) diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index a06b872846..b1cde105a9 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -119,6 +119,14 @@ def _validate_indexing_technique(value: str | None) -> str | None: return value +def _validate_doc_form(value: str | None) -> str | None: + if value is None: + return value + if value not in Dataset.DOC_FORM_LIST: + raise ValueError("Invalid doc_form.") + return value + + class DatasetCreatePayload(BaseModel): name: str = Field(..., min_length=1, max_length=40) description: str = Field("", max_length=400) diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index 0aeb4a2d36..dc8da025d4 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -4,7 +4,7 @@ from uuid import UUID from flask import request from flask_restx import marshal -from pydantic import BaseModel, Field, model_validator +from pydantic import BaseModel, Field, field_validator, model_validator from sqlalchemy import desc, select from werkzeug.exceptions import Forbidden, NotFound @@ -60,6 +60,13 @@ class DocumentTextCreatePayload(BaseModel): embedding_model: str | None = None embedding_model_provider: str | None = None + @field_validator("doc_form") + @classmethod + def validate_doc_form(cls, value: str) -> str: + if value not in Dataset.DOC_FORM_LIST: + raise ValueError("Invalid doc_form.") + return value + DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}" @@ -72,6 +79,13 @@ class DocumentTextUpdate(BaseModel): doc_language: str = "English" retrieval_model: RetrievalModel | None = None + @field_validator("doc_form") + @classmethod + def validate_doc_form(cls, value: str) -> str: + if value not in Dataset.DOC_FORM_LIST: + raise ValueError("Invalid doc_form.") + return value + @model_validator(mode="after") def check_text_and_name(self) -> Self: if self.text is not None and self.name is None: diff --git a/api/models/dataset.py b/api/models/dataset.py index e7da2961bc..b7b803f4ea 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -51,6 +51,7 @@ class Dataset(Base): INDEXING_TECHNIQUE_LIST = ["high_quality", "economy", None] PROVIDER_LIST = ["vendor", "external", None] + DOC_FORM_LIST = ["text_model", "qa_model", "hierarchical_model"] id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id: Mapped[str] = mapped_column(StringUUID) diff --git a/api/services/entities/knowledge_entities/knowledge_entities.py b/api/services/entities/knowledge_entities/knowledge_entities.py index 8dc5b93501..66309f0e59 100644 --- a/api/services/entities/knowledge_entities/knowledge_entities.py +++ b/api/services/entities/knowledge_entities/knowledge_entities.py @@ -1,8 +1,9 @@ from enum import StrEnum from typing import Literal -from pydantic import BaseModel +from pydantic import BaseModel, field_validator +from core.rag.index_processor.constant.index_type import IndexStructureType from core.rag.retrieval.retrieval_methods import RetrievalMethod @@ -127,6 +128,18 @@ class KnowledgeConfig(BaseModel): name: str | None = None is_multimodal: bool = False + @field_validator("doc_form") + @classmethod + def validate_doc_form(cls, value: str) -> str: + valid_forms = [ + IndexStructureType.PARAGRAPH_INDEX, + IndexStructureType.QA_INDEX, + IndexStructureType.PARENT_CHILD_INDEX, + ] + if value not in valid_forms: + raise ValueError("Invalid doc_form.") + return value + class SegmentCreateArgs(BaseModel): content: str | None = None