From 056095238b16431fd3183daec28f7742e4e49bb8 Mon Sep 17 00:00:00 2001 From: wangxiaolei Date: Fri, 23 Jan 2026 11:34:47 +0800 Subject: [PATCH] fix: fix create-by-file doc_form (#31346) --- .../service_api/dataset/document.py | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index c800c0e4e1..49ff4f57dc 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -261,17 +261,6 @@ class DocumentAddByFileApi(DatasetApiResource): @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id, dataset_id): """Create document by upload file.""" - args = {} - if "data" in request.form: - args = json.loads(request.form["data"]) - if "doc_form" not in args: - args["doc_form"] = "text_model" - if "doc_language" not in args: - args["doc_language"] = "English" - - # get dataset info - dataset_id = str(dataset_id) - tenant_id = str(tenant_id) dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() if not dataset: @@ -280,6 +269,18 @@ class DocumentAddByFileApi(DatasetApiResource): if dataset.provider == "external": raise ValueError("External datasets are not supported.") + args = {} + if "data" in request.form: + args = json.loads(request.form["data"]) + if "doc_form" not in args: + args["doc_form"] = dataset.chunk_structure or "text_model" + if "doc_language" not in args: + args["doc_language"] = "English" + + # get dataset info + dataset_id = str(dataset_id) + tenant_id = str(tenant_id) + indexing_technique = args.get("indexing_technique") or dataset.indexing_technique if not indexing_technique: raise ValueError("indexing_technique is required.") @@ -370,17 +371,6 @@ class DocumentUpdateByFileApi(DatasetApiResource): @cloud_edition_billing_rate_limit_check("knowledge", "dataset") def post(self, tenant_id, dataset_id, document_id): """Update document by upload file.""" - args = {} - if "data" in request.form: - args = json.loads(request.form["data"]) - if "doc_form" not in args: - args["doc_form"] = "text_model" - if "doc_language" not in args: - args["doc_language"] = "English" - - # get dataset info - dataset_id = str(dataset_id) - tenant_id = str(tenant_id) dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() if not dataset: @@ -389,6 +379,18 @@ class DocumentUpdateByFileApi(DatasetApiResource): if dataset.provider == "external": raise ValueError("External datasets are not supported.") + args = {} + if "data" in request.form: + args = json.loads(request.form["data"]) + if "doc_form" not in args: + args["doc_form"] = dataset.chunk_structure or "text_model" + if "doc_language" not in args: + args["doc_language"] = "English" + + # get dataset info + dataset_id = str(dataset_id) + tenant_id = str(tenant_id) + # indexing_technique is already set in dataset since this is an update args["indexing_technique"] = dataset.indexing_technique