From 2dc015b360edc852fbfb0fe5a73350b81193c08c Mon Sep 17 00:00:00 2001 From: plind <59729252+plind-dm@users.noreply.github.com> Date: Fri, 10 Apr 2026 19:55:40 +0900 Subject: [PATCH] fix(api): default parent_mode to paragraph for hierarchical chunking via API (#34635) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- api/services/dataset_service.py | 4 +++ .../services/test_dataset_service_document.py | 27 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 3e952059ac..9c71902849 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -2822,6 +2822,10 @@ class DocumentService: knowledge_config.process_rule.rules.pre_processing_rules = list(unique_pre_processing_rule_dicts.values()) + if knowledge_config.process_rule.mode == ProcessRuleMode.HIERARCHICAL: + if not knowledge_config.process_rule.rules.parent_mode: + knowledge_config.process_rule.rules.parent_mode = "paragraph" + if not knowledge_config.process_rule.rules.segmentation: raise ValueError("Process rule segmentation is required") diff --git a/api/tests/unit_tests/services/test_dataset_service_document.py b/api/tests/unit_tests/services/test_dataset_service_document.py index e5a2541da7..9b4734b7ad 100644 --- a/api/tests/unit_tests/services/test_dataset_service_document.py +++ b/api/tests/unit_tests/services/test_dataset_service_document.py @@ -1069,6 +1069,33 @@ class TestDocumentServiceCreateValidation: assert len(knowledge_config.process_rule.rules.pre_processing_rules) == 1 assert knowledge_config.process_rule.rules.pre_processing_rules[0].enabled is False + def test_process_rule_args_validate_hierarchical_defaults_parent_mode_to_paragraph(self): + knowledge_config = KnowledgeConfig( + indexing_technique="economy", + data_source=DataSource( + info_list=InfoList( + data_source_type="upload_file", + file_info_list=FileInfo(file_ids=["file-1"]), + ) + ), + process_rule=ProcessRule( + mode="hierarchical", + rules=Rule( + pre_processing_rules=[ + PreProcessingRule(id="remove_extra_spaces", enabled=True), + ], + segmentation=Segmentation(separator="\n", max_tokens=1024), + subchunk_segmentation=Segmentation(separator="\n", max_tokens=512), + ), + ), + ) + + DocumentService.process_rule_args_validate(knowledge_config) + + assert knowledge_config.process_rule is not None + assert knowledge_config.process_rule.rules is not None + assert knowledge_config.process_rule.rules.parent_mode == "paragraph" + class TestDocumentServiceSaveDocumentWithDatasetId: """Unit tests for non-SQL validation branches in save_document_with_dataset_id."""