mirror of
https://github.com/langgenius/dify.git
synced 2026-04-15 18:06:36 +08:00
fix(api): default parent_mode to paragraph for hierarchical chunking via API (#34635)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
2eb43b1e1f
commit
2dc015b360
@@ -2822,6 +2822,10 @@ class DocumentService:
|
||||
|
||||
knowledge_config.process_rule.rules.pre_processing_rules = list(unique_pre_processing_rule_dicts.values())
|
||||
|
||||
if knowledge_config.process_rule.mode == ProcessRuleMode.HIERARCHICAL:
|
||||
if not knowledge_config.process_rule.rules.parent_mode:
|
||||
knowledge_config.process_rule.rules.parent_mode = "paragraph"
|
||||
|
||||
if not knowledge_config.process_rule.rules.segmentation:
|
||||
raise ValueError("Process rule segmentation is required")
|
||||
|
||||
|
||||
@@ -1069,6 +1069,33 @@ class TestDocumentServiceCreateValidation:
|
||||
assert len(knowledge_config.process_rule.rules.pre_processing_rules) == 1
|
||||
assert knowledge_config.process_rule.rules.pre_processing_rules[0].enabled is False
|
||||
|
||||
def test_process_rule_args_validate_hierarchical_defaults_parent_mode_to_paragraph(self):
    """Validating a hierarchical process rule with no ``parent_mode`` sets it to "paragraph"."""
    # Build the pieces in the same order the nested constructor call evaluated them.
    upload_source = DataSource(
        info_list=InfoList(
            data_source_type="upload_file",
            file_info_list=FileInfo(file_ids=["file-1"]),
        )
    )
    # parent_mode is deliberately left out of the Rule so validation has to fill it in.
    hierarchical_rules = Rule(
        pre_processing_rules=[PreProcessingRule(id="remove_extra_spaces", enabled=True)],
        segmentation=Segmentation(separator="\n", max_tokens=1024),
        subchunk_segmentation=Segmentation(separator="\n", max_tokens=512),
    )
    knowledge_config = KnowledgeConfig(
        indexing_technique="economy",
        data_source=upload_source,
        process_rule=ProcessRule(mode="hierarchical", rules=hierarchical_rules),
    )

    DocumentService.process_rule_args_validate(knowledge_config)

    # Re-read from the config after validation rather than reusing the locals above.
    validated_rule = knowledge_config.process_rule
    assert validated_rule is not None
    assert validated_rule.rules is not None
    assert validated_rule.rules.parent_mode == "paragraph"
|
||||
|
||||
|
||||
class TestDocumentServiceSaveDocumentWithDatasetId:
|
||||
"""Unit tests for non-SQL validation branches in save_document_with_dataset_id."""
|
||||
|
||||
Loading…
Reference in New Issue
Block a user