From 784008997ba3fa874e457401b761db67f223a69c Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Wed, 10 Dec 2025 18:45:43 +0800 Subject: [PATCH] fix parent-child check when child chunk does not exist (#29426) --- api/core/rag/datasource/retrieval_service.py | 19 ++++++++++++++----- api/services/dataset_service.py | 2 ++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py index e4ca25b46b..a139fba4d0 100644 --- a/api/core/rag/datasource/retrieval_service.py +++ b/api/core/rag/datasource/retrieval_service.py @@ -451,12 +451,21 @@ class RetrievalService: "position": child_chunk.position, "score": document.metadata.get("score", 0.0), } - segment_child_map[segment.id]["child_chunks"].append(child_chunk_detail) - segment_child_map[segment.id]["max_score"] = max( - segment_child_map[segment.id]["max_score"], document.metadata.get("score", 0.0) - ) + if segment.id in segment_child_map: + segment_child_map[segment.id]["child_chunks"].append(child_chunk_detail) + segment_child_map[segment.id]["max_score"] = max( + segment_child_map[segment.id]["max_score"], document.metadata.get("score", 0.0) + ) + else: + segment_child_map[segment.id] = { + "max_score": document.metadata.get("score", 0.0), + "child_chunks": [child_chunk_detail], + } if attachment_info: - segment_file_map[segment.id].append(attachment_info) + if segment.id in segment_file_map: + segment_file_map[segment.id].append(attachment_info) + else: + segment_file_map[segment.id] = [attachment_info] else: # Handle normal documents segment = None diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 00f06e9405..7841b8b33d 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -673,6 +673,8 @@ class DatasetService: Returns: str: Action to perform ('add', 'remove', 'update', or None) """ + if "indexing_technique" not in 
data: + return None if dataset.indexing_technique != data["indexing_technique"]: if data["indexing_technique"] == "economy": # Remove embedding model configuration for economy mode