From acbc886ecd578b56a745d9187d1064d3ca3cfd87 Mon Sep 17 00:00:00 2001 From: Conner Mo Date: Sat, 29 Nov 2025 18:50:21 +0800 Subject: [PATCH] fix: implement score_threshold filtering for OceanBase vector search (#28536) Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .../vdb/oceanbase/oceanbase_vector.py | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py index b3db7332e8..7b53f47419 100644 --- a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py +++ b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py @@ -270,6 +270,10 @@ class OceanBaseVector(BaseVector): self._client.set_ob_hnsw_ef_search(ef_search) self._hnsw_ef_search = ef_search topk = kwargs.get("top_k", 10) + try: + score_threshold = float(val) if (val := kwargs.get("score_threshold")) is not None else 0.0 + except (ValueError, TypeError) as e: + raise ValueError(f"Invalid score_threshold parameter: {e}") from e try: cur = self._client.ann_search( table_name=self._collection_name, @@ -285,14 +289,20 @@ class OceanBaseVector(BaseVector): raise Exception("Failed to search by vector. ", e) docs = [] for _text, metadata, distance in cur: - metadata = json.loads(metadata) - metadata["score"] = 1 - distance / math.sqrt(2) - docs.append( - Document( - page_content=_text, - metadata=metadata, + score = 1 - distance / math.sqrt(2) + if score >= score_threshold: + try: + metadata = json.loads(metadata) + except json.JSONDecodeError: + logger.warning("Invalid JSON metadata: %s", metadata) + metadata = {} + metadata["score"] = score + docs.append( + Document( + page_content=_text, + metadata=metadata, + ) ) - ) return docs def delete(self):