fix: implement score_threshold filtering for OceanBase vector search (#28536)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Conner Mo 2025-11-29 18:50:21 +08:00 committed by GitHub
parent 0a2d478749
commit acbc886ecd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -270,6 +270,10 @@ class OceanBaseVector(BaseVector):
self._client.set_ob_hnsw_ef_search(ef_search) self._client.set_ob_hnsw_ef_search(ef_search)
self._hnsw_ef_search = ef_search self._hnsw_ef_search = ef_search
topk = kwargs.get("top_k", 10) topk = kwargs.get("top_k", 10)
try:
score_threshold = float(val) if (val := kwargs.get("score_threshold")) is not None else 0.0
except (ValueError, TypeError) as e:
raise ValueError(f"Invalid score_threshold parameter: {e}") from e
try: try:
cur = self._client.ann_search( cur = self._client.ann_search(
table_name=self._collection_name, table_name=self._collection_name,
@@ -285,14 +289,20 @@ class OceanBaseVector(BaseVector):
raise Exception("Failed to search by vector. ", e) raise Exception("Failed to search by vector. ", e)
docs = [] docs = []
for _text, metadata, distance in cur: for _text, metadata, distance in cur:
metadata = json.loads(metadata) score = 1 - distance / math.sqrt(2)
metadata["score"] = 1 - distance / math.sqrt(2) if score >= score_threshold:
docs.append( try:
Document( metadata = json.loads(metadata)
page_content=_text, except json.JSONDecodeError:
metadata=metadata, logger.warning("Invalid JSON metadata: %s", metadata)
metadata = {}
metadata["score"] = score
docs.append(
Document(
page_content=_text,
metadata=metadata,
)
) )
)
return docs return docs
def delete(self): def delete(self):