FEAT: Optimize Tencent Vector BM25 initialization to reduce loading time (#24915)

Co-authored-by: wlleiiwang <wlleiiwang@tencent.com>
This commit is contained in:
wlleiiwang 2025-09-01 21:08:41 +08:00 committed by GitHub
parent 64319c0d56
commit 9486715929
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 5 additions and 3 deletions

View File

@@ -39,6 +39,9 @@ class TencentConfig(BaseModel):
return {"url": self.url, "username": self.username, "key": self.api_key, "timeout": self.timeout}
bm25 = BM25Encoder.default("zh")
class TencentVector(BaseVector):
field_id: str = "id"
field_vector: str = "vector"
@@ -53,7 +56,6 @@ class TencentVector(BaseVector):
self._dimension = 1024
self._init_database()
self._load_collection()
self._bm25 = BM25Encoder.default("zh")
def _load_collection(self):
"""
@@ -186,7 +188,7 @@ class TencentVector(BaseVector):
metadata=metadata,
)
if self._enable_hybrid_search:
doc.__dict__["sparse_vector"] = self._bm25.encode_texts(texts[i])
doc.__dict__["sparse_vector"] = bm25.encode_texts(texts[i])
docs.append(doc)
self._client.upsert(
database_name=self._client_config.database,
@@ -264,7 +266,7 @@ class TencentVector(BaseVector):
match=[
KeywordSearch(
field_name="sparse_vector",
data=self._bm25.encode_queries(query),
data=bm25.encode_queries(query),
),
],
rerank=WeightedRerank(