diff --git a/api/core/rag/datasource/vdb/tencent/tencent_vector.py b/api/core/rag/datasource/vdb/tencent/tencent_vector.py index 0517d5a6d1..d41dcbc312 100644 --- a/api/core/rag/datasource/vdb/tencent/tencent_vector.py +++ b/api/core/rag/datasource/vdb/tencent/tencent_vector.py @@ -39,6 +39,9 @@ class TencentConfig(BaseModel): return {"url": self.url, "username": self.username, "key": self.api_key, "timeout": self.timeout} +bm25 = BM25Encoder.default("zh") + + class TencentVector(BaseVector): field_id: str = "id" field_vector: str = "vector" @@ -53,7 +56,6 @@ class TencentVector(BaseVector): self._dimension = 1024 self._init_database() self._load_collection() - self._bm25 = BM25Encoder.default("zh") def _load_collection(self): """ @@ -186,7 +188,7 @@ class TencentVector(BaseVector): metadata=metadata, ) if self._enable_hybrid_search: - doc.__dict__["sparse_vector"] = self._bm25.encode_texts(texts[i]) + doc.__dict__["sparse_vector"] = bm25.encode_texts(texts[i]) docs.append(doc) self._client.upsert( database_name=self._client_config.database, @@ -264,7 +266,7 @@ class TencentVector(BaseVector): match=[ KeywordSearch( field_name="sparse_vector", - data=self._bm25.encode_queries(query), + data=bm25.encode_queries(query), ), ], rerank=WeightedRerank(