refactor(api): type VDB to_index_struct with VectorIndexStructDict TypedDict (#34674)

This commit is contained in:
Statxc 2026-04-07 10:17:04 -03:00 committed by GitHub
parent c2af415450
commit b5d9a71cf9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 45 additions and 16 deletions

View File

@ -30,7 +30,7 @@ from pymochow.model.table import AnnSearch, BM25SearchRequest, HNSWSearchParams,
from configs import dify_config
from core.rag.datasource.vdb.field import Field as VDBField
from core.rag.datasource.vdb.field import parse_metadata_json
from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_base import BaseVector, VectorIndexStructDict
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
@ -85,8 +85,12 @@ class BaiduVector(BaseVector):
def get_type(self) -> str:
    """Return the vector store type identifier for this backend (``VectorType.BAIDU``)."""
    return VectorType.BAIDU
def to_index_struct(self):
return {"type": self.get_type(), "vector_store": {"class_prefix": self._collection_name}}
def to_index_struct(self) -> VectorIndexStructDict:
    """Return the index structure describing this vector store.

    The mapping carries the store type reported by ``get_type()`` and the
    collection name, stored under ``vector_store.class_prefix``.
    """
    # Bind to an annotated local so the literal is checked against the TypedDict.
    result: VectorIndexStructDict = {
        "type": self.get_type(),
        "vector_store": {"class_prefix": self._collection_name},
    }
    return result
def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
self._create_table(len(embeddings[0]))

View File

@ -22,7 +22,7 @@ from sqlalchemy import select
from configs import dify_config
from core.rag.datasource.vdb.field import Field
from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_base import BaseVector, VectorIndexStructDict
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
@ -94,8 +94,12 @@ class QdrantVector(BaseVector):
def get_type(self) -> str:
    """Return the vector store type identifier for this backend (``VectorType.QDRANT``)."""
    return VectorType.QDRANT
def to_index_struct(self):
return {"type": self.get_type(), "vector_store": {"class_prefix": self._collection_name}}
def to_index_struct(self) -> VectorIndexStructDict:
    """Return the index structure describing this vector store.

    The mapping carries the store type reported by ``get_type()`` and the
    collection name, stored under ``vector_store.class_prefix``.
    """
    # Bind to an annotated local so the literal is checked against the TypedDict.
    result: VectorIndexStructDict = {
        "type": self.get_type(),
        "vector_store": {"class_prefix": self._collection_name},
    }
    return result
def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
if texts:

View File

@ -12,7 +12,7 @@ from tcvectordb.model.document import AnnSearch, Filter, KeywordSearch, Weighted
from configs import dify_config
from core.rag.datasource.vdb.field import parse_metadata_json
from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_base import BaseVector, VectorIndexStructDict
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
@ -83,8 +83,12 @@ class TencentVector(BaseVector):
def get_type(self) -> str:
    """Return the vector store type identifier for this backend (``VectorType.TENCENT``)."""
    return VectorType.TENCENT
def to_index_struct(self):
return {"type": self.get_type(), "vector_store": {"class_prefix": self._collection_name}}
def to_index_struct(self) -> VectorIndexStructDict:
    """Return the index structure describing this vector store.

    The mapping carries the store type reported by ``get_type()`` and the
    collection name, stored under ``vector_store.class_prefix``.
    """
    # Bind to an annotated local so the literal is checked against the TypedDict.
    result: VectorIndexStructDict = {
        "type": self.get_type(),
        "vector_store": {"class_prefix": self._collection_name},
    }
    return result
def _has_collection(self) -> bool:
return bool(

View File

@ -25,7 +25,7 @@ from sqlalchemy import select
from configs import dify_config
from core.rag.datasource.vdb.field import Field
from core.rag.datasource.vdb.tidb_on_qdrant.tidb_service import TidbService
from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_base import BaseVector, VectorIndexStructDict
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
@ -91,8 +91,12 @@ class TidbOnQdrantVector(BaseVector):
def get_type(self) -> str:
    """Return the vector store type identifier for this backend (``VectorType.TIDB_ON_QDRANT``)."""
    return VectorType.TIDB_ON_QDRANT
def to_index_struct(self):
return {"type": self.get_type(), "vector_store": {"class_prefix": self._collection_name}}
def to_index_struct(self) -> VectorIndexStructDict:
    """Return the index structure describing this vector store.

    The mapping carries the store type reported by ``get_type()`` and the
    collection name, stored under ``vector_store.class_prefix``.
    """
    # Bind to an annotated local so the literal is checked against the TypedDict.
    result: VectorIndexStructDict = {
        "type": self.get_type(),
        "vector_store": {"class_prefix": self._collection_name},
    }
    return result
def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
if texts:

View File

@ -1,11 +1,20 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any
from typing import Any, TypedDict
from core.rag.models.document import Document
class VectorStoreDict(TypedDict):
    """Shape of the nested ``vector_store`` entry of an index structure."""

    # Filled with the vector store's collection name by ``to_index_struct``.
    class_prefix: str
class VectorIndexStructDict(TypedDict):
    """Shape of the mapping returned by ``to_index_struct``."""

    # Vector store type identifier (the value of ``get_type()``).
    type: str
    # Nested entry holding the collection name under ``class_prefix``.
    vector_store: VectorStoreDict
class BaseVector(ABC):
def __init__(self, collection_name: str):
self._collection_name = collection_name

View File

@ -24,7 +24,7 @@ from weaviate.exceptions import UnexpectedStatusCodeError
from configs import dify_config
from core.rag.datasource.vdb.field import Field
from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_base import BaseVector, VectorIndexStructDict
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
@ -184,9 +184,13 @@ class WeaviateVector(BaseVector):
dataset_id = dataset.id
return Dataset.gen_collection_name_by_id(dataset_id)
def to_index_struct(self) -> dict:
def to_index_struct(self) -> VectorIndexStructDict:
    """Returns the index structure dictionary for persistence.

    The mapping carries the store type reported by ``get_type()`` and the
    collection name, stored under ``vector_store.class_prefix``.
    """
    # Bind to an annotated local so the literal is checked against the TypedDict.
    result: VectorIndexStructDict = {
        "type": self.get_type(),
        "vector_store": {"class_prefix": self._collection_name},
    }
    return result
def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
"""