fix: resolve compatibility issues with the summary index feature when using the Weaviate vector database.

This commit is contained in:
FFXN 2026-04-13 18:44:53 +08:00
parent e24b6c27b0
commit 7fd549fd39

View File

@ -20,7 +20,7 @@ from pydantic import BaseModel, model_validator
from weaviate.classes.data import DataObject from weaviate.classes.data import DataObject
from weaviate.classes.init import Auth from weaviate.classes.init import Auth
from weaviate.classes.query import Filter, MetadataQuery from weaviate.classes.query import Filter, MetadataQuery
from weaviate.exceptions import UnexpectedStatusCodeError from weaviate.exceptions import UnexpectedStatusCodeError, WeaviateQueryError
from configs import dify_config from configs import dify_config
from core.rag.datasource.vdb.field import Field from core.rag.datasource.vdb.field import Field
@ -230,6 +230,8 @@ class WeaviateVector(BaseVector):
wc.Property(name="doc_id", data_type=wc.DataType.TEXT), wc.Property(name="doc_id", data_type=wc.DataType.TEXT),
wc.Property(name="doc_type", data_type=wc.DataType.TEXT), wc.Property(name="doc_type", data_type=wc.DataType.TEXT),
wc.Property(name="chunk_index", data_type=wc.DataType.INT), wc.Property(name="chunk_index", data_type=wc.DataType.INT),
wc.Property(name="is_summary", data_type=wc.DataType.BOOL),
wc.Property(name="original_chunk_id", data_type=wc.DataType.TEXT),
], ],
vector_config=wc.Configure.Vectors.self_provided(), vector_config=wc.Configure.Vectors.self_provided(),
) )
@ -262,6 +264,10 @@ class WeaviateVector(BaseVector):
to_add.append(wc.Property(name="doc_type", data_type=wc.DataType.TEXT)) to_add.append(wc.Property(name="doc_type", data_type=wc.DataType.TEXT))
if "chunk_index" not in existing: if "chunk_index" not in existing:
to_add.append(wc.Property(name="chunk_index", data_type=wc.DataType.INT)) to_add.append(wc.Property(name="chunk_index", data_type=wc.DataType.INT))
if "is_summary" not in existing:
to_add.append(wc.Property(name="is_summary", data_type=wc.DataType.BOOL))
if "original_chunk_id" not in existing:
to_add.append(wc.Property(name="original_chunk_id", data_type=wc.DataType.TEXT))
for prop in to_add: for prop in to_add:
try: try:
@ -400,15 +406,20 @@ class WeaviateVector(BaseVector):
top_k = int(kwargs.get("top_k", 4)) top_k = int(kwargs.get("top_k", 4))
score_threshold = float(kwargs.get("score_threshold") or 0.0) score_threshold = float(kwargs.get("score_threshold") or 0.0)
res = col.query.near_vector( query_kwargs = {
near_vector=query_vector, "near_vector": query_vector,
limit=top_k, "limit": top_k,
return_properties=props, "return_properties": props,
return_metadata=MetadataQuery(distance=True), "return_metadata": MetadataQuery(distance=True),
include_vector=False, "include_vector": False,
filters=where, "filters": where,
target_vector="default", "target_vector": "default",
) }
try:
res = col.query.near_vector(**query_kwargs)
except WeaviateQueryError:
self._ensure_properties()
res = col.query.near_vector(**query_kwargs)
docs: list[Document] = [] docs: list[Document] = []
for obj in res.objects: for obj in res.objects:
@ -446,14 +457,19 @@ class WeaviateVector(BaseVector):
top_k = int(kwargs.get("top_k", 4)) top_k = int(kwargs.get("top_k", 4))
res = col.query.bm25( query_kwargs = {
query=query, "query": query,
query_properties=[Field.TEXT_KEY.value], "query_properties": [Field.TEXT_KEY.value],
limit=top_k, "limit": top_k,
return_properties=props, "return_properties": props,
include_vector=True, "include_vector": True,
filters=where, "filters": where,
) }
try:
res = col.query.bm25(**query_kwargs)
except WeaviateQueryError:
self._ensure_properties()
res = col.query.bm25(**query_kwargs)
docs: list[Document] = [] docs: list[Document] = []
for obj in res.objects: for obj in res.objects: