dify/api/tests/integration_tests/vdb/qdrant/test_qdrant.py

110 lines
4.7 KiB
Python

import uuid
from core.rag.datasource.vdb.qdrant.qdrant_vector import QdrantConfig, QdrantVector
from core.rag.models.document import Document
from tests.integration_tests.vdb.test_vector_store import (
AbstractVectorTest,
setup_mock_redis,
)
class QdrantVectorTest(AbstractVectorTest):
    """Integration test driver for the Qdrant-backed vector store.

    Extends the shared ``AbstractVectorTest`` scenario with Qdrant-specific
    checks: a score-threshold vector search and multi-keyword full-text search.
    """

    def __init__(self):
        """Build a ``QdrantVector`` pointing at the Qdrant endpoint on localhost:6333."""
        super().__init__()
        self.attributes = ["doc_id", "dataset_id", "document_id", "doc_hash"]
        self.vector = QdrantVector(
            collection_name=self.collection_name,
            group_id=self.dataset_id,
            config=QdrantConfig(
                endpoint="http://localhost:6333",
                api_key="difyai123456",
            ),
        )
        # Additional doc IDs for multi-keyword search tests; filled in by
        # setup_multi_keyword_documents().
        self.doc_apple_id = ""
        self.doc_banana_id = ""
        self.doc_both_id = ""

    def search_by_vector(self):
        """Run the inherited vector-search check, then a score-threshold check."""
        super().search_by_vector()
        # only test for qdrant, may not work on other vector stores
        hits_by_vector: list[Document] = self.vector.search_by_vector(
            query_vector=self.example_embedding, score_threshold=1
        )
        assert len(hits_by_vector) == 0

    def _create_document(self, content: str, doc_id: str) -> Document:
        """Create a document with the given content and doc_id.

        ``doc_id`` is reused for the ``doc_hash`` and ``document_id`` metadata
        fields; ``dataset_id`` comes from this test instance.
        """
        return Document(
            page_content=content,
            metadata={
                "doc_id": doc_id,
                "doc_hash": doc_id,
                "document_id": doc_id,
                "dataset_id": self.dataset_id,
            },
        )

    def setup_multi_keyword_documents(self):
        """Create test documents with different keyword combinations for multi-keyword search tests."""
        self.doc_apple_id = str(uuid.uuid4())
        self.doc_banana_id = str(uuid.uuid4())
        self.doc_both_id = str(uuid.uuid4())
        documents = [
            self._create_document("This document contains apple only", self.doc_apple_id),
            self._create_document("This document contains banana only", self.doc_banana_id),
            self._create_document("This document contains both apple and banana", self.doc_both_id),
        ]
        # Every document shares the same example embedding; only the text
        # content matters for the full-text assertions.
        embeddings = [self.example_embedding] * len(documents)
        self.vector.add_texts(documents=documents, embeddings=embeddings)

    def search_by_full_text_multi_keyword(self):
        """Test multi-keyword search returns docs matching ANY keyword (OR logic).

        Requires setup_multi_keyword_documents() to have been called first so
        that the three doc ID attributes refer to stored documents.
        """
        # First verify single keyword searches work correctly
        hits_apple = self.vector.search_by_full_text(query="apple", top_k=10)
        apple_ids = {doc.metadata["doc_id"] for doc in hits_apple}
        assert self.doc_apple_id in apple_ids, "Document with 'apple' should be found"
        assert self.doc_both_id in apple_ids, "Document with 'apple and banana' should be found"

        hits_banana = self.vector.search_by_full_text(query="banana", top_k=10)
        banana_ids = {doc.metadata["doc_id"] for doc in hits_banana}
        assert self.doc_banana_id in banana_ids, "Document with 'banana' should be found"
        assert self.doc_both_id in banana_ids, "Document with 'apple and banana' should be found"

        # Test multi-keyword search returns all matching documents
        hits = self.vector.search_by_full_text(query="apple banana", top_k=10)
        doc_ids = {doc.metadata["doc_id"] for doc in hits}
        assert self.doc_apple_id in doc_ids, "Document with 'apple' should be found in multi-keyword search"
        assert self.doc_banana_id in doc_ids, "Document with 'banana' should be found in multi-keyword search"
        assert self.doc_both_id in doc_ids, "Document with both keywords should be found"
        # Expect 3 results: doc_apple (apple only), doc_banana (banana only), doc_both (contains both)
        assert len(hits) == 3, f"Expected 3 documents, got {len(hits)}"

        # Test keyword order independence
        hits_ba = self.vector.search_by_full_text(query="banana apple", top_k=10)
        ids_ba = {doc.metadata["doc_id"] for doc in hits_ba}
        assert doc_ids == ids_ba, "Keyword order should not affect search results"

        # Test no duplicates in results
        doc_id_list = [doc.metadata["doc_id"] for doc in hits]
        assert len(doc_id_list) == len(set(doc_id_list)), "Search results should not contain duplicates"

    def run_all_tests(self):
        """Run the full Qdrant scenario and always clean up the collection.

        The collection is created first; every later check runs inside a
        ``try`` so that a failing assertion does not leave the collection
        behind on the Qdrant server.
        """
        self.create_vector()
        try:
            self.search_by_vector()
            self.search_by_full_text()
            self.text_exists()
            self.get_ids_by_metadata_field()
            # Multi-keyword search tests
            self.setup_multi_keyword_documents()
            self.search_by_full_text_multi_keyword()
        finally:
            # Cleanup - delete_vector() removes the entire collection
            self.delete_vector()
def test_qdrant_vector(setup_mock_redis):
    """Pytest entry point: run the complete Qdrant vector store scenario.

    ``setup_mock_redis`` is requested as a fixture only; its value is not used
    in the body.
    """
    qdrant_suite = QdrantVectorTest()
    qdrant_suite.run_all_tests()