mirror of
https://github.com/langgenius/dify.git
synced 2026-05-12 07:37:09 +08:00
Signed-off-by: majiayu000 <1835304752@qq.com> Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: NeatGuyCoding <15627489+NeatGuyCoding@users.noreply.github.com> Signed-off-by: -LAN- <laipz8200@outlook.com> Signed-off-by: yihong0618 <zouzou0208@gmail.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: 盐粒 Yanli <yanli@dify.ai> Co-authored-by: wangxiaolei <fatelei@gmail.com> Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Cursx <33718736+Cursx@users.noreply.github.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: lif <1835304752@qq.com> Co-authored-by: 非法操作 <hjlarry@163.com> Co-authored-by: Asuka Minato <i@asukaminato.eu.org> Co-authored-by: fenglin <790872612@qq.com> Co-authored-by: qiaofenglin <qiaofenglin@baidu.com> Co-authored-by: -LAN- <laipz8200@outlook.com> Co-authored-by: TomoOkuyama <49631611+TomoOkuyama@users.noreply.github.com> Co-authored-by: Tomo Okuyama <tomo.okuyama@intersystems.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: zyssyz123 <916125788@qq.com> Co-authored-by: hj24 <mambahj24@gmail.com> Co-authored-by: Coding On Star <447357187@qq.com> Co-authored-by: CodingOnStar <hanxujiang@dify.ai> Co-authored-by: yyh <92089059+lyzno1@users.noreply.github.com> Co-authored-by: Xiangxuan Qu <fghpdf@outlook.com> Co-authored-by: fghpdf <fghpdf@users.noreply.github.com> Co-authored-by: coopercoder <whitetiger0127@163.com> Co-authored-by: zhaiguangpeng <zhaiguangpeng@didiglobal.com> Co-authored-by: Junyan Qin (Chin) <rockchinq@gmail.com> Co-authored-by: E.G <146701565+GlobalStar117@users.noreply.github.com> Co-authored-by: GlobalStar117 <GlobalStar117@users.noreply.github.com> Co-authored-by: Claude Haiku 4.5 <noreply@anthropic.com> Co-authored-by: CodingOnStar 
<hanxujiang@dify.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: heyszt <270985384@qq.com> Co-authored-by: NeatGuyCoding <15627489+NeatGuyCoding@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: moonpanda <chuanzegao@163.com> Co-authored-by: warlocgao <warlocgao@tencent.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com> Co-authored-by: KVOJJJin <jzongcode@gmail.com> Co-authored-by: eux <euxx@users.noreply.github.com> Co-authored-by: bangjiehan <bangjiehan@gmail.com> Co-authored-by: FFXN <31929997+FFXN@users.noreply.github.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: Nie Ronghua <nieronghua@sf-express.com> Co-authored-by: JQSevenMiao <141806521+JQSevenMiao@users.noreply.github.com> Co-authored-by: jiasiqi <jiasiqi3@tal.com> Co-authored-by: Seokrin Taron Sung <sungsjade@gmail.com> Co-authored-by: CrabSAMA <40541269+CrabSAMA@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: yihong <zouzou0208@gmail.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: yessenia <yessenia.contact@gmail.com> Co-authored-by: Jax <anobaka@qq.com> Co-authored-by: niveshdandyan <155956228+niveshdandyan@users.noreply.github.com> Co-authored-by: OSS Contributor <oss-contributor@example.com> Co-authored-by: niveshdandyan <niveshdandyan@users.noreply.github.com> Co-authored-by: Sean Kenneth Doherty <Smaster7772@gmail.com>
110 lines
4.7 KiB
Python
110 lines
4.7 KiB
Python
import uuid
|
|
|
|
from core.rag.datasource.vdb.qdrant.qdrant_vector import QdrantConfig, QdrantVector
|
|
from core.rag.models.document import Document
|
|
from tests.integration_tests.vdb.test_vector_store import (
|
|
AbstractVectorTest,
|
|
setup_mock_redis,
|
|
)
|
|
|
|
|
|
class QdrantVectorTest(AbstractVectorTest):
    """Integration checks for the Qdrant vector store.

    Runs the shared ``AbstractVectorTest`` steps and adds two Qdrant-specific
    checks: a vector search with ``score_threshold=1`` and a set of
    multi-keyword full-text searches.
    """

    def __init__(self) -> None:
        """Create the ``QdrantVector`` under test and reset the extra doc IDs."""
        super().__init__()
        self.attributes = ["doc_id", "dataset_id", "document_id", "doc_hash"]
        # NOTE(review): collection_name and dataset_id are read from self here;
        # presumably they are set by AbstractVectorTest.__init__ - confirm.
        self.vector = QdrantVector(
            collection_name=self.collection_name,
            group_id=self.dataset_id,
            config=QdrantConfig(
                endpoint="http://localhost:6333",
                api_key="difyai123456",
            ),
        )
        # Additional doc IDs for multi-keyword search tests; filled in by
        # setup_multi_keyword_documents().
        self.doc_apple_id = ""
        self.doc_banana_id = ""
        self.doc_both_id = ""

    def search_by_vector(self) -> None:
        """Run the inherited vector-search check, then a thresholded search.

        The extra search uses ``score_threshold=1`` and asserts that it
        returns no documents.
        """
        super().search_by_vector()
        # only test for qdrant, may not work on other vector stores
        hits_by_vector: list[Document] = self.vector.search_by_vector(
            query_vector=self.example_embedding, score_threshold=1
        )
        assert len(hits_by_vector) == 0

    def _create_document(self, content: str, doc_id: str) -> Document:
        """Create a document with the given content and doc_id.

        ``doc_id`` is stored under the ``doc_id``, ``doc_hash`` and
        ``document_id`` metadata keys; ``dataset_id`` comes from ``self``.
        """
        return Document(
            page_content=content,
            metadata={
                "doc_id": doc_id,
                "doc_hash": doc_id,
                "document_id": doc_id,
                "dataset_id": self.dataset_id,
            },
        )

    def setup_multi_keyword_documents(self) -> None:
        """Create test documents with different keyword combinations.

        Generates three fresh UUIDs, stores them on ``self`` and adds one
        document per ID to the vector store. Every document is added with the
        same ``self.example_embedding``.
        """
        self.doc_apple_id = str(uuid.uuid4())
        self.doc_banana_id = str(uuid.uuid4())
        self.doc_both_id = str(uuid.uuid4())

        documents = [
            self._create_document("This document contains apple only", self.doc_apple_id),
            self._create_document("This document contains banana only", self.doc_banana_id),
            self._create_document("This document contains both apple and banana", self.doc_both_id),
        ]
        embeddings = [self.example_embedding] * len(documents)

        self.vector.add_texts(documents=documents, embeddings=embeddings)

    def search_by_full_text_multi_keyword(self) -> None:
        """Test multi-keyword search returns docs matching ANY keyword (OR logic).

        Relies on the documents added by ``setup_multi_keyword_documents()``.
        """
        # First verify single keyword searches work correctly
        hits_apple = self.vector.search_by_full_text(query="apple", top_k=10)
        apple_ids = {doc.metadata["doc_id"] for doc in hits_apple}
        assert self.doc_apple_id in apple_ids, "Document with 'apple' should be found"
        assert self.doc_both_id in apple_ids, "Document with 'apple and banana' should be found"

        hits_banana = self.vector.search_by_full_text(query="banana", top_k=10)
        banana_ids = {doc.metadata["doc_id"] for doc in hits_banana}
        assert self.doc_banana_id in banana_ids, "Document with 'banana' should be found"
        assert self.doc_both_id in banana_ids, "Document with 'apple and banana' should be found"

        # Test multi-keyword search returns all matching documents
        hits = self.vector.search_by_full_text(query="apple banana", top_k=10)
        doc_ids = {doc.metadata["doc_id"] for doc in hits}

        assert self.doc_apple_id in doc_ids, "Document with 'apple' should be found in multi-keyword search"
        assert self.doc_banana_id in doc_ids, "Document with 'banana' should be found in multi-keyword search"
        assert self.doc_both_id in doc_ids, "Document with both keywords should be found"
        # Expect 3 results: doc_apple (apple only), doc_banana (banana only), doc_both (contains both)
        assert len(hits) == 3, f"Expected 3 documents, got {len(hits)}"

        # Test keyword order independence
        hits_ba = self.vector.search_by_full_text(query="banana apple", top_k=10)
        ids_ba = {doc.metadata["doc_id"] for doc in hits_ba}
        assert doc_ids == ids_ba, "Keyword order should not affect search results"

        # Test no duplicates in results
        doc_id_list = [doc.metadata["doc_id"] for doc in hits]
        assert len(doc_id_list) == len(set(doc_id_list)), "Search results should not contain duplicates"

    def run_all_tests(self) -> None:
        """Run every check in order and always clean up afterwards.

        ``create_vector()`` stays outside the ``try`` so that a failed setup
        does not trigger cleanup. Once it has succeeded, ``delete_vector()``
        runs in ``finally``, so a failing assertion in any later step no
        longer skips the cleanup call.
        """
        self.create_vector()
        try:
            self.search_by_vector()
            self.search_by_full_text()
            self.text_exists()
            self.get_ids_by_metadata_field()
            # Multi-keyword search tests
            self.setup_multi_keyword_documents()
            self.search_by_full_text_multi_keyword()
        finally:
            # Cleanup - delete_vector() removes the entire collection
            self.delete_vector()
|
|
|
|
|
|
def test_qdrant_vector(setup_mock_redis):
    """Pytest entry point: build the Qdrant suite and run all of its checks.

    ``setup_mock_redis`` is requested as a fixture and is not referenced in
    the body.
    """
    qdrant_suite = QdrantVectorTest()
    qdrant_suite.run_all_tests()
|