diff --git a/.github/workflows/main-ci.yml b/.github/workflows/main-ci.yml index 59c38b6e7e..ba36b5c07a 100644 --- a/.github/workflows/main-ci.yml +++ b/.github/workflows/main-ci.yml @@ -92,6 +92,7 @@ jobs: vdb: - 'api/core/rag/datasource/**' - 'api/tests/integration_tests/vdb/**' + - 'api/providers/vdb/*/tests/**' - '.github/workflows/vdb-tests.yml' - '.github/workflows/expose_service_ports.sh' - 'docker/.env.example' diff --git a/.github/workflows/vdb-tests-full.yml b/.github/workflows/vdb-tests-full.yml index 72b3ea9aac..f0def8fe7a 100644 --- a/.github/workflows/vdb-tests-full.yml +++ b/.github/workflows/vdb-tests-full.yml @@ -89,7 +89,7 @@ jobs: cp api/tests/integration_tests/.env.example api/tests/integration_tests/.env # - name: Check VDB Ready (TiDB) -# run: uv run --project api python api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py +# run: uv run --project api python api/providers/vdb/tidb-vector/tests/integration_tests/check_tiflash_ready.py - name: Test Vector Stores run: uv run --project api bash dev/pytest/pytest_vdb.sh diff --git a/.github/workflows/vdb-tests.yml b/.github/workflows/vdb-tests.yml index 47ec70f603..f3966f15b9 100644 --- a/.github/workflows/vdb-tests.yml +++ b/.github/workflows/vdb-tests.yml @@ -81,12 +81,12 @@ jobs: cp api/tests/integration_tests/.env.example api/tests/integration_tests/.env # - name: Check VDB Ready (TiDB) -# run: uv run --project api python api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py +# run: uv run --project api python api/providers/vdb/tidb-vector/tests/integration_tests/check_tiflash_ready.py - name: Test Vector Stores run: | uv run --project api pytest --timeout "${PYTEST_TIMEOUT:-180}" \ - api/tests/integration_tests/vdb/chroma \ - api/tests/integration_tests/vdb/pgvector \ - api/tests/integration_tests/vdb/qdrant \ - api/tests/integration_tests/vdb/weaviate + api/providers/vdb/vdb-chroma/tests/integration_tests \ + api/providers/vdb/vdb-pgvector/tests/integration_tests \ 
+ api/providers/vdb/vdb-qdrant/tests/integration_tests \ + api/providers/vdb/vdb-weaviate/tests/integration_tests diff --git a/api/Dockerfile b/api/Dockerfile index 7e0a439954..6098652573 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -21,8 +21,9 @@ RUN apt-get update \ # for building gmpy2 libmpfr-dev libmpc-dev -# Install Python dependencies +# Install Python dependencies (workspace members under providers/vdb/) COPY pyproject.toml uv.lock ./ +COPY providers ./providers RUN uv sync --locked --no-dev # production stage diff --git a/api/commands/vector.py b/api/commands/vector.py index cb7eb7c452..956f20d6bb 100644 --- a/api/commands/vector.py +++ b/api/commands/vector.py @@ -341,11 +341,10 @@ def add_qdrant_index(field: str): click.echo(click.style("No dataset collection bindings found.", fg="red")) return import qdrant_client + from dify_vdb_qdrant.qdrant_vector import PathQdrantParams, QdrantConfig from qdrant_client.http.exceptions import UnexpectedResponse from qdrant_client.http.models import PayloadSchemaType - from core.rag.datasource.vdb.qdrant.qdrant_vector import PathQdrantParams, QdrantConfig - for binding in bindings: if dify_config.QDRANT_URL is None: raise ValueError("Qdrant URL is required.") diff --git a/api/configs/middleware/vdb/hologres_config.py b/api/configs/middleware/vdb/hologres_config.py index 9812cce268..788b3cfb78 100644 --- a/api/configs/middleware/vdb/hologres_config.py +++ b/api/configs/middleware/vdb/hologres_config.py @@ -1,4 +1,3 @@ -from holo_search_sdk.types import BaseQuantizationType, DistanceType, TokenizerType from pydantic import Field from pydantic_settings import BaseSettings @@ -42,17 +41,17 @@ class HologresConfig(BaseSettings): default="public", ) - HOLOGRES_TOKENIZER: TokenizerType = Field( + HOLOGRES_TOKENIZER: str = Field( description="Tokenizer for full-text search index (e.g., 'jieba', 'ik', 'standard', 'simple').", default="jieba", ) - HOLOGRES_DISTANCE_METHOD: DistanceType = Field( + 
HOLOGRES_DISTANCE_METHOD: str = Field( description="Distance method for vector index (e.g., 'Cosine', 'Euclidean', 'InnerProduct').", default="Cosine", ) - HOLOGRES_BASE_QUANTIZATION_TYPE: BaseQuantizationType = Field( + HOLOGRES_BASE_QUANTIZATION_TYPE: str = Field( description="Base quantization type for vector index (e.g., 'rabitq', 'sq8', 'fp16', 'fp32').", default="rabitq", ) diff --git a/api/core/rag/datasource/vdb/vector_backend_registry.py b/api/core/rag/datasource/vdb/vector_backend_registry.py new file mode 100644 index 0000000000..15f4357caf --- /dev/null +++ b/api/core/rag/datasource/vdb/vector_backend_registry.py @@ -0,0 +1,87 @@ +"""Vector store backend discovery. + +Backends live in workspace packages under ``api/providers/vdb/vdb-*/src/dify_vdb_*``. Each package +declares third-party dependencies and registers ``importlib`` entry points in group +``dify.vector_backends`` (see each package's ``pyproject.toml``). + +Shared types and the :class:`~core.rag.datasource.vdb.vector_factory.AbstractVectorFactory` protocol +remain in this package (``vector_base``, ``vector_factory``, ``vector_type``, ``field``). + +Optional **built-in** targets in ``_BUILTIN_VECTOR_FACTORY_TARGETS`` (normally empty) load without a +distribution; entry points take precedence when both exist. + +After changing packages, run ``uv sync`` so installed dist-info entry points match ``pyproject.toml``. +""" + +from __future__ import annotations + +import importlib +import logging +from importlib.metadata import entry_points +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory + +logger = logging.getLogger(__name__) + +_VECTOR_FACTORY_CACHE: dict[str, type[AbstractVectorFactory]] = {} + +# module_path:class_name — optional fallback when no distribution registers the backend.
+_BUILTIN_VECTOR_FACTORY_TARGETS: dict[str, str] = {} + + +def clear_vector_factory_cache() -> None: + """Drop lazily loaded factories (for tests or plugin reload).""" + _VECTOR_FACTORY_CACHE.clear() + + +def _vector_backend_entry_points(): + return entry_points().select(group="dify.vector_backends") + + +def _load_plugin_factory(vector_type: str) -> type[AbstractVectorFactory] | None: + for ep in _vector_backend_entry_points(): + if ep.name != vector_type: + continue + try: + loaded = ep.load() + except Exception: + logger.exception("Failed to load vector backend entry point %s", ep.name) + raise + return loaded # type: ignore[return-value] + return None + + +def _unsupported(vector_type: str) -> ValueError: + installed = sorted(ep.name for ep in _vector_backend_entry_points()) + available_msg = f" Installed backends: {', '.join(installed)}." if installed else " No backends installed." + return ValueError( + f"Vector store {vector_type!r} is not supported.{available_msg} " + "Install a plugin (uv sync --group vdb-all, or vdb- per api/pyproject.toml), " + "or register a dify.vector_backends entry point." 
+ ) + + +def _load_builtin_factory(vector_type: str) -> type[AbstractVectorFactory]: + target = _BUILTIN_VECTOR_FACTORY_TARGETS.get(vector_type) + if not target: + raise _unsupported(vector_type) + module_path, _, attr = target.partition(":") + module = importlib.import_module(module_path) + return getattr(module, attr) # type: ignore[no-any-return] + + +def get_vector_factory_class(vector_type: str) -> type[AbstractVectorFactory]: + """Resolve :class:`AbstractVectorFactory` for a :class:`~VectorType` string value.""" + if vector_type in _VECTOR_FACTORY_CACHE: + return _VECTOR_FACTORY_CACHE[vector_type] + + plugin_cls = _load_plugin_factory(vector_type) + if plugin_cls is not None: + _VECTOR_FACTORY_CACHE[vector_type] = plugin_cls + return plugin_cls + + cls = _load_builtin_factory(vector_type) + _VECTOR_FACTORY_CACHE[vector_type] = cls + return cls diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py index 5d879ac3ca..dddd5fc994 100644 --- a/api/core/rag/datasource/vdb/vector_factory.py +++ b/api/core/rag/datasource/vdb/vector_factory.py @@ -9,6 +9,7 @@ from sqlalchemy import select from configs import dify_config from core.model_manager import ModelManager +from core.rag.datasource.vdb.vector_backend_registry import get_vector_factory_class from core.rag.datasource.vdb.vector_base import BaseVector, VectorIndexStructDict from core.rag.datasource.vdb.vector_type import VectorType from core.rag.embedding.cached_embedding import CacheEmbedding @@ -85,137 +86,7 @@ class Vector: @staticmethod def get_vector_factory(vector_type: str) -> type[AbstractVectorFactory]: - match vector_type: - case VectorType.CHROMA: - from core.rag.datasource.vdb.chroma.chroma_vector import ChromaVectorFactory - - return ChromaVectorFactory - case VectorType.MILVUS: - from core.rag.datasource.vdb.milvus.milvus_vector import MilvusVectorFactory - - return MilvusVectorFactory - case VectorType.ALIBABACLOUD_MYSQL: - from 
core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector import ( - AlibabaCloudMySQLVectorFactory, - ) - - return AlibabaCloudMySQLVectorFactory - case VectorType.MYSCALE: - from core.rag.datasource.vdb.myscale.myscale_vector import MyScaleVectorFactory - - return MyScaleVectorFactory - case VectorType.PGVECTOR: - from core.rag.datasource.vdb.pgvector.pgvector import PGVectorFactory - - return PGVectorFactory - case VectorType.VASTBASE: - from core.rag.datasource.vdb.pyvastbase.vastbase_vector import VastbaseVectorFactory - - return VastbaseVectorFactory - case VectorType.PGVECTO_RS: - from core.rag.datasource.vdb.pgvecto_rs.pgvecto_rs import PGVectoRSFactory - - return PGVectoRSFactory - case VectorType.QDRANT: - from core.rag.datasource.vdb.qdrant.qdrant_vector import QdrantVectorFactory - - return QdrantVectorFactory - case VectorType.RELYT: - from core.rag.datasource.vdb.relyt.relyt_vector import RelytVectorFactory - - return RelytVectorFactory - case VectorType.ELASTICSEARCH: - from core.rag.datasource.vdb.elasticsearch.elasticsearch_vector import ElasticSearchVectorFactory - - return ElasticSearchVectorFactory - case VectorType.ELASTICSEARCH_JA: - from core.rag.datasource.vdb.elasticsearch.elasticsearch_ja_vector import ( - ElasticSearchJaVectorFactory, - ) - - return ElasticSearchJaVectorFactory - case VectorType.TIDB_VECTOR: - from core.rag.datasource.vdb.tidb_vector.tidb_vector import TiDBVectorFactory - - return TiDBVectorFactory - case VectorType.WEAVIATE: - from core.rag.datasource.vdb.weaviate.weaviate_vector import WeaviateVectorFactory - - return WeaviateVectorFactory - case VectorType.TENCENT: - from core.rag.datasource.vdb.tencent.tencent_vector import TencentVectorFactory - - return TencentVectorFactory - case VectorType.ORACLE: - from core.rag.datasource.vdb.oracle.oraclevector import OracleVectorFactory - - return OracleVectorFactory - case VectorType.OPENSEARCH: - from core.rag.datasource.vdb.opensearch.opensearch_vector import 
OpenSearchVectorFactory - - return OpenSearchVectorFactory - case VectorType.ANALYTICDB: - from core.rag.datasource.vdb.analyticdb.analyticdb_vector import AnalyticdbVectorFactory - - return AnalyticdbVectorFactory - case VectorType.COUCHBASE: - from core.rag.datasource.vdb.couchbase.couchbase_vector import CouchbaseVectorFactory - - return CouchbaseVectorFactory - case VectorType.BAIDU: - from core.rag.datasource.vdb.baidu.baidu_vector import BaiduVectorFactory - - return BaiduVectorFactory - case VectorType.VIKINGDB: - from core.rag.datasource.vdb.vikingdb.vikingdb_vector import VikingDBVectorFactory - - return VikingDBVectorFactory - case VectorType.UPSTASH: - from core.rag.datasource.vdb.upstash.upstash_vector import UpstashVectorFactory - - return UpstashVectorFactory - case VectorType.TIDB_ON_QDRANT: - from core.rag.datasource.vdb.tidb_on_qdrant.tidb_on_qdrant_vector import TidbOnQdrantVectorFactory - - return TidbOnQdrantVectorFactory - case VectorType.LINDORM: - from core.rag.datasource.vdb.lindorm.lindorm_vector import LindormVectorStoreFactory - - return LindormVectorStoreFactory - case VectorType.OCEANBASE | VectorType.SEEKDB: - from core.rag.datasource.vdb.oceanbase.oceanbase_vector import OceanBaseVectorFactory - - return OceanBaseVectorFactory - case VectorType.OPENGAUSS: - from core.rag.datasource.vdb.opengauss.opengauss import OpenGaussFactory - - return OpenGaussFactory - case VectorType.TABLESTORE: - from core.rag.datasource.vdb.tablestore.tablestore_vector import TableStoreVectorFactory - - return TableStoreVectorFactory - case VectorType.HUAWEI_CLOUD: - from core.rag.datasource.vdb.huawei.huawei_cloud_vector import HuaweiCloudVectorFactory - - return HuaweiCloudVectorFactory - case VectorType.MATRIXONE: - from core.rag.datasource.vdb.matrixone.matrixone_vector import MatrixoneVectorFactory - - return MatrixoneVectorFactory - case VectorType.CLICKZETTA: - from core.rag.datasource.vdb.clickzetta.clickzetta_vector import ClickzettaVectorFactory - - 
return ClickzettaVectorFactory - case VectorType.IRIS: - from core.rag.datasource.vdb.iris.iris_vector import IrisVectorFactory - - return IrisVectorFactory - case VectorType.HOLOGRES: - from core.rag.datasource.vdb.hologres.hologres_vector import HologresVectorFactory - - return HologresVectorFactory - case _: - raise ValueError(f"Vector store {vector_type} is not supported.") + return get_vector_factory_class(vector_type) def create(self, texts: list | None = None, **kwargs): if texts: diff --git a/api/tests/integration_tests/vdb/test_vector_store.py b/api/core/rag/datasource/vdb/vector_integration_test_support.py similarity index 83% rename from api/tests/integration_tests/vdb/test_vector_store.py rename to api/core/rag/datasource/vdb/vector_integration_test_support.py index a033443cf8..3148b7d5c1 100644 --- a/api/tests/integration_tests/vdb/test_vector_store.py +++ b/api/core/rag/datasource/vdb/vector_integration_test_support.py @@ -1,10 +1,19 @@ +"""Shared helpers for vector DB integration tests (used by workspace packages under ``api/providers/vdb``). + +:class:`AbstractVectorTest` and helper functions live here so package tests can import +``core.rag.datasource.vdb.vector_integration_test_support`` without relying on the +``tests.*`` package. + +The ``setup_mock_redis`` fixture lives in ``api/providers/vdb/conftest.py`` and is +auto-discovered by pytest for all package tests. 
+""" + import uuid -from unittest.mock import MagicMock import pytest +from core.rag.datasource.vdb.vector_base import BaseVector from core.rag.models.document import Document -from extensions import ext_redis from models.dataset import Dataset @@ -25,24 +34,10 @@ def get_example_document(doc_id: str) -> Document: return doc -@pytest.fixture -def setup_mock_redis(): - # get - ext_redis.redis_client.get = MagicMock(return_value=None) - - # set - ext_redis.redis_client.set = MagicMock(return_value=None) - - # lock - mock_redis_lock = MagicMock() - mock_redis_lock.__enter__ = MagicMock() - mock_redis_lock.__exit__ = MagicMock() - ext_redis.redis_client.lock = mock_redis_lock - - class AbstractVectorTest: + vector: BaseVector + def __init__(self): - self.vector = None self.dataset_id = str(uuid.uuid4()) self.collection_name = Dataset.gen_collection_name_by_id(self.dataset_id) + "_test" self.example_doc_id = str(uuid.uuid4()) diff --git a/api/providers/README.md b/api/providers/README.md new file mode 100644 index 0000000000..a00ec8bc52 --- /dev/null +++ b/api/providers/README.md @@ -0,0 +1,12 @@ +# Providers + +This directory holds **optional workspace packages** that plug into Dify’s API core. Providers are responsible for implementing the interfaces and registering themselves to the API core. Provider mechanism allows building the software with selected set of providers so as to enhance the security and flexibility of distributions. + +## Developing Providers + +- [VDB Providers](vdb/README.md) + +## Tests + +Provider tests often live next to the package, e.g. `providers///tests/unit_tests/`. Shared fixtures may live under `providers/` (e.g. `conftest.py`). + diff --git a/api/providers/vdb/README.md b/api/providers/vdb/README.md new file mode 100644 index 0000000000..b5b4197f63 --- /dev/null +++ b/api/providers/vdb/README.md @@ -0,0 +1,58 @@ +# VDB providers + +This directory contains all VDB providers. + +## Architecture +1. 
**Core** (`api/core/rag/datasource/vdb/`) defines the contracts and loads plugins. +2. **Each provider** (`api/providers/vdb/<provider>/`) implements those contracts and registers an entry point. +3. At runtime, **`importlib.metadata.entry_points`** resolves the backend name (e.g. `pgvector`) to a factory class. The registry caches loaded classes (see `vector_backend_registry.py`). + +### Interfaces + +| Piece | Role | |--------|----------| | `AbstractVectorFactory` | You subclass this. Implement `init_vector(dataset, attributes, embeddings) -> BaseVector`. Optionally use `gen_index_struct_dict()` for new datasets. | | `BaseVector` | Your store class subclasses this: `create`, `add_texts`, `search_by_vector`, `delete`, etc. | | `VectorType` | `StrEnum` of supported backend **string ids**. Add a member when you introduce a new backend that should be selectable like existing ones. | | Discovery | Loads `dify.vector_backends` entry points and caches `get_vector_factory_class(vector_type)`. | + +The high-level caller is `Vector` in `vector_factory.py`: it reads the configured or dataset-specific vector type, calls `get_vector_factory_class`, instantiates the factory, and uses the returned `BaseVector` implementation. + +### Entry point name must match the vector type string + +Entry points are registered under the group **`dify.vector_backends`**. The **entry point name** (left-hand side) must be exactly the string used as `vector_type` everywhere else—typically the **`VectorType` enum value** (e.g. `PGVECTOR = "pgvector"` → entry point name `pgvector`; `TIDB_ON_QDRANT = "tidb_on_qdrant"` → `tidb_on_qdrant`). + +In `pyproject.toml`: + +```toml +[project.entry-points."dify.vector_backends"] +pgvector = "dify_vdb_pgvector.pgvector:PGVectorFactory" +``` + +The value is **`module:attribute`**: an importable module path and the class implementing `AbstractVectorFactory`. + +### How registration works + +1. 
On first use, `get_vector_factory_class(vector_type)` looks up `vector_type` in a process cache. +2. If missing, it scans **`entry_points().select(group="dify.vector_backends")`** for an entry whose **`name` equals `vector_type`**. +3. It loads that entry (`ep.load()`), which must return the **factory class** (not an instance). +4. There is an optional internal map `_BUILTIN_VECTOR_FACTORY_TARGETS` for non-distribution builtins; **normal VDB plugins use entry points only**. + +After you change a provider’s `pyproject.toml` (entry points or dependencies), run **`uv sync`** in `api/` so the installed environment’s dist-info matches the project metadata. + +### Package layout (VDB) + +Each backend usually follows: + +- `api/providers/vdb/<provider>/pyproject.toml` — project name `dify-vdb-<provider>`, dependencies, entry points. +- `api/providers/vdb/<provider>/src/dify_vdb_<provider>/` — implementation (e.g. `PGVector`, `PGVectorFactory`). + +See `vdb/vdb-pgvector/` as a reference implementation. + +### Wiring a new backend into the API workspace + +The API uses a **uv workspace** (`api/pyproject.toml`): + +1. **`[tool.uv.workspace]`** — `members = ["providers/vdb/*"]` already includes every subdirectory under `vdb/`; new folders there are workspace members. +2. **`[tool.uv.sources]`** — add a line for your package: `dify-vdb-mine = { workspace = true }`. +3. **`[project.optional-dependencies]`** — add a group such as `vdb-mine = ["dify-vdb-mine"]`, and list `dify-vdb-mine` under `vdb-all` if it should install with the default bundle. 
\ No newline at end of file diff --git a/api/providers/vdb/conftest.py b/api/providers/vdb/conftest.py new file mode 100644 index 0000000000..c4b1cdef29 --- /dev/null +++ b/api/providers/vdb/conftest.py @@ -0,0 +1,22 @@ +from unittest.mock import MagicMock + +import pytest + +from extensions import ext_redis + + +@pytest.fixture(autouse=True) +def _init_mock_redis(): + """Ensure redis_client has a backing client so __getattr__ never raises.""" + if ext_redis.redis_client._client is None: + ext_redis.redis_client.initialize(MagicMock()) + + +@pytest.fixture +def setup_mock_redis(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(ext_redis.redis_client, "get", MagicMock(return_value=None)) + monkeypatch.setattr(ext_redis.redis_client, "set", MagicMock(return_value=None)) + mock_redis_lock = MagicMock() + mock_redis_lock.__enter__ = MagicMock() + mock_redis_lock.__exit__ = MagicMock() + monkeypatch.setattr(ext_redis.redis_client, "lock", mock_redis_lock) diff --git a/api/providers/vdb/vdb-alibabacloud-mysql/pyproject.toml b/api/providers/vdb/vdb-alibabacloud-mysql/pyproject.toml new file mode 100644 index 0000000000..bbc0e06ffa --- /dev/null +++ b/api/providers/vdb/vdb-alibabacloud-mysql/pyproject.toml @@ -0,0 +1,13 @@ +[project] +name = "dify-vdb-alibabacloud-mysql" +version = "0.0.1" +dependencies = [ + "mysql-connector-python>=9.3.0", +] +description = "Dify vector store backend (dify-vdb-alibabacloud-mysql)." 
+ +[project.entry-points."dify.vector_backends"] +alibabacloud_mysql = "dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector:AlibabaCloudMySQLVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/alibabacloud_mysql/__init__.py b/api/providers/vdb/vdb-alibabacloud-mysql/src/dify_vdb_alibabacloud_mysql/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/alibabacloud_mysql/__init__.py rename to api/providers/vdb/vdb-alibabacloud-mysql/src/dify_vdb_alibabacloud_mysql/__init__.py diff --git a/api/core/rag/datasource/vdb/alibabacloud_mysql/alibabacloud_mysql_vector.py b/api/providers/vdb/vdb-alibabacloud-mysql/src/dify_vdb_alibabacloud_mysql/alibabacloud_mysql_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/alibabacloud_mysql/alibabacloud_mysql_vector.py rename to api/providers/vdb/vdb-alibabacloud-mysql/src/dify_vdb_alibabacloud_mysql/alibabacloud_mysql_vector.py diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/alibabacloud_mysql/test_alibabacloud_mysql_factory.py b/api/providers/vdb/vdb-alibabacloud-mysql/tests/unit_tests/test_alibabacloud_mysql_factory.py similarity index 94% rename from api/tests/unit_tests/core/rag/datasource/vdb/alibabacloud_mysql/test_alibabacloud_mysql_factory.py rename to api/providers/vdb/vdb-alibabacloud-mysql/tests/unit_tests/test_alibabacloud_mysql_factory.py index e063a49f22..a907f918c3 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/alibabacloud_mysql/test_alibabacloud_mysql_factory.py +++ b/api/providers/vdb/vdb-alibabacloud-mysql/tests/unit_tests/test_alibabacloud_mysql_factory.py @@ -1,10 +1,9 @@ from types import SimpleNamespace from unittest.mock import MagicMock, patch +import dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector as alibaba_module import pytest - -import core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector as alibaba_module -from 
core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector import AlibabaCloudMySQLVectorFactory +from dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector import AlibabaCloudMySQLVectorFactory def test_validate_distance_function_accepts_supported_values(): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/alibabacloud_mysql/test_alibabacloud_mysql_vector.py b/api/providers/vdb/vdb-alibabacloud-mysql/tests/unit_tests/test_alibabacloud_mysql_vector.py similarity index 87% rename from api/tests/unit_tests/core/rag/datasource/vdb/alibabacloud_mysql/test_alibabacloud_mysql_vector.py rename to api/providers/vdb/vdb-alibabacloud-mysql/tests/unit_tests/test_alibabacloud_mysql_vector.py index 8ccd739e64..54eeb78ca9 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/alibabacloud_mysql/test_alibabacloud_mysql_vector.py +++ b/api/providers/vdb/vdb-alibabacloud-mysql/tests/unit_tests/test_alibabacloud_mysql_vector.py @@ -3,11 +3,11 @@ import unittest from unittest.mock import MagicMock, patch import pytest - -from core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector import ( +from dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector import ( AlibabaCloudMySQLVector, AlibabaCloudMySQLVectorConfig, ) + from core.rag.models.document import Document try: @@ -49,9 +49,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): # Sample embeddings self.sample_embeddings = [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]] - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_init(self, mock_pool_class): """Test AlibabaCloudMySQLVector initialization.""" # Mock the connection pool @@ -76,10 +74,8 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): assert alibabacloud_mysql_vector.distance_function == "cosine" assert alibabacloud_mysql_vector.pool is 
not None - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) - @patch("core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.redis_client") + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.redis_client") def test_create_collection(self, mock_redis, mock_pool_class): """Test collection creation.""" # Mock Redis operations @@ -110,9 +106,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): assert mock_cursor.execute.call_count >= 3 # CREATE TABLE + 2 indexes mock_redis.set.assert_called_once() - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_vector_support_check_success(self, mock_pool_class): """Test successful vector support check.""" # Mock the connection pool @@ -129,9 +123,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): vector_store = AlibabaCloudMySQLVector(self.collection_name, self.config) assert vector_store is not None - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_vector_support_check_failure(self, mock_pool_class): """Test vector support check failure.""" # Mock the connection pool @@ -149,9 +141,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): assert "RDS MySQL Vector functions are not available" in str(context.value) - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + 
@patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_vector_support_check_function_error(self, mock_pool_class): """Test vector support check with function not found error.""" # Mock the connection pool @@ -170,10 +160,8 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): assert "RDS MySQL Vector functions are not available" in str(context.value) - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) - @patch("core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.redis_client") + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.redis_client") def test_create_documents(self, mock_redis, mock_pool_class): """Test creating documents with embeddings.""" # Setup mocks @@ -186,9 +174,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): assert "doc1" in result assert "doc2" in result - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_add_texts(self, mock_pool_class): """Test adding texts to the vector store.""" # Mock the connection pool @@ -207,9 +193,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): assert len(result) == 2 mock_cursor.executemany.assert_called_once() - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_text_exists(self, mock_pool_class): """Test checking if text exists.""" # Mock the connection pool @@ -236,9 +220,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): 
assert "SELECT id FROM" in last_call[0][0] assert last_call[0][1] == ("doc1",) - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_text_not_exists(self, mock_pool_class): """Test checking if text does not exist.""" # Mock the connection pool @@ -260,9 +242,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): assert not exists - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_get_by_ids(self, mock_pool_class): """Test getting documents by IDs.""" # Mock the connection pool @@ -288,9 +268,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): assert docs[0].page_content == "Test document 1" assert docs[1].page_content == "Test document 2" - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_get_by_ids_empty_list(self, mock_pool_class): """Test getting documents with empty ID list.""" # Mock the connection pool @@ -308,9 +286,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): assert len(docs) == 0 - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_delete_by_ids(self, mock_pool_class): """Test deleting documents by IDs.""" # Mock the connection pool @@ -334,9 +310,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): assert "DELETE FROM" in delete_call[0][0] assert 
delete_call[0][1] == ["doc1", "doc2"] - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_delete_by_ids_empty_list(self, mock_pool_class): """Test deleting with empty ID list.""" # Mock the connection pool @@ -357,9 +331,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): delete_calls = [call for call in execute_calls if "DELETE" in str(call)] assert len(delete_calls) == 0 - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_delete_by_ids_table_not_exists(self, mock_pool_class): """Test deleting when table doesn't exist.""" # Mock the connection pool @@ -384,9 +356,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): # Should not raise an exception vector_store.delete_by_ids(["doc1"]) - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_delete_by_metadata_field(self, mock_pool_class): """Test deleting documents by metadata field.""" # Mock the connection pool @@ -410,9 +380,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): assert "JSON_UNQUOTE(JSON_EXTRACT(meta" in delete_call[0][0] assert delete_call[0][1] == ("$.document_id", "dataset1") - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_search_by_vector_cosine(self, mock_pool_class): """Test vector search with 
cosine distance.""" # Mock the connection pool @@ -437,9 +405,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): assert abs(docs[0].metadata["score"] - 0.9) < 0.1 # 1 - 0.1 = 0.9 assert docs[0].metadata["distance"] == 0.1 - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_search_by_vector_euclidean(self, mock_pool_class): """Test vector search with euclidean distance.""" config = AlibabaCloudMySQLVectorConfig( @@ -472,9 +438,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): assert len(docs) == 1 assert abs(docs[0].metadata["score"] - 1.0 / 3.0) < 0.01 # 1/(1+2) = 1/3 - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_search_by_vector_with_filter(self, mock_pool_class): """Test vector search with document ID filter.""" # Mock the connection pool @@ -499,9 +463,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): search_call = search_calls[0] assert "WHERE JSON_UNQUOTE" in search_call[0][0] - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_search_by_vector_with_score_threshold(self, mock_pool_class): """Test vector search with score threshold.""" # Mock the connection pool @@ -536,9 +498,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): assert len(docs) == 1 assert docs[0].page_content == "High similarity document" - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + 
@patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_search_by_vector_invalid_top_k(self, mock_pool_class): """Test vector search with invalid top_k.""" # Mock the connection pool @@ -560,9 +520,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): with pytest.raises(ValueError): vector_store.search_by_vector(query_vector, top_k="invalid") - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_search_by_full_text(self, mock_pool_class): """Test full-text search.""" # Mock the connection pool @@ -591,9 +549,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): assert docs[0].page_content == "This document contains machine learning content" assert docs[0].metadata["score"] == 1.5 - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_search_by_full_text_with_filter(self, mock_pool_class): """Test full-text search with document ID filter.""" # Mock the connection pool @@ -617,9 +573,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): search_call = search_calls[0] assert "AND JSON_UNQUOTE" in search_call[0][0] - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_search_by_full_text_invalid_top_k(self, mock_pool_class): """Test full-text search with invalid top_k.""" # Mock the connection pool @@ -640,9 +594,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): with pytest.raises(ValueError): 
vector_store.search_by_full_text("test", top_k="invalid") - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_delete_collection(self, mock_pool_class): """Test deleting the entire collection.""" # Mock the connection pool @@ -665,9 +617,7 @@ class TestAlibabaCloudMySQLVector(unittest.TestCase): drop_call = drop_calls[0] assert f"DROP TABLE IF EXISTS {self.collection_name.lower()}" in drop_call[0][0] - @patch( - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool" - ) + @patch("dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector.mysql.connector.pooling.MySQLConnectionPool") def test_unsupported_distance_function(self, mock_pool_class): """Test that Pydantic validation rejects unsupported distance functions.""" # Test that creating config with unsupported distance function raises ValidationError diff --git a/api/providers/vdb/vdb-analyticdb/pyproject.toml b/api/providers/vdb/vdb-analyticdb/pyproject.toml new file mode 100644 index 0000000000..af5def3061 --- /dev/null +++ b/api/providers/vdb/vdb-analyticdb/pyproject.toml @@ -0,0 +1,15 @@ +[project] +name = "dify-vdb-analyticdb" +version = "0.0.1" +dependencies = [ + "alibabacloud_gpdb20160503~=5.2.0", + "alibabacloud_tea_openapi~=0.4.3", + "clickhouse-connect~=0.15.0", +] +description = "Dify vector store backend (dify-vdb-analyticdb)." 
+ +[project.entry-points."dify.vector_backends"] +analyticdb = "dify_vdb_analyticdb.analyticdb_vector:AnalyticdbVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/analyticdb/__init__.py b/api/providers/vdb/vdb-analyticdb/src/dify_vdb_analyticdb/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/analyticdb/__init__.py rename to api/providers/vdb/vdb-analyticdb/src/dify_vdb_analyticdb/__init__.py diff --git a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py b/api/providers/vdb/vdb-analyticdb/src/dify_vdb_analyticdb/analyticdb_vector.py similarity index 95% rename from api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py rename to api/providers/vdb/vdb-analyticdb/src/dify_vdb_analyticdb/analyticdb_vector.py index 79cc5f0344..e56bb74ba3 100644 --- a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py +++ b/api/providers/vdb/vdb-analyticdb/src/dify_vdb_analyticdb/analyticdb_vector.py @@ -2,16 +2,16 @@ import json from typing import Any from configs import dify_config -from core.rag.datasource.vdb.analyticdb.analyticdb_vector_openapi import ( - AnalyticdbVectorOpenAPI, - AnalyticdbVectorOpenAPIConfig, -) -from core.rag.datasource.vdb.analyticdb.analyticdb_vector_sql import AnalyticdbVectorBySql, AnalyticdbVectorBySqlConfig from core.rag.datasource.vdb.vector_base import BaseVector from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory from core.rag.datasource.vdb.vector_type import VectorType from core.rag.embedding.embedding_base import Embeddings from core.rag.models.document import Document +from dify_vdb_analyticdb.analyticdb_vector_openapi import ( + AnalyticdbVectorOpenAPI, + AnalyticdbVectorOpenAPIConfig, +) +from dify_vdb_analyticdb.analyticdb_vector_sql import AnalyticdbVectorBySql, AnalyticdbVectorBySqlConfig from models.dataset import Dataset diff --git a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_openapi.py 
b/api/providers/vdb/vdb-analyticdb/src/dify_vdb_analyticdb/analyticdb_vector_openapi.py similarity index 100% rename from api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_openapi.py rename to api/providers/vdb/vdb-analyticdb/src/dify_vdb_analyticdb/analyticdb_vector_openapi.py diff --git a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_sql.py b/api/providers/vdb/vdb-analyticdb/src/dify_vdb_analyticdb/analyticdb_vector_sql.py similarity index 100% rename from api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_sql.py rename to api/providers/vdb/vdb-analyticdb/src/dify_vdb_analyticdb/analyticdb_vector_sql.py diff --git a/api/tests/integration_tests/vdb/analyticdb/test_analyticdb.py b/api/providers/vdb/vdb-analyticdb/tests/integration_tests/test_analyticdb.py similarity index 79% rename from api/tests/integration_tests/vdb/analyticdb/test_analyticdb.py rename to api/providers/vdb/vdb-analyticdb/tests/integration_tests/test_analyticdb.py index 0981523809..2bb413dcc1 100644 --- a/api/tests/integration_tests/vdb/analyticdb/test_analyticdb.py +++ b/api/providers/vdb/vdb-analyticdb/tests/integration_tests/test_analyticdb.py @@ -1,9 +1,8 @@ -from core.rag.datasource.vdb.analyticdb.analyticdb_vector import AnalyticdbVector -from core.rag.datasource.vdb.analyticdb.analyticdb_vector_openapi import AnalyticdbVectorOpenAPIConfig -from core.rag.datasource.vdb.analyticdb.analyticdb_vector_sql import AnalyticdbVectorBySqlConfig -from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest +from dify_vdb_analyticdb.analyticdb_vector import AnalyticdbVector +from dify_vdb_analyticdb.analyticdb_vector_openapi import AnalyticdbVectorOpenAPIConfig +from dify_vdb_analyticdb.analyticdb_vector_sql import AnalyticdbVectorBySqlConfig -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) +from core.rag.datasource.vdb.vector_integration_test_support import AbstractVectorTest class AnalyticdbVectorTest(AbstractVectorTest): diff --git 
a/api/tests/unit_tests/core/rag/datasource/vdb/analyticdb/test_analyticdb_vector.py b/api/providers/vdb/vdb-analyticdb/tests/unit_tests/test_analyticdb_vector.py similarity index 93% rename from api/tests/unit_tests/core/rag/datasource/vdb/analyticdb/test_analyticdb_vector.py rename to api/providers/vdb/vdb-analyticdb/tests/unit_tests/test_analyticdb_vector.py index d4fa4b3e8e..d1d471761d 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/analyticdb/test_analyticdb_vector.py +++ b/api/providers/vdb/vdb-analyticdb/tests/unit_tests/test_analyticdb_vector.py @@ -1,12 +1,12 @@ from types import SimpleNamespace from unittest.mock import MagicMock, patch +import dify_vdb_analyticdb.analyticdb_vector as analyticdb_module import pytest +from dify_vdb_analyticdb.analyticdb_vector import AnalyticdbVector, AnalyticdbVectorFactory +from dify_vdb_analyticdb.analyticdb_vector_openapi import AnalyticdbVectorOpenAPIConfig +from dify_vdb_analyticdb.analyticdb_vector_sql import AnalyticdbVectorBySqlConfig -import core.rag.datasource.vdb.analyticdb.analyticdb_vector as analyticdb_module -from core.rag.datasource.vdb.analyticdb.analyticdb_vector import AnalyticdbVector, AnalyticdbVectorFactory -from core.rag.datasource.vdb.analyticdb.analyticdb_vector_openapi import AnalyticdbVectorOpenAPIConfig -from core.rag.datasource.vdb.analyticdb.analyticdb_vector_sql import AnalyticdbVectorBySqlConfig from core.rag.models.document import Document diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/analyticdb/test_analyticdb_vector_openapi.py b/api/providers/vdb/vdb-analyticdb/tests/unit_tests/test_analyticdb_vector_openapi.py similarity index 98% rename from api/tests/unit_tests/core/rag/datasource/vdb/analyticdb/test_analyticdb_vector_openapi.py rename to api/providers/vdb/vdb-analyticdb/tests/unit_tests/test_analyticdb_vector_openapi.py index 4f8653a926..d2d735ae3e 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/analyticdb/test_analyticdb_vector_openapi.py +++ 
b/api/providers/vdb/vdb-analyticdb/tests/unit_tests/test_analyticdb_vector_openapi.py @@ -4,13 +4,13 @@ import types from types import SimpleNamespace from unittest.mock import MagicMock +import dify_vdb_analyticdb.analyticdb_vector_openapi as openapi_module import pytest - -import core.rag.datasource.vdb.analyticdb.analyticdb_vector_openapi as openapi_module -from core.rag.datasource.vdb.analyticdb.analyticdb_vector_openapi import ( +from dify_vdb_analyticdb.analyticdb_vector_openapi import ( AnalyticdbVectorOpenAPI, AnalyticdbVectorOpenAPIConfig, ) + from core.rag.models.document import Document diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/analyticdb/test_analyticdb_vector_sql.py b/api/providers/vdb/vdb-analyticdb/tests/unit_tests/test_analyticdb_vector_sql.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/analyticdb/test_analyticdb_vector_sql.py rename to api/providers/vdb/vdb-analyticdb/tests/unit_tests/test_analyticdb_vector_sql.py index f798ef8bd1..49a2ae72d0 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/analyticdb/test_analyticdb_vector_sql.py +++ b/api/providers/vdb/vdb-analyticdb/tests/unit_tests/test_analyticdb_vector_sql.py @@ -2,14 +2,14 @@ from contextlib import contextmanager from types import SimpleNamespace from unittest.mock import MagicMock +import dify_vdb_analyticdb.analyticdb_vector_sql as sql_module import psycopg2.errors import pytest - -import core.rag.datasource.vdb.analyticdb.analyticdb_vector_sql as sql_module -from core.rag.datasource.vdb.analyticdb.analyticdb_vector_sql import ( +from dify_vdb_analyticdb.analyticdb_vector_sql import ( AnalyticdbVectorBySql, AnalyticdbVectorBySqlConfig, ) + from core.rag.models.document import Document diff --git a/api/providers/vdb/vdb-baidu/pyproject.toml b/api/providers/vdb/vdb-baidu/pyproject.toml new file mode 100644 index 0000000000..bacff08793 --- /dev/null +++ b/api/providers/vdb/vdb-baidu/pyproject.toml @@ -0,0 +1,13 @@ +[project] +name = 
"dify-vdb-baidu" +version = "0.0.1" +dependencies = [ + "pymochow==2.4.0", +] +description = "Dify vector store backend (dify-vdb-baidu)." + +[project.entry-points."dify.vector_backends"] +baidu = "dify_vdb_baidu.baidu_vector:BaiduVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/baidu/__init__.py b/api/providers/vdb/vdb-baidu/src/dify_vdb_baidu/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/baidu/__init__.py rename to api/providers/vdb/vdb-baidu/src/dify_vdb_baidu/__init__.py diff --git a/api/core/rag/datasource/vdb/baidu/baidu_vector.py b/api/providers/vdb/vdb-baidu/src/dify_vdb_baidu/baidu_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/baidu/baidu_vector.py rename to api/providers/vdb/vdb-baidu/src/dify_vdb_baidu/baidu_vector.py diff --git a/api/tests/integration_tests/vdb/__mock/baiduvectordb.py b/api/providers/vdb/vdb-baidu/tests/integration_tests/conftest.py similarity index 100% rename from api/tests/integration_tests/vdb/__mock/baiduvectordb.py rename to api/providers/vdb/vdb-baidu/tests/integration_tests/conftest.py diff --git a/api/tests/integration_tests/vdb/baidu/test_baidu.py b/api/providers/vdb/vdb-baidu/tests/integration_tests/test_baidu.py similarity index 73% rename from api/tests/integration_tests/vdb/baidu/test_baidu.py rename to api/providers/vdb/vdb-baidu/tests/integration_tests/test_baidu.py index 716f88af67..2c1d0e3554 100644 --- a/api/tests/integration_tests/vdb/baidu/test_baidu.py +++ b/api/providers/vdb/vdb-baidu/tests/integration_tests/test_baidu.py @@ -1,10 +1,6 @@ -from core.rag.datasource.vdb.baidu.baidu_vector import BaiduConfig, BaiduVector -from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, get_example_text +from dify_vdb_baidu.baidu_vector import BaiduConfig, BaiduVector -pytest_plugins = ( - "tests.integration_tests.vdb.test_vector_store", - "tests.integration_tests.vdb.__mock.baiduvectordb", -) 
+from core.rag.datasource.vdb.vector_integration_test_support import AbstractVectorTest, get_example_text class BaiduVectorTest(AbstractVectorTest): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/baidu/test_baidu_vector.py b/api/providers/vdb/vdb-baidu/tests/unit_tests/test_baidu_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/baidu/test_baidu_vector.py rename to api/providers/vdb/vdb-baidu/tests/unit_tests/test_baidu_vector.py index 487d021697..851c09f47a 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/baidu/test_baidu_vector.py +++ b/api/providers/vdb/vdb-baidu/tests/unit_tests/test_baidu_vector.py @@ -124,7 +124,7 @@ def _build_fake_pymochow_modules(): def baidu_module(monkeypatch): for name, module in _build_fake_pymochow_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.baidu.baidu_vector as module + import dify_vdb_baidu.baidu_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-chroma/pyproject.toml b/api/providers/vdb/vdb-chroma/pyproject.toml new file mode 100644 index 0000000000..b37ee2a588 --- /dev/null +++ b/api/providers/vdb/vdb-chroma/pyproject.toml @@ -0,0 +1,13 @@ +[project] +name = "dify-vdb-chroma" +version = "0.0.1" +dependencies = [ + "chromadb==0.5.20", +] +description = "Dify vector store backend (dify-vdb-chroma)." 
+ +[project.entry-points."dify.vector_backends"] +chroma = "dify_vdb_chroma.chroma_vector:ChromaVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/chroma/__init__.py b/api/providers/vdb/vdb-chroma/src/dify_vdb_chroma/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/chroma/__init__.py rename to api/providers/vdb/vdb-chroma/src/dify_vdb_chroma/__init__.py diff --git a/api/core/rag/datasource/vdb/chroma/chroma_vector.py b/api/providers/vdb/vdb-chroma/src/dify_vdb_chroma/chroma_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/chroma/chroma_vector.py rename to api/providers/vdb/vdb-chroma/src/dify_vdb_chroma/chroma_vector.py diff --git a/api/tests/integration_tests/vdb/chroma/test_chroma.py b/api/providers/vdb/vdb-chroma/tests/integration_tests/test_chroma.py similarity index 80% rename from api/tests/integration_tests/vdb/chroma/test_chroma.py rename to api/providers/vdb/vdb-chroma/tests/integration_tests/test_chroma.py index 52beba9979..87c259f3d0 100644 --- a/api/tests/integration_tests/vdb/chroma/test_chroma.py +++ b/api/providers/vdb/vdb-chroma/tests/integration_tests/test_chroma.py @@ -1,13 +1,11 @@ import chromadb +from dify_vdb_chroma.chroma_vector import ChromaConfig, ChromaVector -from core.rag.datasource.vdb.chroma.chroma_vector import ChromaConfig, ChromaVector -from tests.integration_tests.vdb.test_vector_store import ( +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, get_example_text, ) -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) - class ChromaVectorTest(AbstractVectorTest): def __init__(self): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/chroma/test_chroma_vector.py b/api/providers/vdb/vdb-chroma/tests/unit_tests/test_chroma_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/chroma/test_chroma_vector.py rename to 
api/providers/vdb/vdb-chroma/tests/unit_tests/test_chroma_vector.py index 44427b7d87..b209c9df96 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/chroma/test_chroma_vector.py +++ b/api/providers/vdb/vdb-chroma/tests/unit_tests/test_chroma_vector.py @@ -47,7 +47,7 @@ def _build_fake_chroma_modules(): def chroma_module(monkeypatch): fake_chroma = _build_fake_chroma_modules() monkeypatch.setitem(sys.modules, "chromadb", fake_chroma) - import core.rag.datasource.vdb.chroma.chroma_vector as module + import dify_vdb_chroma.chroma_vector as module return importlib.reload(module) diff --git a/api/core/rag/datasource/vdb/clickzetta/README.md b/api/providers/vdb/vdb-clickzetta/README.md similarity index 99% rename from api/core/rag/datasource/vdb/clickzetta/README.md rename to api/providers/vdb/vdb-clickzetta/README.md index 969d4e40a0..faa76707ce 100644 --- a/api/core/rag/datasource/vdb/clickzetta/README.md +++ b/api/providers/vdb/vdb-clickzetta/README.md @@ -198,4 +198,4 @@ Clickzetta supports advanced full-text search with multiple analyzers: - [Clickzetta Vector Search Documentation](https://yunqi.tech/documents/vector-search) - [Clickzetta Inverted Index Documentation](https://yunqi.tech/documents/inverted-index) -- [Clickzetta SQL Functions](https://yunqi.tech/documents/sql-reference) +- [Clickzetta SQL Functions](https://yunqi.tech/documents/sql-reference) \ No newline at end of file diff --git a/api/providers/vdb/vdb-clickzetta/pyproject.toml b/api/providers/vdb/vdb-clickzetta/pyproject.toml new file mode 100644 index 0000000000..aea94fdb2a --- /dev/null +++ b/api/providers/vdb/vdb-clickzetta/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-clickzetta" +version = "0.0.1" + +dependencies = [ + "clickzetta-connector-python>=0.8.102", +] +description = "Dify vector store backend (dify-vdb-clickzetta)." 
+ +[project.entry-points."dify.vector_backends"] +clickzetta = "dify_vdb_clickzetta.clickzetta_vector:ClickzettaVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/clickzetta/__init__.py b/api/providers/vdb/vdb-clickzetta/src/dify_vdb_clickzetta/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/clickzetta/__init__.py rename to api/providers/vdb/vdb-clickzetta/src/dify_vdb_clickzetta/__init__.py diff --git a/api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py b/api/providers/vdb/vdb-clickzetta/src/dify_vdb_clickzetta/clickzetta_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py rename to api/providers/vdb/vdb-clickzetta/src/dify_vdb_clickzetta/clickzetta_vector.py diff --git a/api/tests/integration_tests/vdb/clickzetta/README.md b/api/providers/vdb/vdb-clickzetta/tests/README.md similarity index 100% rename from api/tests/integration_tests/vdb/clickzetta/README.md rename to api/providers/vdb/vdb-clickzetta/tests/README.md diff --git a/api/tests/integration_tests/vdb/clickzetta/test_clickzetta.py b/api/providers/vdb/vdb-clickzetta/tests/integration_tests/test_clickzetta.py similarity index 92% rename from api/tests/integration_tests/vdb/clickzetta/test_clickzetta.py rename to api/providers/vdb/vdb-clickzetta/tests/integration_tests/test_clickzetta.py index 21de8be6e3..1c6819f9f1 100644 --- a/api/tests/integration_tests/vdb/clickzetta/test_clickzetta.py +++ b/api/providers/vdb/vdb-clickzetta/tests/integration_tests/test_clickzetta.py @@ -2,10 +2,10 @@ import contextlib import os import pytest +from dify_vdb_clickzetta.clickzetta_vector import ClickzettaConfig, ClickzettaVector -from core.rag.datasource.vdb.clickzetta.clickzetta_vector import ClickzettaConfig, ClickzettaVector +from core.rag.datasource.vdb.vector_integration_test_support import AbstractVectorTest, get_example_text from core.rag.models.document import Document 
-from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, get_example_text, setup_mock_redis class TestClickzettaVector(AbstractVectorTest): @@ -14,9 +14,8 @@ class TestClickzettaVector(AbstractVectorTest): """ @pytest.fixture - def vector_store(self): + def vector_store(self, setup_mock_redis): """Create a Clickzetta vector store instance for testing.""" - # Skip test if Clickzetta credentials are not configured if not os.getenv("CLICKZETTA_USERNAME"): pytest.skip("CLICKZETTA_USERNAME is not configured") if not os.getenv("CLICKZETTA_PASSWORD"): @@ -32,21 +31,19 @@ class TestClickzettaVector(AbstractVectorTest): workspace=os.getenv("CLICKZETTA_WORKSPACE", "quick_start"), vcluster=os.getenv("CLICKZETTA_VCLUSTER", "default_ap"), schema=os.getenv("CLICKZETTA_SCHEMA", "dify_test"), - batch_size=10, # Small batch size for testing + batch_size=10, enable_inverted_index=True, analyzer_type="chinese", analyzer_mode="smart", vector_distance_function="cosine_distance", ) - with setup_mock_redis(): - vector = ClickzettaVector(collection_name="test_collection_" + str(os.getpid()), config=config) + vector = ClickzettaVector(collection_name="test_collection_" + str(os.getpid()), config=config) - yield vector + yield vector - # Cleanup: delete the test collection - with contextlib.suppress(Exception): - vector.delete() + with contextlib.suppress(Exception): + vector.delete() def test_clickzetta_vector_basic_operations(self, vector_store): """Test basic CRUD operations on Clickzetta vector store.""" diff --git a/api/tests/integration_tests/vdb/clickzetta/test_docker_integration.py b/api/providers/vdb/vdb-clickzetta/tests/integration_tests/test_docker_integration.py similarity index 55% rename from api/tests/integration_tests/vdb/clickzetta/test_docker_integration.py rename to api/providers/vdb/vdb-clickzetta/tests/integration_tests/test_docker_integration.py index 60e3f30f26..a5d32f5e81 100644 --- 
a/api/tests/integration_tests/vdb/clickzetta/test_docker_integration.py +++ b/api/providers/vdb/vdb-clickzetta/tests/integration_tests/test_docker_integration.py @@ -3,16 +3,19 @@ Test Clickzetta integration in Docker environment """ +import logging import os import time import httpx from clickzetta import connect +logger = logging.getLogger(__name__) + def test_clickzetta_connection(): """Test direct connection to Clickzetta""" - print("=== Testing direct Clickzetta connection ===") + logger.info("=== Testing direct Clickzetta connection ===") try: conn = connect( username=os.getenv("CLICKZETTA_USERNAME", "test_user"), @@ -25,100 +28,93 @@ def test_clickzetta_connection(): ) with conn.cursor() as cursor: - # Test basic connectivity cursor.execute("SELECT 1 as test") result = cursor.fetchone() - print(f"✓ Connection test: {result}") + logger.info("✓ Connection test: %s", result) - # Check if our test table exists cursor.execute("SHOW TABLES IN dify") tables = cursor.fetchall() - print(f"✓ Existing tables: {[t[1] for t in tables if t[0] == 'dify']}") + logger.info("✓ Existing tables: %s", [t[1] for t in tables if t[0] == "dify"]) - # Check if test collection exists test_collection = "collection_test_dataset" if test_collection in [t[1] for t in tables if t[0] == "dify"]: cursor.execute(f"DESCRIBE dify.{test_collection}") columns = cursor.fetchall() - print(f"✓ Table structure for {test_collection}:") + logger.info("✓ Table structure for %s:", test_collection) for col in columns: - print(f" - {col[0]}: {col[1]}") + logger.info(" - %s: %s", col[0], col[1]) - # Check for indexes cursor.execute(f"SHOW INDEXES IN dify.{test_collection}") indexes = cursor.fetchall() - print(f"✓ Indexes on {test_collection}:") + logger.info("✓ Indexes on %s:", test_collection) for idx in indexes: - print(f" - {idx}") + logger.info(" - %s", idx) return True - except Exception as e: - print(f"✗ Connection test failed: {e}") + except Exception: + logger.exception("✗ Connection test failed") 
return False def test_dify_api(): """Test Dify API with Clickzetta backend""" - print("\n=== Testing Dify API ===") + logger.info("\n=== Testing Dify API ===") base_url = "http://localhost:5001" - # Wait for API to be ready max_retries = 30 for i in range(max_retries): try: response = httpx.get(f"{base_url}/console/api/health") if response.status_code == 200: - print("✓ Dify API is ready") + logger.info("✓ Dify API is ready") break except: if i == max_retries - 1: - print("✗ Dify API is not responding") + logger.exception("✗ Dify API is not responding") return False time.sleep(2) - # Check vector store configuration try: - # This is a simplified check - in production, you'd use proper auth - print("✓ Dify is configured to use Clickzetta as vector store") + logger.info("✓ Dify is configured to use Clickzetta as vector store") return True - except Exception as e: - print(f"✗ API test failed: {e}") + except Exception: + logger.exception("✗ API test failed") return False def verify_table_structure(): """Verify the table structure meets Dify requirements""" - print("\n=== Verifying Table Structure ===") + logger.info("\n=== Verifying Table Structure ===") expected_columns = { "id": "VARCHAR", "page_content": "VARCHAR", - "metadata": "VARCHAR", # JSON stored as VARCHAR in Clickzetta + "metadata": "VARCHAR", "vector": "ARRAY", } expected_metadata_fields = ["doc_id", "doc_hash", "document_id", "dataset_id"] - print("✓ Expected table structure:") + logger.info("✓ Expected table structure:") for col, dtype in expected_columns.items(): - print(f" - {col}: {dtype}") + logger.info(" - %s: %s", col, dtype) - print("\n✓ Required metadata fields:") + logger.info("\n✓ Required metadata fields:") for field in expected_metadata_fields: - print(f" - {field}") + logger.info(" - %s", field) - print("\n✓ Index requirements:") - print(" - Vector index (HNSW) on 'vector' column") - print(" - Full-text index on 'page_content' (optional)") - print(" - Functional index on 
metadata->>'$.doc_id' (recommended)") - print(" - Functional index on metadata->>'$.document_id' (recommended)") + logger.info("\n✓ Index requirements:") + logger.info(" - Vector index (HNSW) on 'vector' column") + logger.info(" - Full-text index on 'page_content' (optional)") + logger.info(" - Functional index on metadata->>'$.doc_id' (recommended)") + logger.info(" - Functional index on metadata->>'$.document_id' (recommended)") return True def main(): """Run all tests""" - print("Starting Clickzetta integration tests for Dify Docker\n") + logger.info("Starting Clickzetta integration tests for Dify Docker\n") tests = [ ("Direct Clickzetta Connection", test_clickzetta_connection), @@ -131,33 +127,34 @@ def main(): try: success = test_func() results.append((test_name, success)) - except Exception as e: - print(f"\n✗ {test_name} crashed: {e}") + except Exception: + logger.exception("\n✗ %s crashed", test_name) results.append((test_name, False)) - # Summary - print("\n" + "=" * 50) - print("Test Summary:") - print("=" * 50) + logger.info("\n%s", "=" * 50) + logger.info("Test Summary:") + logger.info("=" * 50) passed = sum(1 for _, success in results if success) total = len(results) for test_name, success in results: status = "✅ PASSED" if success else "❌ FAILED" - print(f"{test_name}: {status}") + logger.info("%s: %s", test_name, status) - print(f"\nTotal: {passed}/{total} tests passed") + logger.info("\nTotal: %s/%s tests passed", passed, total) if passed == total: - print("\n🎉 All tests passed! Clickzetta is ready for Dify Docker deployment.") - print("\nNext steps:") - print("1. Run: cd docker && docker-compose -f docker-compose.yaml -f docker-compose.clickzetta.yaml up -d") - print("2. Access Dify at http://localhost:3000") - print("3. Create a dataset and test vector storage with Clickzetta") + logger.info("\n🎉 All tests passed! Clickzetta is ready for Dify Docker deployment.") + logger.info("\nNext steps:") + logger.info( + "1. 
Run: cd docker && docker-compose -f docker-compose.yaml -f docker-compose.clickzetta.yaml up -d" + ) + logger.info("2. Access Dify at http://localhost:3000") + logger.info("3. Create a dataset and test vector storage with Clickzetta") return 0 else: - print("\n⚠️ Some tests failed. Please check the errors above.") + logger.error("\n⚠️ Some tests failed. Please check the errors above.") return 1 diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/clickzetta/test_clickzetta_vector.py b/api/providers/vdb/vdb-clickzetta/tests/unit_tests/test_clickzetta_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/clickzetta/test_clickzetta_vector.py rename to api/providers/vdb/vdb-clickzetta/tests/unit_tests/test_clickzetta_vector.py index 0ce5c04dd6..a7473f1b91 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/clickzetta/test_clickzetta_vector.py +++ b/api/providers/vdb/vdb-clickzetta/tests/unit_tests/test_clickzetta_vector.py @@ -47,7 +47,7 @@ def _build_fake_clickzetta_module(): @pytest.fixture def clickzetta_module(monkeypatch): monkeypatch.setitem(sys.modules, "clickzetta", _build_fake_clickzetta_module()) - import core.rag.datasource.vdb.clickzetta.clickzetta_vector as module + import dify_vdb_clickzetta.clickzetta_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-couchbase/pyproject.toml b/api/providers/vdb/vdb-couchbase/pyproject.toml new file mode 100644 index 0000000000..6bc348b2eb --- /dev/null +++ b/api/providers/vdb/vdb-couchbase/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-couchbase" +version = "0.0.1" + +dependencies = [ + "couchbase~=4.6.0", +] +description = "Dify vector store backend (dify-vdb-couchbase)." 
+ +[project.entry-points."dify.vector_backends"] +couchbase = "dify_vdb_couchbase.couchbase_vector:CouchbaseVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/couchbase/__init__.py b/api/providers/vdb/vdb-couchbase/src/dify_vdb_couchbase/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/couchbase/__init__.py rename to api/providers/vdb/vdb-couchbase/src/dify_vdb_couchbase/__init__.py diff --git a/api/core/rag/datasource/vdb/couchbase/couchbase_vector.py b/api/providers/vdb/vdb-couchbase/src/dify_vdb_couchbase/couchbase_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/couchbase/couchbase_vector.py rename to api/providers/vdb/vdb-couchbase/src/dify_vdb_couchbase/couchbase_vector.py diff --git a/api/tests/integration_tests/vdb/couchbase/test_couchbase.py b/api/providers/vdb/vdb-couchbase/tests/integration_tests/test_couchbase.py similarity index 80% rename from api/tests/integration_tests/vdb/couchbase/test_couchbase.py rename to api/providers/vdb/vdb-couchbase/tests/integration_tests/test_couchbase.py index 0371f04233..918dae328f 100644 --- a/api/tests/integration_tests/vdb/couchbase/test_couchbase.py +++ b/api/providers/vdb/vdb-couchbase/tests/integration_tests/test_couchbase.py @@ -1,12 +1,14 @@ +import logging import subprocess import time -from core.rag.datasource.vdb.couchbase.couchbase_vector import CouchbaseConfig, CouchbaseVector -from tests.integration_tests.vdb.test_vector_store import ( +from dify_vdb_couchbase.couchbase_vector import CouchbaseConfig, CouchbaseVector + +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, ) -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) +logger = logging.getLogger(__name__) def wait_for_healthy_container(service_name="couchbase-server", timeout=300): @@ -16,10 +18,10 @@ def wait_for_healthy_container(service_name="couchbase-server", timeout=300): ["docker", 
"inspect", "--format", "{{.State.Health.Status}}", service_name], capture_output=True, text=True ) if result.stdout.strip() == "healthy": - print(f"{service_name} is healthy!") + logger.info("%s is healthy!", service_name) return True else: - print(f"Waiting for {service_name} to be healthy...") + logger.info("Waiting for %s to be healthy...", service_name) time.sleep(10) raise TimeoutError(f"{service_name} did not become healthy in time") diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/couchbase/test_couchbase_vector.py b/api/providers/vdb/vdb-couchbase/tests/unit_tests/test_couchbase_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/couchbase/test_couchbase_vector.py rename to api/providers/vdb/vdb-couchbase/tests/unit_tests/test_couchbase_vector.py index 9fea187615..7e5c40b8f2 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/couchbase/test_couchbase_vector.py +++ b/api/providers/vdb/vdb-couchbase/tests/unit_tests/test_couchbase_vector.py @@ -154,7 +154,7 @@ def couchbase_module(monkeypatch): for name, module in _build_fake_couchbase_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.couchbase.couchbase_vector as module + import dify_vdb_couchbase.couchbase_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-elasticsearch/pyproject.toml b/api/providers/vdb/vdb-elasticsearch/pyproject.toml new file mode 100644 index 0000000000..d40908f92d --- /dev/null +++ b/api/providers/vdb/vdb-elasticsearch/pyproject.toml @@ -0,0 +1,15 @@ +[project] +name = "dify-vdb-elasticsearch" +version = "0.0.1" + +dependencies = [ + "elasticsearch==8.14.0", +] +description = "Dify vector store backend (dify-vdb-elasticsearch)." 
+ +[project.entry-points."dify.vector_backends"] +elasticsearch = "dify_vdb_elasticsearch.elasticsearch_vector:ElasticSearchVectorFactory" +elasticsearch-ja = "dify_vdb_elasticsearch.elasticsearch_ja_vector:ElasticSearchJaVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/elasticsearch/__init__.py b/api/providers/vdb/vdb-elasticsearch/src/dify_vdb_elasticsearch/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/elasticsearch/__init__.py rename to api/providers/vdb/vdb-elasticsearch/src/dify_vdb_elasticsearch/__init__.py diff --git a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_ja_vector.py b/api/providers/vdb/vdb-elasticsearch/src/dify_vdb_elasticsearch/elasticsearch_ja_vector.py similarity index 98% rename from api/core/rag/datasource/vdb/elasticsearch/elasticsearch_ja_vector.py rename to api/providers/vdb/vdb-elasticsearch/src/dify_vdb_elasticsearch/elasticsearch_ja_vector.py index 1e7fe52666..87b9d813ec 100644 --- a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_ja_vector.py +++ b/api/providers/vdb/vdb-elasticsearch/src/dify_vdb_elasticsearch/elasticsearch_ja_vector.py @@ -4,14 +4,14 @@ from typing import Any from flask import current_app -from core.rag.datasource.vdb.elasticsearch.elasticsearch_vector import ( +from core.rag.datasource.vdb.field import Field +from core.rag.datasource.vdb.vector_type import VectorType +from core.rag.embedding.embedding_base import Embeddings +from dify_vdb_elasticsearch.elasticsearch_vector import ( ElasticSearchConfig, ElasticSearchVector, ElasticSearchVectorFactory, ) -from core.rag.datasource.vdb.field import Field -from core.rag.datasource.vdb.vector_type import VectorType -from core.rag.embedding.embedding_base import Embeddings from extensions.ext_redis import redis_client from models.dataset import Dataset diff --git a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py 
b/api/providers/vdb/vdb-elasticsearch/src/dify_vdb_elasticsearch/elasticsearch_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py rename to api/providers/vdb/vdb-elasticsearch/src/dify_vdb_elasticsearch/elasticsearch_vector.py diff --git a/api/tests/integration_tests/vdb/elasticsearch/test_elasticsearch.py b/api/providers/vdb/vdb-elasticsearch/tests/integration_tests/test_elasticsearch.py similarity index 71% rename from api/tests/integration_tests/vdb/elasticsearch/test_elasticsearch.py rename to api/providers/vdb/vdb-elasticsearch/tests/integration_tests/test_elasticsearch.py index 970d2cce1a..c8b679e021 100644 --- a/api/tests/integration_tests/vdb/elasticsearch/test_elasticsearch.py +++ b/api/providers/vdb/vdb-elasticsearch/tests/integration_tests/test_elasticsearch.py @@ -1,10 +1,9 @@ -from core.rag.datasource.vdb.elasticsearch.elasticsearch_vector import ElasticSearchConfig, ElasticSearchVector -from tests.integration_tests.vdb.test_vector_store import ( +from dify_vdb_elasticsearch.elasticsearch_vector import ElasticSearchConfig, ElasticSearchVector + +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, ) -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) - class ElasticSearchVectorTest(AbstractVectorTest): def __init__(self): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/elasticsearch/test_elasticsearch_ja_vector.py b/api/providers/vdb/vdb-elasticsearch/tests/unit_tests/test_elasticsearch_ja_vector.py similarity index 96% rename from api/tests/unit_tests/core/rag/datasource/vdb/elasticsearch/test_elasticsearch_ja_vector.py rename to api/providers/vdb/vdb-elasticsearch/tests/unit_tests/test_elasticsearch_ja_vector.py index edd29a4649..f81ed6beea 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/elasticsearch/test_elasticsearch_ja_vector.py +++ 
b/api/providers/vdb/vdb-elasticsearch/tests/unit_tests/test_elasticsearch_ja_vector.py @@ -32,8 +32,8 @@ def elasticsearch_ja_module(monkeypatch): for name, module in _build_fake_elasticsearch_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.elasticsearch.elasticsearch_ja_vector as ja_module - import core.rag.datasource.vdb.elasticsearch.elasticsearch_vector as base_module + import dify_vdb_elasticsearch.elasticsearch_ja_vector as ja_module + import dify_vdb_elasticsearch.elasticsearch_vector as base_module importlib.reload(base_module) return importlib.reload(ja_module) diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/elasticsearch/test_elasticsearch_vector.py b/api/providers/vdb/vdb-elasticsearch/tests/unit_tests/test_elasticsearch_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/elasticsearch/test_elasticsearch_vector.py rename to api/providers/vdb/vdb-elasticsearch/tests/unit_tests/test_elasticsearch_vector.py index 9ecf0caa24..48f1f6dc26 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/elasticsearch/test_elasticsearch_vector.py +++ b/api/providers/vdb/vdb-elasticsearch/tests/unit_tests/test_elasticsearch_vector.py @@ -42,7 +42,7 @@ def elasticsearch_module(monkeypatch): for name, module in _build_fake_elasticsearch_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.elasticsearch.elasticsearch_vector as module + import dify_vdb_elasticsearch.elasticsearch_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-hologres/pyproject.toml b/api/providers/vdb/vdb-hologres/pyproject.toml new file mode 100644 index 0000000000..88044bf6d6 --- /dev/null +++ b/api/providers/vdb/vdb-hologres/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-hologres" +version = "0.0.1" + +dependencies = [ + "holo-search-sdk>=0.4.2", +] +description = "Dify vector store backend (dify-vdb-hologres)." 
+ +[project.entry-points."dify.vector_backends"] +hologres = "dify_vdb_hologres.hologres_vector:HologresVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/hologres/__init__.py b/api/providers/vdb/vdb-hologres/src/dify_vdb_hologres/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/hologres/__init__.py rename to api/providers/vdb/vdb-hologres/src/dify_vdb_hologres/__init__.py diff --git a/api/core/rag/datasource/vdb/hologres/hologres_vector.py b/api/providers/vdb/vdb-hologres/src/dify_vdb_hologres/hologres_vector.py similarity index 97% rename from api/core/rag/datasource/vdb/hologres/hologres_vector.py rename to api/providers/vdb/vdb-hologres/src/dify_vdb_hologres/hologres_vector.py index 13d48b5668..2509260d41 100644 --- a/api/core/rag/datasource/vdb/hologres/hologres_vector.py +++ b/api/providers/vdb/vdb-hologres/src/dify_vdb_hologres/hologres_vector.py @@ -1,7 +1,7 @@ import json import logging import time -from typing import Any +from typing import Any, cast import holo_search_sdk as holo # type: ignore from holo_search_sdk.types import BaseQuantizationType, DistanceType, TokenizerType @@ -351,9 +351,9 @@ class HologresVectorFactory(AbstractVectorFactory): access_key_id=dify_config.HOLOGRES_ACCESS_KEY_ID or "", access_key_secret=dify_config.HOLOGRES_ACCESS_KEY_SECRET or "", schema_name=dify_config.HOLOGRES_SCHEMA, - tokenizer=dify_config.HOLOGRES_TOKENIZER, - distance_method=dify_config.HOLOGRES_DISTANCE_METHOD, - base_quantization_type=dify_config.HOLOGRES_BASE_QUANTIZATION_TYPE, + tokenizer=cast(TokenizerType, dify_config.HOLOGRES_TOKENIZER), + distance_method=cast(DistanceType, dify_config.HOLOGRES_DISTANCE_METHOD), + base_quantization_type=cast(BaseQuantizationType, dify_config.HOLOGRES_BASE_QUANTIZATION_TYPE), max_degree=dify_config.HOLOGRES_MAX_DEGREE, ef_construction=dify_config.HOLOGRES_EF_CONSTRUCTION, ), diff --git a/api/tests/integration_tests/vdb/__mock/hologres.py 
b/api/providers/vdb/vdb-hologres/tests/integration_tests/conftest.py similarity index 82% rename from api/tests/integration_tests/vdb/__mock/hologres.py rename to api/providers/vdb/vdb-hologres/tests/integration_tests/conftest.py index b60cf358c0..d28ded0187 100644 --- a/api/tests/integration_tests/vdb/__mock/hologres.py +++ b/api/providers/vdb/vdb-hologres/tests/integration_tests/conftest.py @@ -7,13 +7,10 @@ import pytest from _pytest.monkeypatch import MonkeyPatch from psycopg import sql as psql -# Shared in-memory storage: {table_name: {doc_id: {"id", "text", "meta", "embedding"}}} _mock_tables: dict[str, dict[str, dict[str, Any]]] = {} class MockSearchQuery: - """Mock query builder for search_vector and search_text results.""" - def __init__(self, table_name: str, search_type: str): self._table_name = table_name self._search_type = search_type @@ -32,17 +29,13 @@ class MockSearchQuery: return self def _apply_filter(self, row: dict[str, Any]) -> bool: - """Apply the filter SQL to check if a row matches.""" if self._filter_sql is None: return True - # Extract literals (the document IDs) from the filter SQL - # Filter format: meta->>'document_id' IN ('doc1', 'doc2') literals = [v for t, v in _extract_identifiers_and_literals(self._filter_sql) if t == "literal"] if not literals: return True - # Get the document_id from the row's meta field meta = row.get("meta", "{}") if isinstance(meta, str): meta = json.loads(meta) @@ -54,22 +47,17 @@ class MockSearchQuery: data = _mock_tables.get(self._table_name, {}) results = [] for row in list(data.values())[: self._limit_val]: - # Apply filter if present if not self._apply_filter(row): continue if self._search_type == "vector": - # row format expected by _process_vector_results: (distance, id, text, meta) results.append((0.1, row["id"], row["text"], row["meta"])) else: - # row format expected by _process_full_text_results: (id, text, meta, embedding, score) results.append((row["id"], row["text"], row["meta"], 
row.get("embedding", []), 0.9)) return results class MockTable: - """Mock table object returned by client.open_table().""" - def __init__(self, table_name: str): self._table_name = table_name @@ -97,7 +85,6 @@ class MockTable: def _extract_sql_template(query) -> str: - """Extract the SQL template string from a psycopg Composed object.""" if isinstance(query, psql.Composed): for part in query: if isinstance(part, psql.SQL): @@ -108,7 +95,6 @@ def _extract_sql_template(query) -> str: def _extract_identifiers_and_literals(query) -> list[Any]: - """Extract Identifier and Literal values from a psycopg Composed object.""" values: list[Any] = [] if isinstance(query, psql.Composed): for part in query: @@ -117,7 +103,6 @@ def _extract_identifiers_and_literals(query) -> list[Any]: elif isinstance(part, psql.Literal): values.append(("literal", part._obj)) elif isinstance(part, psql.Composed): - # Handles SQL(...).join(...) for IN clauses for sub in part: if isinstance(sub, psql.Literal): values.append(("literal", sub._obj)) @@ -125,8 +110,6 @@ def _extract_identifiers_and_literals(query) -> list[Any]: class MockHologresClient: - """Mock holo_search_sdk client that stores data in memory.""" - def connect(self): pass @@ -141,21 +124,18 @@ class MockHologresClient: params = _extract_identifiers_and_literals(query) if "CREATE TABLE" in template.upper(): - # Extract table name from first identifier table_name = next((v for t, v in params if t == "ident"), "unknown") if table_name not in _mock_tables: _mock_tables[table_name] = {} return None if "SELECT 1" in template: - # text_exists: SELECT 1 FROM {table} WHERE id = {id} LIMIT 1 table_name = next((v for t, v in params if t == "ident"), "") doc_id = next((v for t, v in params if t == "literal"), "") data = _mock_tables.get(table_name, {}) return [(1,)] if doc_id in data else [] if "SELECT id" in template: - # get_ids_by_metadata_field: SELECT id FROM {table} WHERE meta->>{key} = {value} table_name = next((v for t, v in params if t 
== "ident"), "") literals = [v for t, v in params if t == "literal"] key = literals[0] if len(literals) > 0 else "" @@ -166,12 +146,10 @@ class MockHologresClient: if "DELETE" in template.upper(): table_name = next((v for t, v in params if t == "ident"), "") if "id IN" in template: - # delete_by_ids ids_to_delete = [v for t, v in params if t == "literal"] for did in ids_to_delete: _mock_tables.get(table_name, {}).pop(did, None) elif "meta->>" in template: - # delete_by_metadata_field literals = [v for t, v in params if t == "literal"] key = literals[0] if len(literals) > 0 else "" value = literals[1] if len(literals) > 1 else "" @@ -190,7 +168,6 @@ class MockHologresClient: def mock_connect(**kwargs): - """Replacement for holo_search_sdk.connect() that returns a mock client.""" return MockHologresClient() diff --git a/api/tests/integration_tests/vdb/hologres/test_hologres.py b/api/providers/vdb/vdb-hologres/tests/integration_tests/test_hologres.py similarity index 94% rename from api/tests/integration_tests/vdb/hologres/test_hologres.py rename to api/providers/vdb/vdb-hologres/tests/integration_tests/test_hologres.py index d81e18841e..04024be4ae 100644 --- a/api/tests/integration_tests/vdb/hologres/test_hologres.py +++ b/api/providers/vdb/vdb-hologres/tests/integration_tests/test_hologres.py @@ -2,16 +2,11 @@ import os import uuid from typing import cast +from dify_vdb_hologres.hologres_vector import HologresVector, HologresVectorConfig from holo_search_sdk.types import BaseQuantizationType, DistanceType, TokenizerType -from core.rag.datasource.vdb.hologres.hologres_vector import HologresVector, HologresVectorConfig +from core.rag.datasource.vdb.vector_integration_test_support import AbstractVectorTest, get_example_text from core.rag.models.document import Document -from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, get_example_text - -pytest_plugins = ( - "tests.integration_tests.vdb.test_vector_store", - 
"tests.integration_tests.vdb.__mock.hologres", -) MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true" diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/hologres/test_hologres_vector.py b/api/providers/vdb/vdb-hologres/tests/unit_tests/test_hologres_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/hologres/test_hologres_vector.py rename to api/providers/vdb/vdb-hologres/tests/unit_tests/test_hologres_vector.py index 5d9e744ded..f9a557ecce 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/hologres/test_hologres_vector.py +++ b/api/providers/vdb/vdb-hologres/tests/unit_tests/test_hologres_vector.py @@ -42,7 +42,7 @@ def hologres_module(monkeypatch): for name, module in _build_fake_hologres_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.hologres.hologres_vector as module + import dify_vdb_hologres.hologres_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-huawei-cloud/pyproject.toml b/api/providers/vdb/vdb-huawei-cloud/pyproject.toml new file mode 100644 index 0000000000..71af56786c --- /dev/null +++ b/api/providers/vdb/vdb-huawei-cloud/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-huawei-cloud" +version = "0.0.1" + +dependencies = [ + "elasticsearch==8.14.0", +] +description = "Dify vector store backend (dify-vdb-huawei-cloud)." 
+ +[project.entry-points."dify.vector_backends"] +huawei_cloud = "dify_vdb_huawei_cloud.huawei_cloud_vector:HuaweiCloudVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/huawei/__init__.py b/api/providers/vdb/vdb-huawei-cloud/src/dify_vdb_huawei_cloud/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/huawei/__init__.py rename to api/providers/vdb/vdb-huawei-cloud/src/dify_vdb_huawei_cloud/__init__.py diff --git a/api/core/rag/datasource/vdb/huawei/huawei_cloud_vector.py b/api/providers/vdb/vdb-huawei-cloud/src/dify_vdb_huawei_cloud/huawei_cloud_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/huawei/huawei_cloud_vector.py rename to api/providers/vdb/vdb-huawei-cloud/src/dify_vdb_huawei_cloud/huawei_cloud_vector.py diff --git a/api/tests/integration_tests/vdb/__mock/huaweicloudvectordb.py b/api/providers/vdb/vdb-huawei-cloud/tests/integration_tests/conftest.py similarity index 100% rename from api/tests/integration_tests/vdb/__mock/huaweicloudvectordb.py rename to api/providers/vdb/vdb-huawei-cloud/tests/integration_tests/conftest.py diff --git a/api/tests/integration_tests/vdb/huawei/test_huawei_cloud.py b/api/providers/vdb/vdb-huawei-cloud/tests/integration_tests/test_huawei_cloud.py similarity index 69% rename from api/tests/integration_tests/vdb/huawei/test_huawei_cloud.py rename to api/providers/vdb/vdb-huawei-cloud/tests/integration_tests/test_huawei_cloud.py index 01f511358a..bb5f5b72ef 100644 --- a/api/tests/integration_tests/vdb/huawei/test_huawei_cloud.py +++ b/api/providers/vdb/vdb-huawei-cloud/tests/integration_tests/test_huawei_cloud.py @@ -1,10 +1,6 @@ -from core.rag.datasource.vdb.huawei.huawei_cloud_vector import HuaweiCloudVector, HuaweiCloudVectorConfig -from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, get_example_text +from dify_vdb_huawei_cloud.huawei_cloud_vector import HuaweiCloudVector, HuaweiCloudVectorConfig 
-pytest_plugins = ( - "tests.integration_tests.vdb.test_vector_store", - "tests.integration_tests.vdb.__mock.huaweicloudvectordb", -) +from core.rag.datasource.vdb.vector_integration_test_support import AbstractVectorTest, get_example_text class HuaweiCloudVectorTest(AbstractVectorTest): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/huawei/test_huawei_cloud_vector.py b/api/providers/vdb/vdb-huawei-cloud/tests/unit_tests/test_huawei_cloud_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/huawei/test_huawei_cloud_vector.py rename to api/providers/vdb/vdb-huawei-cloud/tests/unit_tests/test_huawei_cloud_vector.py index 9d23dfcf63..ba3f14912b 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/huawei/test_huawei_cloud_vector.py +++ b/api/providers/vdb/vdb-huawei-cloud/tests/unit_tests/test_huawei_cloud_vector.py @@ -33,7 +33,7 @@ def huawei_module(monkeypatch): for name, module in _build_fake_elasticsearch_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.huawei.huawei_cloud_vector as module + import dify_vdb_huawei_cloud.huawei_cloud_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-iris/pyproject.toml b/api/providers/vdb/vdb-iris/pyproject.toml new file mode 100644 index 0000000000..6dd7a8e073 --- /dev/null +++ b/api/providers/vdb/vdb-iris/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-iris" +version = "0.0.1" + +dependencies = [ + "intersystems-irispython>=5.1.0", +] +description = "Dify vector store backend (dify-vdb-iris)." 
+ +[project.entry-points."dify.vector_backends"] +iris = "dify_vdb_iris.iris_vector:IrisVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/iris/__init__.py b/api/providers/vdb/vdb-iris/src/dify_vdb_iris/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/iris/__init__.py rename to api/providers/vdb/vdb-iris/src/dify_vdb_iris/__init__.py diff --git a/api/core/rag/datasource/vdb/iris/iris_vector.py b/api/providers/vdb/vdb-iris/src/dify_vdb_iris/iris_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/iris/iris_vector.py rename to api/providers/vdb/vdb-iris/src/dify_vdb_iris/iris_vector.py diff --git a/api/tests/integration_tests/vdb/iris/test_iris.py b/api/providers/vdb/vdb-iris/tests/integration_tests/test_iris.py similarity index 85% rename from api/tests/integration_tests/vdb/iris/test_iris.py rename to api/providers/vdb/vdb-iris/tests/integration_tests/test_iris.py index 4b2da8387b..8281e89c8a 100644 --- a/api/tests/integration_tests/vdb/iris/test_iris.py +++ b/api/providers/vdb/vdb-iris/tests/integration_tests/test_iris.py @@ -1,12 +1,11 @@ """Integration tests for IRIS vector database.""" -from core.rag.datasource.vdb.iris.iris_vector import IrisVector, IrisVectorConfig -from tests.integration_tests.vdb.test_vector_store import ( +from dify_vdb_iris.iris_vector import IrisVector, IrisVectorConfig + +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, ) -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) - class IrisVectorTest(AbstractVectorTest): """Test suite for IRIS vector store implementation.""" diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/iris/test_iris_vector.py b/api/providers/vdb/vdb-iris/tests/unit_tests/test_iris_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/iris/test_iris_vector.py rename to 
api/providers/vdb/vdb-iris/tests/unit_tests/test_iris_vector.py index 63338ca809..8c038e82b9 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/iris/test_iris_vector.py +++ b/api/providers/vdb/vdb-iris/tests/unit_tests/test_iris_vector.py @@ -26,7 +26,7 @@ def _build_fake_iris_module(): def iris_module(monkeypatch): monkeypatch.setitem(sys.modules, "iris", _build_fake_iris_module()) - import core.rag.datasource.vdb.iris.iris_vector as module + import dify_vdb_iris.iris_vector as module reloaded = importlib.reload(module) reloaded._pool_instance = None diff --git a/api/providers/vdb/vdb-lindorm/pyproject.toml b/api/providers/vdb/vdb-lindorm/pyproject.toml new file mode 100644 index 0000000000..0cffc67491 --- /dev/null +++ b/api/providers/vdb/vdb-lindorm/pyproject.toml @@ -0,0 +1,15 @@ +[project] +name = "dify-vdb-lindorm" +version = "0.0.1" + +dependencies = [ + "opensearch-py==3.1.0", + "tenacity>=8.0.0", +] +description = "Dify vector store backend (dify-vdb-lindorm)." + +[project.entry-points."dify.vector_backends"] +lindorm = "dify_vdb_lindorm.lindorm_vector:LindormVectorStoreFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/lindorm/__init__.py b/api/providers/vdb/vdb-lindorm/src/dify_vdb_lindorm/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/lindorm/__init__.py rename to api/providers/vdb/vdb-lindorm/src/dify_vdb_lindorm/__init__.py diff --git a/api/core/rag/datasource/vdb/lindorm/lindorm_vector.py b/api/providers/vdb/vdb-lindorm/src/dify_vdb_lindorm/lindorm_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/lindorm/lindorm_vector.py rename to api/providers/vdb/vdb-lindorm/src/dify_vdb_lindorm/lindorm_vector.py diff --git a/api/tests/integration_tests/vdb/lindorm/test_lindorm.py b/api/providers/vdb/vdb-lindorm/tests/integration_tests/test_lindorm.py similarity index 88% rename from api/tests/integration_tests/vdb/lindorm/test_lindorm.py rename to 
api/providers/vdb/vdb-lindorm/tests/integration_tests/test_lindorm.py index b24498fdfd..0a0c2d2d59 100644 --- a/api/tests/integration_tests/vdb/lindorm/test_lindorm.py +++ b/api/providers/vdb/vdb-lindorm/tests/integration_tests/test_lindorm.py @@ -1,9 +1,8 @@ import os -from core.rag.datasource.vdb.lindorm.lindorm_vector import LindormVectorStore, LindormVectorStoreConfig -from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest +from dify_vdb_lindorm.lindorm_vector import LindormVectorStore, LindormVectorStoreConfig -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) +from core.rag.datasource.vdb.vector_integration_test_support import AbstractVectorTest class Config: diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/lindorm/test_lindorm_vector.py b/api/providers/vdb/vdb-lindorm/tests/unit_tests/test_lindorm_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/lindorm/test_lindorm_vector.py rename to api/providers/vdb/vdb-lindorm/tests/unit_tests/test_lindorm_vector.py index 34357d5907..238145c1d6 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/lindorm/test_lindorm_vector.py +++ b/api/providers/vdb/vdb-lindorm/tests/unit_tests/test_lindorm_vector.py @@ -51,7 +51,7 @@ def lindorm_module(monkeypatch): for name, module in _build_fake_opensearch_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.lindorm.lindorm_vector as module + import dify_vdb_lindorm.lindorm_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-matrixone/pyproject.toml b/api/providers/vdb/vdb-matrixone/pyproject.toml new file mode 100644 index 0000000000..53363ed7d9 --- /dev/null +++ b/api/providers/vdb/vdb-matrixone/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-matrixone" +version = "0.0.1" + +dependencies = [ + "mo-vector~=0.1.13", +] +description = "Dify vector store backend (dify-vdb-matrixone)." 
+ +[project.entry-points."dify.vector_backends"] +matrixone = "dify_vdb_matrixone.matrixone_vector:MatrixoneVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/matrixone/__init__.py b/api/providers/vdb/vdb-matrixone/src/dify_vdb_matrixone/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/matrixone/__init__.py rename to api/providers/vdb/vdb-matrixone/src/dify_vdb_matrixone/__init__.py diff --git a/api/core/rag/datasource/vdb/matrixone/matrixone_vector.py b/api/providers/vdb/vdb-matrixone/src/dify_vdb_matrixone/matrixone_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/matrixone/matrixone_vector.py rename to api/providers/vdb/vdb-matrixone/src/dify_vdb_matrixone/matrixone_vector.py diff --git a/api/tests/integration_tests/vdb/matrixone/test_matrixone.py b/api/providers/vdb/vdb-matrixone/tests/integration_tests/test_matrixone.py similarity index 74% rename from api/tests/integration_tests/vdb/matrixone/test_matrixone.py rename to api/providers/vdb/vdb-matrixone/tests/integration_tests/test_matrixone.py index fe592f6699..d6f4781e65 100644 --- a/api/tests/integration_tests/vdb/matrixone/test_matrixone.py +++ b/api/providers/vdb/vdb-matrixone/tests/integration_tests/test_matrixone.py @@ -1,10 +1,9 @@ -from core.rag.datasource.vdb.matrixone.matrixone_vector import MatrixoneConfig, MatrixoneVector -from tests.integration_tests.vdb.test_vector_store import ( +from dify_vdb_matrixone.matrixone_vector import MatrixoneConfig, MatrixoneVector + +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, ) -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) - class MatrixoneVectorTest(AbstractVectorTest): def __init__(self): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/matrixone/test_matrixone_vector.py b/api/providers/vdb/vdb-matrixone/tests/unit_tests/test_matrixone_vector.py similarity index 99% rename from 
api/tests/unit_tests/core/rag/datasource/vdb/matrixone/test_matrixone_vector.py rename to api/providers/vdb/vdb-matrixone/tests/unit_tests/test_matrixone_vector.py index 55e7b9112e..c22f4304e5 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/matrixone/test_matrixone_vector.py +++ b/api/providers/vdb/vdb-matrixone/tests/unit_tests/test_matrixone_vector.py @@ -36,7 +36,7 @@ def matrixone_module(monkeypatch): for name, module in _build_fake_mo_vector_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.matrixone.matrixone_vector as module + import dify_vdb_matrixone.matrixone_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-milvus/pyproject.toml b/api/providers/vdb/vdb-milvus/pyproject.toml new file mode 100644 index 0000000000..57385a4431 --- /dev/null +++ b/api/providers/vdb/vdb-milvus/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-milvus" +version = "0.0.1" + +dependencies = [ + "pymilvus~=2.6.12", +] +description = "Dify vector store backend (dify-vdb-milvus)." 
+ +[project.entry-points."dify.vector_backends"] +milvus = "dify_vdb_milvus.milvus_vector:MilvusVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/milvus/__init__.py b/api/providers/vdb/vdb-milvus/src/dify_vdb_milvus/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/milvus/__init__.py rename to api/providers/vdb/vdb-milvus/src/dify_vdb_milvus/__init__.py diff --git a/api/core/rag/datasource/vdb/milvus/milvus_vector.py b/api/providers/vdb/vdb-milvus/src/dify_vdb_milvus/milvus_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/milvus/milvus_vector.py rename to api/providers/vdb/vdb-milvus/src/dify_vdb_milvus/milvus_vector.py diff --git a/api/tests/integration_tests/vdb/milvus/test_milvus.py b/api/providers/vdb/vdb-milvus/tests/integration_tests/test_milvus.py similarity index 80% rename from api/tests/integration_tests/vdb/milvus/test_milvus.py rename to api/providers/vdb/vdb-milvus/tests/integration_tests/test_milvus.py index b5fc4b4d10..084d808bed 100644 --- a/api/tests/integration_tests/vdb/milvus/test_milvus.py +++ b/api/providers/vdb/vdb-milvus/tests/integration_tests/test_milvus.py @@ -1,11 +1,10 @@ -from core.rag.datasource.vdb.milvus.milvus_vector import MilvusConfig, MilvusVector -from tests.integration_tests.vdb.test_vector_store import ( +from dify_vdb_milvus.milvus_vector import MilvusConfig, MilvusVector + +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, get_example_text, ) -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) - class MilvusVectorTest(AbstractVectorTest): def __init__(self): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/milvus/test_milvus.py b/api/providers/vdb/vdb-milvus/tests/unit_tests/test_milvus.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/milvus/test_milvus.py rename to 
api/providers/vdb/vdb-milvus/tests/unit_tests/test_milvus.py index 2ac2c40d38..36c0ed8f6f 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/milvus/test_milvus.py +++ b/api/providers/vdb/vdb-milvus/tests/unit_tests/test_milvus.py @@ -103,7 +103,7 @@ def milvus_module(monkeypatch): for name, module in _build_fake_pymilvus_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.milvus.milvus_vector as module + import dify_vdb_milvus.milvus_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-myscale/pyproject.toml b/api/providers/vdb/vdb-myscale/pyproject.toml new file mode 100644 index 0000000000..13e0f35d23 --- /dev/null +++ b/api/providers/vdb/vdb-myscale/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-myscale" +version = "0.0.1" + +dependencies = [ + "clickhouse-connect~=0.15.0", +] +description = "Dify vector store backend (dify-vdb-myscale)." + +[project.entry-points."dify.vector_backends"] +myscale = "dify_vdb_myscale.myscale_vector:MyScaleVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/myscale/__init__.py b/api/providers/vdb/vdb-myscale/src/dify_vdb_myscale/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/myscale/__init__.py rename to api/providers/vdb/vdb-myscale/src/dify_vdb_myscale/__init__.py diff --git a/api/core/rag/datasource/vdb/myscale/myscale_vector.py b/api/providers/vdb/vdb-myscale/src/dify_vdb_myscale/myscale_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/myscale/myscale_vector.py rename to api/providers/vdb/vdb-myscale/src/dify_vdb_myscale/myscale_vector.py diff --git a/api/tests/integration_tests/vdb/myscale/test_myscale.py b/api/providers/vdb/vdb-myscale/tests/integration_tests/test_myscale.py similarity index 76% rename from api/tests/integration_tests/vdb/myscale/test_myscale.py rename to 
api/providers/vdb/vdb-myscale/tests/integration_tests/test_myscale.py index 74cefad2af..8ea42d5f45 100644 --- a/api/tests/integration_tests/vdb/myscale/test_myscale.py +++ b/api/providers/vdb/vdb-myscale/tests/integration_tests/test_myscale.py @@ -1,10 +1,9 @@ -from core.rag.datasource.vdb.myscale.myscale_vector import MyScaleConfig, MyScaleVector -from tests.integration_tests.vdb.test_vector_store import ( +from dify_vdb_myscale.myscale_vector import MyScaleConfig, MyScaleVector + +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, ) -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) - class MyScaleVectorTest(AbstractVectorTest): def __init__(self): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/myscale/test_myscale_vector.py b/api/providers/vdb/vdb-myscale/tests/unit_tests/test_myscale_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/myscale/test_myscale_vector.py rename to api/providers/vdb/vdb-myscale/tests/unit_tests/test_myscale_vector.py index a75ba82238..228ea92639 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/myscale/test_myscale_vector.py +++ b/api/providers/vdb/vdb-myscale/tests/unit_tests/test_myscale_vector.py @@ -42,7 +42,7 @@ def myscale_module(monkeypatch): fake_module = _build_fake_clickhouse_connect_module() monkeypatch.setitem(sys.modules, "clickhouse_connect", fake_module) - import core.rag.datasource.vdb.myscale.myscale_vector as module + import dify_vdb_myscale.myscale_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-oceanbase/pyproject.toml b/api/providers/vdb/vdb-oceanbase/pyproject.toml new file mode 100644 index 0000000000..887869a41c --- /dev/null +++ b/api/providers/vdb/vdb-oceanbase/pyproject.toml @@ -0,0 +1,16 @@ +[project] +name = "dify-vdb-oceanbase" +version = "0.0.1" + +dependencies = [ + "pyobvector~=0.2.17", + "mysql-connector-python>=9.3.0", +] +description = "Dify vector store 
backend (dify-vdb-oceanbase)." + +[project.entry-points."dify.vector_backends"] +oceanbase = "dify_vdb_oceanbase.oceanbase_vector:OceanBaseVectorFactory" +seekdb = "dify_vdb_oceanbase.oceanbase_vector:OceanBaseVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/oceanbase/__init__.py b/api/providers/vdb/vdb-oceanbase/src/dify_vdb_oceanbase/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/oceanbase/__init__.py rename to api/providers/vdb/vdb-oceanbase/src/dify_vdb_oceanbase/__init__.py diff --git a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py b/api/providers/vdb/vdb-oceanbase/src/dify_vdb_oceanbase/oceanbase_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py rename to api/providers/vdb/vdb-oceanbase/src/dify_vdb_oceanbase/oceanbase_vector.py diff --git a/api/tests/integration_tests/vdb/oceanbase/bench_oceanbase.py b/api/providers/vdb/vdb-oceanbase/tests/integration_tests/bench_oceanbase.py similarity index 87% rename from api/tests/integration_tests/vdb/oceanbase/bench_oceanbase.py rename to api/providers/vdb/vdb-oceanbase/tests/integration_tests/bench_oceanbase.py index 8b57be08c5..50f6736942 100644 --- a/api/tests/integration_tests/vdb/oceanbase/bench_oceanbase.py +++ b/api/providers/vdb/vdb-oceanbase/tests/integration_tests/bench_oceanbase.py @@ -2,11 +2,12 @@ Benchmark: OceanBase vector store — old (single-row) vs new (batch) insertion, metadata query with/without functional index, and vector search across metrics. 
-Usage: - uv run --project api python -m tests.integration_tests.vdb.oceanbase.bench_oceanbase +Usage (from repo root): + uv run --project api python api/providers/vdb/vdb-oceanbase/tests/integration_tests/bench_oceanbase.py """ import json +import logging import random import statistics import time @@ -16,6 +17,8 @@ from pyobvector import VECTOR, ObVecClient, cosine_distance, inner_product, l2_d from sqlalchemy import JSON, Column, String, text from sqlalchemy.dialects.mysql import LONGTEXT +logger = logging.getLogger(__name__) + # --------------------------------------------------------------------------- # Config # --------------------------------------------------------------------------- @@ -114,7 +117,7 @@ def bench_metadata_query(client, table, doc_id, with_index=False): try: client.perform_raw_text_sql(f"CREATE INDEX idx_metadata_doc_id ON `{table}` ((metadata->>'$.document_id'))") except Exception: - pass # already exists + logger.debug("Index idx_metadata_doc_id already exists, skipping creation") sql = text(f"SELECT id FROM `{table}` WHERE metadata->>'$.document_id' = :val") times = [] @@ -164,11 +167,11 @@ def main(): client = _make_client() client_pooled = _make_client(pool_size=5, max_overflow=10, pool_recycle=3600, pool_pre_ping=True) - print("=" * 70) - print("OceanBase Vector Store — Performance Benchmark") - print(f" Endpoint : {HOST}:{PORT}") - print(f" Vec dim : {VEC_DIM}") - print("=" * 70) + logger.info("=" * 70) + logger.info("OceanBase Vector Store — Performance Benchmark") + logger.info(" Endpoint : %s:%s", HOST, PORT) + logger.info(" Vec dim : %s", VEC_DIM) + logger.info("=" * 70) # ------------------------------------------------------------------ # 1. 
Insertion benchmark @@ -187,10 +190,10 @@ def main(): t_batch = bench_insert_batch(client_pooled, tbl_batch, rows, batch_size=100) speedup = t_single / t_batch if t_batch > 0 else float("inf") - print(f"\n[Insert {n_docs} docs]") - print(f" Single-row : {t_single:.2f}s") - print(f" Batch(100) : {t_batch:.2f}s") - print(f" Speedup : {speedup:.1f}x") + logger.info("\n[Insert %s docs]", n_docs) + logger.info(" Single-row : %.2fs", t_single) + logger.info(" Batch(100) : %.2fs", t_batch) + logger.info(" Speedup : %.1fx", speedup) # ------------------------------------------------------------------ # 2. Metadata query benchmark (use the 1000-doc batch table) @@ -203,16 +206,16 @@ def main(): res = conn.execute(text(f"SELECT metadata->>'$.document_id' FROM `{tbl_meta}` LIMIT 1")) doc_id_1000 = res.fetchone()[0] - print("\n[Metadata filter query — 1000 rows, by document_id]") + logger.info("\n[Metadata filter query — 1000 rows, by document_id]") times_no_idx = bench_metadata_query(client, tbl_meta, doc_id_1000, with_index=False) - print(f" Without index : {_fmt(times_no_idx)}") + logger.info(" Without index : %s", _fmt(times_no_idx)) times_with_idx = bench_metadata_query(client, tbl_meta, doc_id_1000, with_index=True) - print(f" With index : {_fmt(times_with_idx)}") + logger.info(" With index : %s", _fmt(times_with_idx)) # ------------------------------------------------------------------ # 3. 
Vector search benchmark — across metrics # ------------------------------------------------------------------ - print("\n[Vector search — top-10, 20 queries each, on 1000 rows]") + logger.info("\n[Vector search — top-10, 20 queries each, on 1000 rows]") for metric in ["l2", "cosine", "inner_product"]: tbl_vs = f"bench_vs_{metric}" @@ -222,7 +225,7 @@ def main(): rows_vs, _ = _gen_rows(1000) bench_insert_batch(client_pooled, tbl_vs, rows_vs, batch_size=100) times = bench_vector_search(client_pooled, tbl_vs, metric, topk=10, n_queries=20) - print(f" {metric:15s}: {_fmt(times)}") + logger.info(" %-15s: %s", metric, _fmt(times)) _drop(client_pooled, tbl_vs) # ------------------------------------------------------------------ @@ -232,9 +235,9 @@ def main(): _drop(client, f"bench_single_{n}") _drop(client, f"bench_batch_{n}") - print("\n" + "=" * 70) - print("Benchmark complete.") - print("=" * 70) + logger.info("\n%s", "=" * 70) + logger.info("Benchmark complete.") + logger.info("=" * 70) if __name__ == "__main__": diff --git a/api/tests/integration_tests/vdb/oceanbase/test_oceanbase.py b/api/providers/vdb/vdb-oceanbase/tests/integration_tests/test_oceanbase.py similarity index 82% rename from api/tests/integration_tests/vdb/oceanbase/test_oceanbase.py rename to api/providers/vdb/vdb-oceanbase/tests/integration_tests/test_oceanbase.py index 410de2c5ad..28f22d3cbc 100644 --- a/api/tests/integration_tests/vdb/oceanbase/test_oceanbase.py +++ b/api/providers/vdb/vdb-oceanbase/tests/integration_tests/test_oceanbase.py @@ -1,15 +1,13 @@ import pytest - -from core.rag.datasource.vdb.oceanbase.oceanbase_vector import ( +from dify_vdb_oceanbase.oceanbase_vector import ( OceanBaseVector, OceanBaseVectorConfig, ) -from tests.integration_tests.vdb.test_vector_store import ( + +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, ) -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) - @pytest.fixture def oceanbase_vector(): diff 
--git a/api/tests/unit_tests/core/rag/datasource/vdb/oceanbase/test_oceanbase_vector.py b/api/providers/vdb/vdb-oceanbase/tests/unit_tests/test_oceanbase_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/oceanbase/test_oceanbase_vector.py rename to api/providers/vdb/vdb-oceanbase/tests/unit_tests/test_oceanbase_vector.py index 27d8198ec0..31f9ff3e56 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/oceanbase/test_oceanbase_vector.py +++ b/api/providers/vdb/vdb-oceanbase/tests/unit_tests/test_oceanbase_vector.py @@ -56,7 +56,7 @@ def _build_fake_pyobvector_module(): def oceanbase_module(monkeypatch): monkeypatch.setitem(sys.modules, "pyobvector", _build_fake_pyobvector_module()) - import core.rag.datasource.vdb.oceanbase.oceanbase_vector as module + import dify_vdb_oceanbase.oceanbase_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-opengauss/pyproject.toml b/api/providers/vdb/vdb-opengauss/pyproject.toml new file mode 100644 index 0000000000..79be94b9e3 --- /dev/null +++ b/api/providers/vdb/vdb-opengauss/pyproject.toml @@ -0,0 +1,12 @@ +[project] +name = "dify-vdb-opengauss" +version = "0.0.1" + +dependencies = [] +description = "Dify vector store backend (dify-vdb-opengauss)." 
+ +[project.entry-points."dify.vector_backends"] +opengauss = "dify_vdb_opengauss.opengauss:OpenGaussFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/opengauss/__init__.py b/api/providers/vdb/vdb-opengauss/src/dify_vdb_opengauss/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/opengauss/__init__.py rename to api/providers/vdb/vdb-opengauss/src/dify_vdb_opengauss/__init__.py diff --git a/api/core/rag/datasource/vdb/opengauss/opengauss.py b/api/providers/vdb/vdb-opengauss/src/dify_vdb_opengauss/opengauss.py similarity index 100% rename from api/core/rag/datasource/vdb/opengauss/opengauss.py rename to api/providers/vdb/vdb-opengauss/src/dify_vdb_opengauss/opengauss.py diff --git a/api/tests/integration_tests/vdb/opengauss/test_opengauss.py b/api/providers/vdb/vdb-opengauss/tests/integration_tests/test_opengauss.py similarity index 82% rename from api/tests/integration_tests/vdb/opengauss/test_opengauss.py rename to api/providers/vdb/vdb-opengauss/tests/integration_tests/test_opengauss.py index 78436a19ee..8b444527d7 100644 --- a/api/tests/integration_tests/vdb/opengauss/test_opengauss.py +++ b/api/providers/vdb/vdb-opengauss/tests/integration_tests/test_opengauss.py @@ -1,14 +1,12 @@ import time import psycopg2 +from dify_vdb_opengauss.opengauss import OpenGauss, OpenGaussConfig -from core.rag.datasource.vdb.opengauss.opengauss import OpenGauss, OpenGaussConfig -from tests.integration_tests.vdb.test_vector_store import ( +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, ) -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) - class OpenGaussTest(AbstractVectorTest): def __init__(self): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/opengauss/test_opengauss.py b/api/providers/vdb/vdb-opengauss/tests/unit_tests/test_opengauss.py similarity index 99% rename from 
api/tests/unit_tests/core/rag/datasource/vdb/opengauss/test_opengauss.py rename to api/providers/vdb/vdb-opengauss/tests/unit_tests/test_opengauss.py index 6641dbe4a0..09abd625fc 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/opengauss/test_opengauss.py +++ b/api/providers/vdb/vdb-opengauss/tests/unit_tests/test_opengauss.py @@ -41,7 +41,7 @@ def opengauss_module(monkeypatch): for name, module in _build_fake_psycopg2_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.opengauss.opengauss as module + import dify_vdb_opengauss.opengauss as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-opensearch/pyproject.toml b/api/providers/vdb/vdb-opensearch/pyproject.toml new file mode 100644 index 0000000000..56f303fdf5 --- /dev/null +++ b/api/providers/vdb/vdb-opensearch/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-opensearch" +version = "0.0.1" + +dependencies = [ + "opensearch-py==3.1.0", +] +description = "Dify vector store backend (dify-vdb-opensearch)." 
+ +[project.entry-points."dify.vector_backends"] +opensearch = "dify_vdb_opensearch.opensearch_vector:OpenSearchVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/opensearch/__init__.py b/api/providers/vdb/vdb-opensearch/src/dify_vdb_opensearch/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/opensearch/__init__.py rename to api/providers/vdb/vdb-opensearch/src/dify_vdb_opensearch/__init__.py diff --git a/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py b/api/providers/vdb/vdb-opensearch/src/dify_vdb_opensearch/opensearch_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/opensearch/opensearch_vector.py rename to api/providers/vdb/vdb-opensearch/src/dify_vdb_opensearch/opensearch_vector.py diff --git a/api/providers/vdb/vdb-opensearch/tests/unit_tests/test_opensearch.py b/api/providers/vdb/vdb-opensearch/tests/unit_tests/test_opensearch.py new file mode 100644 index 0000000000..f2ed7cb6fb --- /dev/null +++ b/api/providers/vdb/vdb-opensearch/tests/unit_tests/test_opensearch.py @@ -0,0 +1,332 @@ +import importlib +import sys +import types +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +from core.rag.datasource.vdb.field import Field +from core.rag.models.document import Document +from extensions import ext_redis + + +def _build_fake_opensearch_modules(): + """Build fake opensearchpy modules to avoid the ``from events import Events`` + namespace collision (opensearch-py #756).""" + opensearchpy = types.ModuleType("opensearchpy") + opensearchpy_helpers = types.ModuleType("opensearchpy.helpers") + + class BulkIndexError(Exception): + def __init__(self, errors): + super().__init__("bulk error") + self.errors = errors + + class Urllib3AWSV4SignerAuth: + def __init__(self, credentials, region, service): + self.credentials = credentials + self.region = region + self.service = service + + class Urllib3HttpConnection: + 
pass + + class _IndicesClient: + def __init__(self): + self.exists = MagicMock(return_value=False) + self.create = MagicMock() + self.delete = MagicMock() + + class OpenSearch: + def __init__(self, **kwargs): + self.kwargs = kwargs + self.indices = _IndicesClient() + self.search = MagicMock(return_value={"hits": {"hits": []}}) + self.get = MagicMock() + + helpers = SimpleNamespace(bulk=MagicMock()) + + opensearchpy.OpenSearch = OpenSearch + opensearchpy.Urllib3AWSV4SignerAuth = Urllib3AWSV4SignerAuth + opensearchpy.Urllib3HttpConnection = Urllib3HttpConnection + opensearchpy.helpers = helpers + opensearchpy_helpers.BulkIndexError = BulkIndexError + + return { + "opensearchpy": opensearchpy, + "opensearchpy.helpers": opensearchpy_helpers, + } + + +@pytest.fixture +def opensearch_module(monkeypatch): + for name, module in _build_fake_opensearch_modules().items(): + monkeypatch.setitem(sys.modules, name, module) + + import dify_vdb_opensearch.opensearch_vector as module + + return importlib.reload(module) + + +def _config(module, **overrides): + values = { + "host": "localhost", + "port": 9200, + "secure": False, + "user": "admin", + "password": "password", + } + values.update(overrides) + return module.OpenSearchConfig.model_validate(values) + + +def get_example_text() -> str: + return "This is a sample text for testing purposes." 
+ + +class TestOpenSearchConfig: + def test_to_opensearch_params(self, opensearch_module): + config = _config(opensearch_module, secure=True) + params = config.to_opensearch_params() + + assert params["hosts"] == [{"host": "localhost", "port": 9200}] + assert params["use_ssl"] is True + assert params["verify_certs"] is True + assert params["connection_class"].__name__ == "Urllib3HttpConnection" + assert params["http_auth"] == ("admin", "password") + + def test_to_opensearch_params_with_aws_managed_iam(self, opensearch_module, monkeypatch): + class _Session: + def get_credentials(self): + return "creds" + + boto3 = types.ModuleType("boto3") + boto3.Session = _Session + monkeypatch.setitem(sys.modules, "boto3", boto3) + + config = _config( + opensearch_module, + secure=True, + auth_method="aws_managed_iam", + aws_region="ap-southeast-2", + aws_service="aoss", + host="aoss-endpoint.ap-southeast-2.aoss.amazonaws.com", + port=9201, + ) + params = config.to_opensearch_params() + + assert params["hosts"] == [{"host": "aoss-endpoint.ap-southeast-2.aoss.amazonaws.com", "port": 9201}] + assert params["use_ssl"] is True + assert params["verify_certs"] is True + assert params["connection_class"].__name__ == "Urllib3HttpConnection" + assert params["http_auth"].credentials == "creds" + assert params["http_auth"].region == "ap-southeast-2" + assert params["http_auth"].service == "aoss" + + +class TestOpenSearchVector: + COLLECTION_NAME = "test_collection" + EXAMPLE_DOC_ID = "example_doc_id" + + def _make_vector(self, module): + vector = module.OpenSearchVector(self.COLLECTION_NAME, _config(module)) + vector._client = MagicMock() + return vector + + @pytest.mark.parametrize( + ("search_response", "expected_length", "expected_doc_id"), + [ + ( + { + "hits": { + "total": {"value": 1}, + "hits": [ + { + "_source": { + "page_content": get_example_text(), + "metadata": {"document_id": "example_doc_id"}, + } + } + ], + } + }, + 1, + "example_doc_id", + ), + ({"hits": {"total": {"value": 
0}, "hits": []}}, 0, None), + ], + ) + def test_search_by_full_text(self, opensearch_module, search_response, expected_length, expected_doc_id): + vector = self._make_vector(opensearch_module) + vector._client.search.return_value = search_response + + hits = vector.search_by_full_text(query=get_example_text()) + assert len(hits) == expected_length + if expected_length > 0: + assert hits[0].metadata["document_id"] == expected_doc_id + + def test_search_by_vector(self, opensearch_module): + vector = self._make_vector(opensearch_module) + query_vector = [0.1] * 128 + mock_response = { + "hits": { + "total": {"value": 1}, + "hits": [ + { + "_source": { + Field.CONTENT_KEY: get_example_text(), + Field.METADATA_KEY: {"document_id": self.EXAMPLE_DOC_ID}, + }, + "_score": 1.0, + } + ], + } + } + vector._client.search.return_value = mock_response + + hits = vector.search_by_vector(query_vector=query_vector) + + assert len(hits) > 0 + assert hits[0].metadata["document_id"] == self.EXAMPLE_DOC_ID + + def test_get_ids_by_metadata_field(self, opensearch_module): + vector = self._make_vector(opensearch_module) + mock_response = {"hits": {"total": {"value": 1}, "hits": [{"_id": "mock_id"}]}} + vector._client.search.return_value = mock_response + + doc = Document(page_content="Test content", metadata={"document_id": self.EXAMPLE_DOC_ID}) + embedding = [0.1] * 128 + + opensearch_module.helpers.bulk.reset_mock() + vector.add_texts([doc], [embedding]) + + ids = vector.get_ids_by_metadata_field(key="document_id", value=self.EXAMPLE_DOC_ID) + assert len(ids) == 1 + assert ids[0] == "mock_id" + + def test_add_texts(self, opensearch_module): + vector = self._make_vector(opensearch_module) + vector._client.index.return_value = {"result": "created"} + + doc = Document(page_content="Test content", metadata={"document_id": self.EXAMPLE_DOC_ID}) + embedding = [0.1] * 128 + + opensearch_module.helpers.bulk.reset_mock() + vector.add_texts([doc], [embedding]) + + mock_response = {"hits": 
{"total": {"value": 1}, "hits": [{"_id": "mock_id"}]}} + vector._client.search.return_value = mock_response + + ids = vector.get_ids_by_metadata_field(key="document_id", value=self.EXAMPLE_DOC_ID) + assert len(ids) == 1 + assert ids[0] == "mock_id" + + def test_delete_nonexistent_index(self, opensearch_module): + """ignore_unavailable=True handles non-existent indices gracefully.""" + vector = self._make_vector(opensearch_module) + vector.delete() + + vector._client.indices.delete.assert_called_once_with( + index=self.COLLECTION_NAME.lower(), ignore_unavailable=True + ) + + def test_delete_existing_index(self, opensearch_module): + vector = self._make_vector(opensearch_module) + vector.delete() + + vector._client.indices.delete.assert_called_once_with( + index=self.COLLECTION_NAME.lower(), ignore_unavailable=True + ) + + +@pytest.fixture(scope="module") +def setup_mock_redis(): + ext_redis.redis_client.get = MagicMock(return_value=None) + ext_redis.redis_client.set = MagicMock(return_value=None) + + mock_redis_lock = MagicMock() + mock_redis_lock.__enter__ = MagicMock() + mock_redis_lock.__exit__ = MagicMock() + ext_redis.redis_client.lock = MagicMock(return_value=mock_redis_lock) + + +@pytest.mark.usefixtures("setup_mock_redis") +class TestOpenSearchVectorWithRedis: + COLLECTION_NAME = "test_collection" + EXAMPLE_DOC_ID = "example_doc_id" + + def _make_vector(self, module): + vector = module.OpenSearchVector(self.COLLECTION_NAME, _config(module)) + vector._client = MagicMock() + return vector + + def test_search_by_full_text(self, opensearch_module): + vector = self._make_vector(opensearch_module) + search_response = { + "hits": { + "total": {"value": 1}, + "hits": [ + {"_source": {"page_content": get_example_text(), "metadata": {"document_id": "example_doc_id"}}} + ], + } + } + vector._client.search.return_value = search_response + + hits = vector.search_by_full_text(query=get_example_text()) + assert len(hits) == 1 + assert hits[0].metadata["document_id"] == 
"example_doc_id" + + def test_get_ids_by_metadata_field(self, opensearch_module): + vector = self._make_vector(opensearch_module) + mock_response = {"hits": {"total": {"value": 1}, "hits": [{"_id": "mock_id"}]}} + vector._client.search.return_value = mock_response + + doc = Document(page_content="Test content", metadata={"document_id": self.EXAMPLE_DOC_ID}) + embedding = [0.1] * 128 + + opensearch_module.helpers.bulk.reset_mock() + vector.add_texts([doc], [embedding]) + + ids = vector.get_ids_by_metadata_field(key="document_id", value=self.EXAMPLE_DOC_ID) + assert len(ids) == 1 + assert ids[0] == "mock_id" + + def test_add_texts(self, opensearch_module): + vector = self._make_vector(opensearch_module) + vector._client.index.return_value = {"result": "created"} + + doc = Document(page_content="Test content", metadata={"document_id": self.EXAMPLE_DOC_ID}) + embedding = [0.1] * 128 + + opensearch_module.helpers.bulk.reset_mock() + vector.add_texts([doc], [embedding]) + + mock_response = {"hits": {"total": {"value": 1}, "hits": [{"_id": "mock_id"}]}} + vector._client.search.return_value = mock_response + + ids = vector.get_ids_by_metadata_field(key="document_id", value=self.EXAMPLE_DOC_ID) + assert len(ids) == 1 + assert ids[0] == "mock_id" + + def test_search_by_vector(self, opensearch_module): + vector = self._make_vector(opensearch_module) + query_vector = [0.1] * 128 + mock_response = { + "hits": { + "total": {"value": 1}, + "hits": [ + { + "_source": { + Field.CONTENT_KEY: get_example_text(), + Field.METADATA_KEY: {"document_id": self.EXAMPLE_DOC_ID}, + }, + "_score": 1.0, + } + ], + } + } + vector._client.search.return_value = mock_response + + hits = vector.search_by_vector(query_vector=query_vector) + assert len(hits) > 0 + assert hits[0].metadata["document_id"] == self.EXAMPLE_DOC_ID diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/opensearch/test_opensearch_vector.py b/api/providers/vdb/vdb-opensearch/tests/unit_tests/test_opensearch_vector.py 
similarity index 98% rename from api/tests/unit_tests/core/rag/datasource/vdb/opensearch/test_opensearch_vector.py rename to api/providers/vdb/vdb-opensearch/tests/unit_tests/test_opensearch_vector.py index 1030158dd1..1c2921f85b 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/opensearch/test_opensearch_vector.py +++ b/api/providers/vdb/vdb-opensearch/tests/unit_tests/test_opensearch_vector.py @@ -10,6 +10,8 @@ from pydantic import ValidationError from core.rag.models.document import Document +# TODO(wylswz): There's a known issue with namespace collision +# https://github.com/langgenius/dify/issues/34732 def _build_fake_opensearch_modules(): opensearchpy = types.ModuleType("opensearchpy") opensearchpy_helpers = types.ModuleType("opensearchpy.helpers") @@ -60,7 +62,7 @@ def opensearch_module(monkeypatch): for name, module in _build_fake_opensearch_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.opensearch.opensearch_vector as module + import dify_vdb_opensearch.opensearch_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-oracle/pyproject.toml b/api/providers/vdb/vdb-oracle/pyproject.toml new file mode 100644 index 0000000000..6747485041 --- /dev/null +++ b/api/providers/vdb/vdb-oracle/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-oracle" +version = "0.0.1" + +dependencies = [ + "oracledb==3.4.2", +] +description = "Dify vector store backend (dify-vdb-oracle)." 
+ +[project.entry-points."dify.vector_backends"] +oracle = "dify_vdb_oracle.oraclevector:OracleVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/oracle/__init__.py b/api/providers/vdb/vdb-oracle/src/dify_vdb_oracle/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/oracle/__init__.py rename to api/providers/vdb/vdb-oracle/src/dify_vdb_oracle/__init__.py diff --git a/api/core/rag/datasource/vdb/oracle/oraclevector.py b/api/providers/vdb/vdb-oracle/src/dify_vdb_oracle/oraclevector.py similarity index 100% rename from api/core/rag/datasource/vdb/oracle/oraclevector.py rename to api/providers/vdb/vdb-oracle/src/dify_vdb_oracle/oraclevector.py diff --git a/api/tests/integration_tests/vdb/oracle/test_oraclevector.py b/api/providers/vdb/vdb-oracle/tests/integration_tests/test_oraclevector.py similarity index 76% rename from api/tests/integration_tests/vdb/oracle/test_oraclevector.py rename to api/providers/vdb/vdb-oracle/tests/integration_tests/test_oraclevector.py index 8920dc97eb..aceb41289c 100644 --- a/api/tests/integration_tests/vdb/oracle/test_oraclevector.py +++ b/api/providers/vdb/vdb-oracle/tests/integration_tests/test_oraclevector.py @@ -1,11 +1,10 @@ -from core.rag.datasource.vdb.oracle.oraclevector import OracleVector, OracleVectorConfig -from core.rag.models.document import Document -from tests.integration_tests.vdb.test_vector_store import ( +from dify_vdb_oracle.oraclevector import OracleVector, OracleVectorConfig + +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, get_example_text, ) - -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) +from core.rag.models.document import Document class OracleVectorTest(AbstractVectorTest): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/oracle/test_oraclevector.py b/api/providers/vdb/vdb-oracle/tests/unit_tests/test_oraclevector.py similarity index 99% rename from 
api/tests/unit_tests/core/rag/datasource/vdb/oracle/test_oraclevector.py rename to api/providers/vdb/vdb-oracle/tests/unit_tests/test_oraclevector.py index 817a7d342b..678cf876b0 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/oracle/test_oraclevector.py +++ b/api/providers/vdb/vdb-oracle/tests/unit_tests/test_oraclevector.py @@ -55,7 +55,7 @@ def oracle_module(monkeypatch): for name, module in _build_fake_oracle_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.oracle.oraclevector as module + import dify_vdb_oracle.oraclevector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-pgvecto-rs/pyproject.toml b/api/providers/vdb/vdb-pgvecto-rs/pyproject.toml new file mode 100644 index 0000000000..9a25442e9e --- /dev/null +++ b/api/providers/vdb/vdb-pgvecto-rs/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-pgvecto-rs" +version = "0.0.1" + +dependencies = [ + "pgvecto-rs[sqlalchemy]~=0.2.2", +] +description = "Dify vector store backend (dify-vdb-pgvecto-rs)." 
+ +[project.entry-points."dify.vector_backends"] +pgvecto-rs = "dify_vdb_pgvecto_rs.pgvecto_rs:PGVectoRSFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/pgvecto_rs/__init__.py b/api/providers/vdb/vdb-pgvecto-rs/src/dify_vdb_pgvecto_rs/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/pgvecto_rs/__init__.py rename to api/providers/vdb/vdb-pgvecto-rs/src/dify_vdb_pgvecto_rs/__init__.py diff --git a/api/core/rag/datasource/vdb/pgvecto_rs/collection.py b/api/providers/vdb/vdb-pgvecto-rs/src/dify_vdb_pgvecto_rs/collection.py similarity index 100% rename from api/core/rag/datasource/vdb/pgvecto_rs/collection.py rename to api/providers/vdb/vdb-pgvecto-rs/src/dify_vdb_pgvecto_rs/collection.py diff --git a/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py b/api/providers/vdb/vdb-pgvecto-rs/src/dify_vdb_pgvecto_rs/pgvecto_rs.py similarity index 99% rename from api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py rename to api/providers/vdb/vdb-pgvecto-rs/src/dify_vdb_pgvecto_rs/pgvecto_rs.py index 387e918c76..2f52af5681 100644 --- a/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py +++ b/api/providers/vdb/vdb-pgvecto-rs/src/dify_vdb_pgvecto_rs/pgvecto_rs.py @@ -12,12 +12,12 @@ from sqlalchemy.dialects import postgresql from sqlalchemy.orm import Mapped, Session, mapped_column, sessionmaker from configs import dify_config -from core.rag.datasource.vdb.pgvecto_rs.collection import CollectionORM from core.rag.datasource.vdb.vector_base import BaseVector from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory from core.rag.datasource.vdb.vector_type import VectorType from core.rag.embedding.embedding_base import Embeddings from core.rag.models.document import Document +from dify_vdb_pgvecto_rs.collection import CollectionORM from extensions.ext_redis import redis_client from models.dataset import Dataset diff --git a/api/tests/integration_tests/vdb/pgvecto_rs/test_pgvecto_rs.py 
b/api/providers/vdb/vdb-pgvecto-rs/tests/integration_tests/test_pgvecto_rs.py similarity index 82% rename from api/tests/integration_tests/vdb/pgvecto_rs/test_pgvecto_rs.py rename to api/providers/vdb/vdb-pgvecto-rs/tests/integration_tests/test_pgvecto_rs.py index 6210613d42..9fc8627851 100644 --- a/api/tests/integration_tests/vdb/pgvecto_rs/test_pgvecto_rs.py +++ b/api/providers/vdb/vdb-pgvecto-rs/tests/integration_tests/test_pgvecto_rs.py @@ -1,11 +1,10 @@ -from core.rag.datasource.vdb.pgvecto_rs.pgvecto_rs import PGVectoRS, PgvectoRSConfig -from tests.integration_tests.vdb.test_vector_store import ( +from dify_vdb_pgvecto_rs.pgvecto_rs import PGVectoRS, PgvectoRSConfig + +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, get_example_text, ) -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) - class PGVectoRSVectorTest(AbstractVectorTest): def __init__(self): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/pgvecto_rs/test_pgvecto_rs.py b/api/providers/vdb/vdb-pgvecto-rs/tests/unit_tests/test_pgvecto_rs.py similarity index 98% rename from api/tests/unit_tests/core/rag/datasource/vdb/pgvecto_rs/test_pgvecto_rs.py rename to api/providers/vdb/vdb-pgvecto-rs/tests/unit_tests/test_pgvecto_rs.py index 5b9ec8002a..c3291f7f12 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/pgvecto_rs/test_pgvecto_rs.py +++ b/api/providers/vdb/vdb-pgvecto-rs/tests/unit_tests/test_pgvecto_rs.py @@ -83,8 +83,8 @@ def pgvecto_module(monkeypatch): for name, module in _build_fake_pgvecto_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.pgvecto_rs.collection as collection_module - import core.rag.datasource.vdb.pgvecto_rs.pgvecto_rs as module + import dify_vdb_pgvecto_rs.collection as collection_module + import dify_vdb_pgvecto_rs.pgvecto_rs as module return importlib.reload(module), importlib.reload(collection_module) diff --git 
a/api/providers/vdb/vdb-pgvector/pyproject.toml b/api/providers/vdb/vdb-pgvector/pyproject.toml new file mode 100644 index 0000000000..2a972aa277 --- /dev/null +++ b/api/providers/vdb/vdb-pgvector/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-pgvector" +version = "0.0.1" + +dependencies = [ + "pgvector==0.4.2", +] +description = "Dify vector store backend (dify-vdb-pgvector)." + +[project.entry-points."dify.vector_backends"] +pgvector = "dify_vdb_pgvector.pgvector:PGVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/pgvector/__init__.py b/api/providers/vdb/vdb-pgvector/src/dify_vdb_pgvector/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/pgvector/__init__.py rename to api/providers/vdb/vdb-pgvector/src/dify_vdb_pgvector/__init__.py diff --git a/api/core/rag/datasource/vdb/pgvector/pgvector.py b/api/providers/vdb/vdb-pgvector/src/dify_vdb_pgvector/pgvector.py similarity index 100% rename from api/core/rag/datasource/vdb/pgvector/pgvector.py rename to api/providers/vdb/vdb-pgvector/src/dify_vdb_pgvector/pgvector.py diff --git a/api/tests/integration_tests/vdb/pgvector/test_pgvector.py b/api/providers/vdb/vdb-pgvector/tests/integration_tests/test_pgvector.py similarity index 73% rename from api/tests/integration_tests/vdb/pgvector/test_pgvector.py rename to api/providers/vdb/vdb-pgvector/tests/integration_tests/test_pgvector.py index 4fdeca5a3a..974657510e 100644 --- a/api/tests/integration_tests/vdb/pgvector/test_pgvector.py +++ b/api/providers/vdb/vdb-pgvector/tests/integration_tests/test_pgvector.py @@ -1,10 +1,9 @@ -from core.rag.datasource.vdb.pgvector.pgvector import PGVector, PGVectorConfig -from tests.integration_tests.vdb.test_vector_store import ( +from dify_vdb_pgvector.pgvector import PGVector, PGVectorConfig + +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, ) -pytest_plugins = 
("tests.integration_tests.vdb.test_vector_store",) - class PGVectorTest(AbstractVectorTest): def __init__(self): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/pgvector/test_pgvector.py b/api/providers/vdb/vdb-pgvector/tests/unit_tests/test_pgvector.py similarity index 92% rename from api/tests/unit_tests/core/rag/datasource/vdb/pgvector/test_pgvector.py rename to api/providers/vdb/vdb-pgvector/tests/unit_tests/test_pgvector.py index 7505262eb7..99a6e00c16 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/pgvector/test_pgvector.py +++ b/api/providers/vdb/vdb-pgvector/tests/unit_tests/test_pgvector.py @@ -2,13 +2,10 @@ from contextlib import contextmanager from types import SimpleNamespace from unittest.mock import MagicMock, patch +import dify_vdb_pgvector.pgvector as pgvector_module import pytest +from dify_vdb_pgvector.pgvector import PGVector, PGVectorConfig -import core.rag.datasource.vdb.pgvector.pgvector as pgvector_module -from core.rag.datasource.vdb.pgvector.pgvector import ( - PGVector, - PGVectorConfig, -) from core.rag.models.document import Document @@ -26,7 +23,7 @@ class TestPGVector: ) self.collection_name = "test_collection" - @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") + @patch("dify_vdb_pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") def test_init(self, mock_pool_class): """Test PGVector initialization.""" mock_pool = MagicMock() @@ -41,7 +38,7 @@ class TestPGVector: assert pgvector.pg_bigm is False assert pgvector.index_hash is not None - @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") + @patch("dify_vdb_pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") def test_init_with_pg_bigm(self, mock_pool_class): """Test PGVector initialization with pg_bigm enabled.""" config = PGVectorConfig( @@ -61,8 +58,8 @@ class TestPGVector: assert pgvector.pg_bigm is True - 
@patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") - @patch("core.rag.datasource.vdb.pgvector.pgvector.redis_client") + @patch("dify_vdb_pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") + @patch("dify_vdb_pgvector.pgvector.redis_client") def test_create_collection_basic(self, mock_redis, mock_pool_class): """Test basic collection creation.""" # Mock Redis operations @@ -104,8 +101,8 @@ class TestPGVector: # Verify Redis cache was set mock_redis.set.assert_called_once() - @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") - @patch("core.rag.datasource.vdb.pgvector.pgvector.redis_client") + @patch("dify_vdb_pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") + @patch("dify_vdb_pgvector.pgvector.redis_client") def test_create_collection_with_large_dimension(self, mock_redis, mock_pool_class): """Test collection creation with dimension > 2000 (no HNSW index).""" # Mock Redis operations @@ -139,8 +136,8 @@ class TestPGVector: hnsw_index_calls = [call for call in mock_cursor.execute.call_args_list if "hnsw" in str(call)] assert len(hnsw_index_calls) == 0 - @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") - @patch("core.rag.datasource.vdb.pgvector.pgvector.redis_client") + @patch("dify_vdb_pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") + @patch("dify_vdb_pgvector.pgvector.redis_client") def test_create_collection_with_pg_bigm(self, mock_redis, mock_pool_class): """Test collection creation with pg_bigm enabled.""" config = PGVectorConfig( @@ -180,8 +177,8 @@ class TestPGVector: bigm_index_calls = [call for call in mock_cursor.execute.call_args_list if "gin_bigm_ops" in str(call)] assert len(bigm_index_calls) == 1 - @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") - @patch("core.rag.datasource.vdb.pgvector.pgvector.redis_client") + @patch("dify_vdb_pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") + 
@patch("dify_vdb_pgvector.pgvector.redis_client") def test_create_collection_creates_vector_extension(self, mock_redis, mock_pool_class): """Test that vector extension is created if it doesn't exist.""" # Mock Redis operations @@ -213,8 +210,8 @@ class TestPGVector: ] assert len(create_extension_calls) == 1 - @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") - @patch("core.rag.datasource.vdb.pgvector.pgvector.redis_client") + @patch("dify_vdb_pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") + @patch("dify_vdb_pgvector.pgvector.redis_client") def test_create_collection_with_cache_hit(self, mock_redis, mock_pool_class): """Test that collection creation is skipped when cache exists.""" # Mock Redis operations - cache exists @@ -240,8 +237,8 @@ class TestPGVector: # Check that no SQL was executed (early return due to cache) assert mock_cursor.execute.call_count == 0 - @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") - @patch("core.rag.datasource.vdb.pgvector.pgvector.redis_client") + @patch("dify_vdb_pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") + @patch("dify_vdb_pgvector.pgvector.redis_client") def test_create_collection_with_redis_lock(self, mock_redis, mock_pool_class): """Test that Redis lock is used during collection creation.""" # Mock Redis operations @@ -273,7 +270,7 @@ class TestPGVector: mock_lock.__enter__.assert_called_once() mock_lock.__exit__.assert_called_once() - @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") + @patch("dify_vdb_pgvector.pgvector.psycopg2.pool.SimpleConnectionPool") def test_get_cursor_context_manager(self, mock_pool_class): """Test that _get_cursor properly manages connection lifecycle.""" mock_pool = MagicMock() diff --git a/api/providers/vdb/vdb-qdrant/pyproject.toml b/api/providers/vdb/vdb-qdrant/pyproject.toml new file mode 100644 index 0000000000..6dd0b9560b --- /dev/null +++ 
b/api/providers/vdb/vdb-qdrant/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-qdrant" +version = "0.0.1" + +dependencies = [ + "qdrant-client==1.9.0", +] +description = "Dify vector store backend (dify-vdb-qdrant)." + +[project.entry-points."dify.vector_backends"] +qdrant = "dify_vdb_qdrant.qdrant_vector:QdrantVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/pyvastbase/__init__.py b/api/providers/vdb/vdb-qdrant/src/dify_vdb_qdrant/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/pyvastbase/__init__.py rename to api/providers/vdb/vdb-qdrant/src/dify_vdb_qdrant/__init__.py diff --git a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py b/api/providers/vdb/vdb-qdrant/src/dify_vdb_qdrant/qdrant_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/qdrant/qdrant_vector.py rename to api/providers/vdb/vdb-qdrant/src/dify_vdb_qdrant/qdrant_vector.py diff --git a/api/tests/integration_tests/vdb/qdrant/test_qdrant.py b/api/providers/vdb/vdb-qdrant/tests/integration_tests/test_qdrant.py similarity index 95% rename from api/tests/integration_tests/vdb/qdrant/test_qdrant.py rename to api/providers/vdb/vdb-qdrant/tests/integration_tests/test_qdrant.py index 709cc2e14e..e0badeb5de 100644 --- a/api/tests/integration_tests/vdb/qdrant/test_qdrant.py +++ b/api/providers/vdb/vdb-qdrant/tests/integration_tests/test_qdrant.py @@ -1,12 +1,11 @@ import uuid -from core.rag.datasource.vdb.qdrant.qdrant_vector import QdrantConfig, QdrantVector -from core.rag.models.document import Document -from tests.integration_tests.vdb.test_vector_store import ( +from dify_vdb_qdrant.qdrant_vector import QdrantConfig, QdrantVector + +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, ) - -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) +from core.rag.models.document import Document class QdrantVectorTest(AbstractVectorTest): diff 
--git a/api/tests/unit_tests/core/rag/datasource/vdb/qdrant/test_qdrant_vector.py b/api/providers/vdb/vdb-qdrant/tests/unit_tests/test_qdrant_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/qdrant/test_qdrant_vector.py rename to api/providers/vdb/vdb-qdrant/tests/unit_tests/test_qdrant_vector.py index 0408506563..0ed5491fbe 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/qdrant/test_qdrant_vector.py +++ b/api/providers/vdb/vdb-qdrant/tests/unit_tests/test_qdrant_vector.py @@ -125,7 +125,7 @@ def qdrant_module(monkeypatch): for name, module in _build_fake_qdrant_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.qdrant.qdrant_vector as module + import dify_vdb_qdrant.qdrant_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-relyt/pyproject.toml b/api/providers/vdb/vdb-relyt/pyproject.toml new file mode 100644 index 0000000000..2a7c7fac87 --- /dev/null +++ b/api/providers/vdb/vdb-relyt/pyproject.toml @@ -0,0 +1,12 @@ +[project] +name = "dify-vdb-relyt" +version = "0.0.1" + +dependencies = [] +description = "Dify vector store backend (dify-vdb-relyt)." 
+ +[project.entry-points."dify.vector_backends"] +relyt = "dify_vdb_relyt.relyt_vector:RelytVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/qdrant/__init__.py b/api/providers/vdb/vdb-relyt/src/dify_vdb_relyt/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/qdrant/__init__.py rename to api/providers/vdb/vdb-relyt/src/dify_vdb_relyt/__init__.py diff --git a/api/core/rag/datasource/vdb/relyt/relyt_vector.py b/api/providers/vdb/vdb-relyt/src/dify_vdb_relyt/relyt_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/relyt/relyt_vector.py rename to api/providers/vdb/vdb-relyt/src/dify_vdb_relyt/relyt_vector.py diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/relyt/test_relyt_vector.py b/api/providers/vdb/vdb-relyt/tests/unit_tests/test_relyt_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/relyt/test_relyt_vector.py rename to api/providers/vdb/vdb-relyt/tests/unit_tests/test_relyt_vector.py index 43cdb4948d..f97ad1400a 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/relyt/test_relyt_vector.py +++ b/api/providers/vdb/vdb-relyt/tests/unit_tests/test_relyt_vector.py @@ -63,7 +63,7 @@ def relyt_module(monkeypatch): for name, module in _build_fake_relyt_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.relyt.relyt_vector as module + import dify_vdb_relyt.relyt_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-tablestore/pyproject.toml b/api/providers/vdb/vdb-tablestore/pyproject.toml new file mode 100644 index 0000000000..fd1a2d54e0 --- /dev/null +++ b/api/providers/vdb/vdb-tablestore/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-tablestore" +version = "0.0.1" + +dependencies = [ + "tablestore==6.4.4", +] +description = "Dify vector store backend (dify-vdb-tablestore)." 
+ +[project.entry-points."dify.vector_backends"] +tablestore = "dify_vdb_tablestore.tablestore_vector:TableStoreVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/relyt/__init__.py b/api/providers/vdb/vdb-tablestore/src/dify_vdb_tablestore/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/relyt/__init__.py rename to api/providers/vdb/vdb-tablestore/src/dify_vdb_tablestore/__init__.py diff --git a/api/core/rag/datasource/vdb/tablestore/tablestore_vector.py b/api/providers/vdb/vdb-tablestore/src/dify_vdb_tablestore/tablestore_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/tablestore/tablestore_vector.py rename to api/providers/vdb/vdb-tablestore/src/dify_vdb_tablestore/tablestore_vector.py diff --git a/api/tests/integration_tests/vdb/tablestore/test_tablestore.py b/api/providers/vdb/vdb-tablestore/tests/integration_tests/test_tablestore.py similarity index 93% rename from api/tests/integration_tests/vdb/tablestore/test_tablestore.py rename to api/providers/vdb/vdb-tablestore/tests/integration_tests/test_tablestore.py index b60e26a881..97c9626ee1 100644 --- a/api/tests/integration_tests/vdb/tablestore/test_tablestore.py +++ b/api/providers/vdb/vdb-tablestore/tests/integration_tests/test_tablestore.py @@ -1,20 +1,21 @@ +import logging import os import uuid import tablestore from _pytest.python_api import approx - -from core.rag.datasource.vdb.tablestore.tablestore_vector import ( +from dify_vdb_tablestore.tablestore_vector import ( TableStoreConfig, TableStoreVector, ) -from tests.integration_tests.vdb.test_vector_store import ( + +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, get_example_document, get_example_text, ) -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) +logger = logging.getLogger(__name__) class TableStoreVectorTest(AbstractVectorTest): @@ -90,7 +91,7 @@ class 
TableStoreVectorTest(AbstractVectorTest): try: self.vector.delete() except Exception: - pass + logger.debug("Failed to delete vector store during test setup, it may not exist yet") return super().run_all_tests() diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/tablestore/test_tablestore_vector.py b/api/providers/vdb/vdb-tablestore/tests/unit_tests/test_tablestore_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/tablestore/test_tablestore_vector.py rename to api/providers/vdb/vdb-tablestore/tests/unit_tests/test_tablestore_vector.py index e3b6676d9b..62a11e0445 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/tablestore/test_tablestore_vector.py +++ b/api/providers/vdb/vdb-tablestore/tests/unit_tests/test_tablestore_vector.py @@ -81,7 +81,7 @@ def tablestore_module(monkeypatch): fake_module = _build_fake_tablestore_module() monkeypatch.setitem(sys.modules, "tablestore", fake_module) - import core.rag.datasource.vdb.tablestore.tablestore_vector as module + import dify_vdb_tablestore.tablestore_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-tencent/pyproject.toml b/api/providers/vdb/vdb-tencent/pyproject.toml new file mode 100644 index 0000000000..7bb761b169 --- /dev/null +++ b/api/providers/vdb/vdb-tencent/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-tencent" +version = "0.0.1" + +dependencies = [ + "tcvectordb~=2.1.0", +] +description = "Dify vector store backend (dify-vdb-tencent)." 
+ +[project.entry-points."dify.vector_backends"] +tencent = "dify_vdb_tencent.tencent_vector:TencentVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/tablestore/__init__.py b/api/providers/vdb/vdb-tencent/src/dify_vdb_tencent/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/tablestore/__init__.py rename to api/providers/vdb/vdb-tencent/src/dify_vdb_tencent/__init__.py diff --git a/api/core/rag/datasource/vdb/tencent/tencent_vector.py b/api/providers/vdb/vdb-tencent/src/dify_vdb_tencent/tencent_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/tencent/tencent_vector.py rename to api/providers/vdb/vdb-tencent/src/dify_vdb_tencent/tencent_vector.py diff --git a/api/tests/integration_tests/vdb/__mock/tcvectordb.py b/api/providers/vdb/vdb-tencent/tests/integration_tests/conftest.py similarity index 100% rename from api/tests/integration_tests/vdb/__mock/tcvectordb.py rename to api/providers/vdb/vdb-tencent/tests/integration_tests/conftest.py diff --git a/api/tests/integration_tests/vdb/tcvectordb/test_tencent.py b/api/providers/vdb/vdb-tencent/tests/integration_tests/test_tencent.py similarity index 76% rename from api/tests/integration_tests/vdb/tcvectordb/test_tencent.py rename to api/providers/vdb/vdb-tencent/tests/integration_tests/test_tencent.py index 3d6deff2a0..a53ec87f92 100644 --- a/api/tests/integration_tests/vdb/tcvectordb/test_tencent.py +++ b/api/providers/vdb/vdb-tencent/tests/integration_tests/test_tencent.py @@ -1,12 +1,8 @@ from unittest.mock import MagicMock -from core.rag.datasource.vdb.tencent.tencent_vector import TencentConfig, TencentVector -from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, get_example_text +from dify_vdb_tencent.tencent_vector import TencentConfig, TencentVector -pytest_plugins = ( - "tests.integration_tests.vdb.test_vector_store", - "tests.integration_tests.vdb.__mock.tcvectordb", -) +from 
core.rag.datasource.vdb.vector_integration_test_support import AbstractVectorTest, get_example_text mock_client = MagicMock() mock_client.list_databases.return_value = [{"name": "test"}] diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/tencent/test_tencent_vector.py b/api/providers/vdb/vdb-tencent/tests/unit_tests/test_tencent_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/tencent/test_tencent_vector.py rename to api/providers/vdb/vdb-tencent/tests/unit_tests/test_tencent_vector.py index d8f35a6019..299e40ee1e 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/tencent/test_tencent_vector.py +++ b/api/providers/vdb/vdb-tencent/tests/unit_tests/test_tencent_vector.py @@ -140,7 +140,7 @@ def tencent_module(monkeypatch): for name, module in _build_fake_tencent_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.tencent.tencent_vector as module + import dify_vdb_tencent.tencent_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-tidb-on-qdrant/pyproject.toml b/api/providers/vdb/vdb-tidb-on-qdrant/pyproject.toml new file mode 100644 index 0000000000..5040fb38ba --- /dev/null +++ b/api/providers/vdb/vdb-tidb-on-qdrant/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-tidb-on-qdrant" +version = "0.0.1" + +dependencies = [ + "qdrant-client==1.9.0", +] +description = "Dify vector store backend (dify-vdb-tidb-on-qdrant)." 
+ +[project.entry-points."dify.vector_backends"] +tidb_on_qdrant = "dify_vdb_tidb_on_qdrant.tidb_on_qdrant_vector:TidbOnQdrantVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/tencent/__init__.py b/api/providers/vdb/vdb-tidb-on-qdrant/src/dify_vdb_tidb_on_qdrant/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/tencent/__init__.py rename to api/providers/vdb/vdb-tidb-on-qdrant/src/dify_vdb_tidb_on_qdrant/__init__.py diff --git a/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py b/api/providers/vdb/vdb-tidb-on-qdrant/src/dify_vdb_tidb_on_qdrant/tidb_on_qdrant_vector.py similarity index 99% rename from api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py rename to api/providers/vdb/vdb-tidb-on-qdrant/src/dify_vdb_tidb_on_qdrant/tidb_on_qdrant_vector.py index 11eaf35fa5..bb8a580ebf 100644 --- a/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py +++ b/api/providers/vdb/vdb-tidb-on-qdrant/src/dify_vdb_tidb_on_qdrant/tidb_on_qdrant_vector.py @@ -24,12 +24,12 @@ from sqlalchemy import select from configs import dify_config from core.rag.datasource.vdb.field import Field -from core.rag.datasource.vdb.tidb_on_qdrant.tidb_service import TidbService from core.rag.datasource.vdb.vector_base import BaseVector, VectorIndexStructDict from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory from core.rag.datasource.vdb.vector_type import VectorType from core.rag.embedding.embedding_base import Embeddings from core.rag.models.document import Document +from dify_vdb_tidb_on_qdrant.tidb_service import TidbService from extensions.ext_database import db from extensions.ext_redis import redis_client from models.dataset import Dataset, TidbAuthBinding diff --git a/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_service.py b/api/providers/vdb/vdb-tidb-on-qdrant/src/dify_vdb_tidb_on_qdrant/tidb_service.py similarity index 100% rename from 
api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_service.py rename to api/providers/vdb/vdb-tidb-on-qdrant/src/dify_vdb_tidb_on_qdrant/tidb_service.py diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/tidb_on_qdrant/test_tidb_on_qdrant_vector.py b/api/providers/vdb/vdb-tidb-on-qdrant/tests/unit_tests/test_tidb_on_qdrant_vector.py similarity index 96% rename from api/tests/unit_tests/core/rag/datasource/vdb/tidb_on_qdrant/test_tidb_on_qdrant_vector.py rename to api/providers/vdb/vdb-tidb-on-qdrant/tests/unit_tests/test_tidb_on_qdrant_vector.py index c25af79ae4..3e9229fea5 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/tidb_on_qdrant/test_tidb_on_qdrant_vector.py +++ b/api/providers/vdb/vdb-tidb-on-qdrant/tests/unit_tests/test_tidb_on_qdrant_vector.py @@ -2,13 +2,12 @@ from unittest.mock import patch import httpx import pytest -from qdrant_client.http import models as rest -from qdrant_client.http.exceptions import UnexpectedResponse - -from core.rag.datasource.vdb.tidb_on_qdrant.tidb_on_qdrant_vector import ( +from dify_vdb_tidb_on_qdrant.tidb_on_qdrant_vector import ( TidbOnQdrantConfig, TidbOnQdrantVector, ) +from qdrant_client.http import models as rest +from qdrant_client.http.exceptions import UnexpectedResponse class TestTidbOnQdrantVectorDeleteByIds: @@ -22,7 +21,7 @@ class TestTidbOnQdrantVectorDeleteByIds: api_key="test_api_key", ) - with patch("core.rag.datasource.vdb.tidb_on_qdrant.tidb_on_qdrant_vector.qdrant_client.QdrantClient"): + with patch("dify_vdb_tidb_on_qdrant.tidb_on_qdrant_vector.qdrant_client.QdrantClient"): vector = TidbOnQdrantVector( collection_name="test_collection", group_id="test_group", diff --git a/api/providers/vdb/vdb-tidb-vector/pyproject.toml b/api/providers/vdb/vdb-tidb-vector/pyproject.toml new file mode 100644 index 0000000000..0e2f0ad88f --- /dev/null +++ b/api/providers/vdb/vdb-tidb-vector/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-tidb-vector" +version = "0.0.1" + +dependencies = [ + 
"tidb-vector==0.0.15", +] +description = "Dify vector store backend (dify-vdb-tidb-vector)." + +[project.entry-points."dify.vector_backends"] +tidb_vector = "dify_vdb_tidb_vector.tidb_vector:TiDBVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/tidb_on_qdrant/__init__.py b/api/providers/vdb/vdb-tidb-vector/src/dify_vdb_tidb_vector/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/tidb_on_qdrant/__init__.py rename to api/providers/vdb/vdb-tidb-vector/src/dify_vdb_tidb_vector/__init__.py diff --git a/api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py b/api/providers/vdb/vdb-tidb-vector/src/dify_vdb_tidb_vector/tidb_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/tidb_vector/tidb_vector.py rename to api/providers/vdb/vdb-tidb-vector/src/dify_vdb_tidb_vector/tidb_vector.py diff --git a/api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py b/api/providers/vdb/vdb-tidb-vector/tests/integration_tests/check_tiflash_ready.py similarity index 72% rename from api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py rename to api/providers/vdb/vdb-tidb-vector/tests/integration_tests/check_tiflash_ready.py index f76700aa0e..97f8406e42 100644 --- a/api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py +++ b/api/providers/vdb/vdb-tidb-vector/tests/integration_tests/check_tiflash_ready.py @@ -1,9 +1,13 @@ +import logging import time import pymysql +logger = logging.getLogger(__name__) + def check_tiflash_ready() -> bool: + connection = None try: connection = pymysql.connect( host="localhost", @@ -23,8 +27,8 @@ def check_tiflash_ready() -> bool: cursor.execute(select_tiflash_query) result = cursor.fetchall() return result is not None and len(result) > 0 - except Exception as e: - print(f"TiFlash is not ready. 
Exception: {e}") + except Exception: + logger.exception("TiFlash is not ready.") return False finally: if connection: @@ -38,20 +42,20 @@ def main(): for attempt in range(max_attempts): try: is_tiflash_ready = check_tiflash_ready() - except Exception as e: - print(f"TiFlash is not ready. Exception: {e}") + except Exception: + logger.exception("TiFlash is not ready.") is_tiflash_ready = False if is_tiflash_ready: break else: - print(f"Attempt {attempt + 1} failed, retry in {retry_interval_seconds} seconds...") + logger.error("Attempt %s failed, retry in %s seconds...", attempt + 1, retry_interval_seconds) time.sleep(retry_interval_seconds) if is_tiflash_ready: - print("TiFlash is ready in TiDB.") + logger.info("TiFlash is ready in TiDB.") else: - print(f"TiFlash is not ready in TiDB after {max_attempts} attempting checks.") + logger.error("TiFlash is not ready in TiDB after %s attempting checks.", max_attempts) exit(1) diff --git a/api/tests/integration_tests/vdb/tidb_vector/test_tidb_vector.py b/api/providers/vdb/vdb-tidb-vector/tests/integration_tests/test_tidb_vector.py similarity index 77% rename from api/tests/integration_tests/vdb/tidb_vector/test_tidb_vector.py rename to api/providers/vdb/vdb-tidb-vector/tests/integration_tests/test_tidb_vector.py index 14c6d1c67c..ac854acbf9 100644 --- a/api/tests/integration_tests/vdb/tidb_vector/test_tidb_vector.py +++ b/api/providers/vdb/vdb-tidb-vector/tests/integration_tests/test_tidb_vector.py @@ -1,10 +1,8 @@ import pytest +from dify_vdb_tidb_vector.tidb_vector import TiDBVector, TiDBVectorConfig -from core.rag.datasource.vdb.tidb_vector.tidb_vector import TiDBVector, TiDBVectorConfig -from models.dataset import Document -from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, get_example_text - -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) +from core.rag.datasource.vdb.vector_integration_test_support import AbstractVectorTest, get_example_text +from 
core.rag.models.document import Document @pytest.fixture diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/tidb_vector/test_tidb_vector.py b/api/providers/vdb/vdb-tidb-vector/tests/unit_tests/test_tidb_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/tidb_vector/test_tidb_vector.py rename to api/providers/vdb/vdb-tidb-vector/tests/unit_tests/test_tidb_vector.py index 8e19a59af8..bdbed2f740 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/tidb_vector/test_tidb_vector.py +++ b/api/providers/vdb/vdb-tidb-vector/tests/unit_tests/test_tidb_vector.py @@ -12,7 +12,7 @@ from core.rag.models.document import Document @pytest.fixture def tidb_module(): - import core.rag.datasource.vdb.tidb_vector.tidb_vector as module + import dify_vdb_tidb_vector.tidb_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-upstash/pyproject.toml b/api/providers/vdb/vdb-upstash/pyproject.toml new file mode 100644 index 0000000000..f71773cdbb --- /dev/null +++ b/api/providers/vdb/vdb-upstash/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-upstash" +version = "0.0.1" + +dependencies = [ + "upstash-vector==0.8.0", +] +description = "Dify vector store backend (dify-vdb-upstash)." 
+ +[project.entry-points."dify.vector_backends"] +upstash = "dify_vdb_upstash.upstash_vector:UpstashVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/tidb_vector/__init__.py b/api/providers/vdb/vdb-upstash/src/dify_vdb_upstash/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/tidb_vector/__init__.py rename to api/providers/vdb/vdb-upstash/src/dify_vdb_upstash/__init__.py diff --git a/api/core/rag/datasource/vdb/upstash/upstash_vector.py b/api/providers/vdb/vdb-upstash/src/dify_vdb_upstash/upstash_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/upstash/upstash_vector.py rename to api/providers/vdb/vdb-upstash/src/dify_vdb_upstash/upstash_vector.py diff --git a/api/tests/integration_tests/vdb/__mock/upstashvectordb.py b/api/providers/vdb/vdb-upstash/tests/integration_tests/conftest.py similarity index 94% rename from api/tests/integration_tests/vdb/__mock/upstashvectordb.py rename to api/providers/vdb/vdb-upstash/tests/integration_tests/conftest.py index 70c85d4c98..adba0c150c 100644 --- a/api/tests/integration_tests/vdb/__mock/upstashvectordb.py +++ b/api/providers/vdb/vdb-upstash/tests/integration_tests/conftest.py @@ -6,7 +6,6 @@ from _pytest.monkeypatch import MonkeyPatch from upstash_vector import Index -# Mocking the Index class from upstash_vector class MockIndex: def __init__(self, url="", token=""): self.url = url @@ -37,7 +36,6 @@ class MockIndex: namespace: str = "", include_data: bool = False, ): - # Simple mock query, in real scenario you would calculate similarity mock_result = [] for vector_data in self.vectors: mock_result.append(vector_data) diff --git a/api/tests/integration_tests/vdb/upstash/test_upstash_vector.py b/api/providers/vdb/vdb-upstash/tests/integration_tests/test_upstash_vector.py similarity index 75% rename from api/tests/integration_tests/vdb/upstash/test_upstash_vector.py rename to 
api/providers/vdb/vdb-upstash/tests/integration_tests/test_upstash_vector.py index 8cea0a05eb..f4a65030b6 100644 --- a/api/tests/integration_tests/vdb/upstash/test_upstash_vector.py +++ b/api/providers/vdb/vdb-upstash/tests/integration_tests/test_upstash_vector.py @@ -1,8 +1,7 @@ -from core.rag.datasource.vdb.upstash.upstash_vector import UpstashVector, UpstashVectorConfig -from core.rag.models.document import Document -from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, get_example_text +from dify_vdb_upstash.upstash_vector import UpstashVector, UpstashVectorConfig -pytest_plugins = ("tests.integration_tests.vdb.__mock.upstashvectordb",) +from core.rag.datasource.vdb.vector_integration_test_support import AbstractVectorTest, get_example_text +from core.rag.models.document import Document class UpstashVectorTest(AbstractVectorTest): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/upstash/test_upstash_vector.py b/api/providers/vdb/vdb-upstash/tests/unit_tests/test_upstash_vector.py similarity index 97% rename from api/tests/unit_tests/core/rag/datasource/vdb/upstash/test_upstash_vector.py rename to api/providers/vdb/vdb-upstash/tests/unit_tests/test_upstash_vector.py index ac8a63a44b..a884275c89 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/upstash/test_upstash_vector.py +++ b/api/providers/vdb/vdb-upstash/tests/unit_tests/test_upstash_vector.py @@ -38,11 +38,11 @@ def _build_fake_upstash_module(): @pytest.fixture def upstash_module(monkeypatch): # Remove patched modules if present - for modname in ["upstash_vector", "core.rag.datasource.vdb.upstash.upstash_vector"]: + for modname in ["upstash_vector", "dify_vdb_upstash.upstash_vector"]: if modname in sys.modules: monkeypatch.delitem(sys.modules, modname, raising=False) monkeypatch.setitem(sys.modules, "upstash_vector", _build_fake_upstash_module()) - module = importlib.import_module("core.rag.datasource.vdb.upstash.upstash_vector") + module = 
importlib.import_module("dify_vdb_upstash.upstash_vector") return module diff --git a/api/providers/vdb/vdb-vastbase/pyproject.toml b/api/providers/vdb/vdb-vastbase/pyproject.toml new file mode 100644 index 0000000000..287eb147dc --- /dev/null +++ b/api/providers/vdb/vdb-vastbase/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-vastbase" +version = "0.0.1" + +dependencies = [ + "pyobvector~=0.2.17", +] +description = "Dify vector store backend (dify-vdb-vastbase)." + +[project.entry-points."dify.vector_backends"] +vastbase = "dify_vdb_vastbase.vastbase_vector:VastbaseVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/upstash/__init__.py b/api/providers/vdb/vdb-vastbase/src/dify_vdb_vastbase/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/upstash/__init__.py rename to api/providers/vdb/vdb-vastbase/src/dify_vdb_vastbase/__init__.py diff --git a/api/core/rag/datasource/vdb/pyvastbase/vastbase_vector.py b/api/providers/vdb/vdb-vastbase/src/dify_vdb_vastbase/vastbase_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/pyvastbase/vastbase_vector.py rename to api/providers/vdb/vdb-vastbase/src/dify_vdb_vastbase/vastbase_vector.py diff --git a/api/tests/integration_tests/vdb/pyvastbase/test_vastbase_vector.py b/api/providers/vdb/vdb-vastbase/tests/integration_tests/test_vastbase_vector.py similarity index 72% rename from api/tests/integration_tests/vdb/pyvastbase/test_vastbase_vector.py rename to api/providers/vdb/vdb-vastbase/tests/integration_tests/test_vastbase_vector.py index a47f13625c..0467dec37a 100644 --- a/api/tests/integration_tests/vdb/pyvastbase/test_vastbase_vector.py +++ b/api/providers/vdb/vdb-vastbase/tests/integration_tests/test_vastbase_vector.py @@ -1,10 +1,9 @@ -from core.rag.datasource.vdb.pyvastbase.vastbase_vector import VastbaseVector, VastbaseVectorConfig -from tests.integration_tests.vdb.test_vector_store import ( +from 
dify_vdb_vastbase.vastbase_vector import VastbaseVector, VastbaseVectorConfig + +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, ) -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) - class VastbaseVectorTest(AbstractVectorTest): def __init__(self): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/pyvastbase/test_vastbase_vector.py b/api/providers/vdb/vdb-vastbase/tests/unit_tests/test_vastbase_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/pyvastbase/test_vastbase_vector.py rename to api/providers/vdb/vdb-vastbase/tests/unit_tests/test_vastbase_vector.py index bd8df520ba..4dfb956c00 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/pyvastbase/test_vastbase_vector.py +++ b/api/providers/vdb/vdb-vastbase/tests/unit_tests/test_vastbase_vector.py @@ -41,7 +41,7 @@ def vastbase_module(monkeypatch): for name, module in _build_fake_psycopg2_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.pyvastbase.vastbase_vector as module + import dify_vdb_vastbase.vastbase_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-vikingdb/pyproject.toml b/api/providers/vdb/vdb-vikingdb/pyproject.toml new file mode 100644 index 0000000000..fdf59f76a4 --- /dev/null +++ b/api/providers/vdb/vdb-vikingdb/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-vikingdb" +version = "0.0.1" + +dependencies = [ + "volcengine-compat~=1.0.0", +] +description = "Dify vector store backend (dify-vdb-vikingdb)." 
+ +[project.entry-points."dify.vector_backends"] +vikingdb = "dify_vdb_vikingdb.vikingdb_vector:VikingDBVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/vikingdb/__init__.py b/api/providers/vdb/vdb-vikingdb/src/dify_vdb_vikingdb/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/vikingdb/__init__.py rename to api/providers/vdb/vdb-vikingdb/src/dify_vdb_vikingdb/__init__.py diff --git a/api/core/rag/datasource/vdb/vikingdb/vikingdb_vector.py b/api/providers/vdb/vdb-vikingdb/src/dify_vdb_vikingdb/vikingdb_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/vikingdb/vikingdb_vector.py rename to api/providers/vdb/vdb-vikingdb/src/dify_vdb_vikingdb/vikingdb_vector.py diff --git a/api/tests/integration_tests/vdb/__mock/vikingdb.py b/api/providers/vdb/vdb-vikingdb/tests/integration_tests/conftest.py similarity index 100% rename from api/tests/integration_tests/vdb/__mock/vikingdb.py rename to api/providers/vdb/vdb-vikingdb/tests/integration_tests/conftest.py diff --git a/api/tests/integration_tests/vdb/vikingdb/test_vikingdb.py b/api/providers/vdb/vdb-vikingdb/tests/integration_tests/test_vikingdb.py similarity index 78% rename from api/tests/integration_tests/vdb/vikingdb/test_vikingdb.py rename to api/providers/vdb/vdb-vikingdb/tests/integration_tests/test_vikingdb.py index 56311acd25..5a3908d14b 100644 --- a/api/tests/integration_tests/vdb/vikingdb/test_vikingdb.py +++ b/api/providers/vdb/vdb-vikingdb/tests/integration_tests/test_vikingdb.py @@ -1,10 +1,6 @@ -from core.rag.datasource.vdb.vikingdb.vikingdb_vector import VikingDBConfig, VikingDBVector -from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, get_example_text +from dify_vdb_vikingdb.vikingdb_vector import VikingDBConfig, VikingDBVector -pytest_plugins = ( - "tests.integration_tests.vdb.test_vector_store", - "tests.integration_tests.vdb.__mock.vikingdb", -) +from 
core.rag.datasource.vdb.vector_integration_test_support import AbstractVectorTest, get_example_text class VikingDBVectorTest(AbstractVectorTest): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/vikingdb/test_vikingdb_vector.py b/api/providers/vdb/vdb-vikingdb/tests/unit_tests/test_vikingdb_vector.py similarity index 99% rename from api/tests/unit_tests/core/rag/datasource/vdb/vikingdb/test_vikingdb_vector.py rename to api/providers/vdb/vdb-vikingdb/tests/unit_tests/test_vikingdb_vector.py index 9da92af2d0..544b8163be 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/vikingdb/test_vikingdb_vector.py +++ b/api/providers/vdb/vdb-vikingdb/tests/unit_tests/test_vikingdb_vector.py @@ -83,7 +83,7 @@ def vikingdb_module(monkeypatch): for name, module in _build_fake_vikingdb_modules().items(): monkeypatch.setitem(sys.modules, name, module) - import core.rag.datasource.vdb.vikingdb.vikingdb_vector as module + import dify_vdb_vikingdb.vikingdb_vector as module return importlib.reload(module) diff --git a/api/providers/vdb/vdb-weaviate/pyproject.toml b/api/providers/vdb/vdb-weaviate/pyproject.toml new file mode 100644 index 0000000000..035fbd396d --- /dev/null +++ b/api/providers/vdb/vdb-weaviate/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "dify-vdb-weaviate" +version = "0.0.1" + +dependencies = [ + "weaviate-client==4.20.5", +] +description = "Dify vector store backend (dify-vdb-weaviate)." 
+ +[project.entry-points."dify.vector_backends"] +weaviate = "dify_vdb_weaviate.weaviate_vector:WeaviateVectorFactory" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/api/core/rag/datasource/vdb/weaviate/__init__.py b/api/providers/vdb/vdb-weaviate/src/dify_vdb_weaviate/__init__.py similarity index 100% rename from api/core/rag/datasource/vdb/weaviate/__init__.py rename to api/providers/vdb/vdb-weaviate/src/dify_vdb_weaviate/__init__.py diff --git a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py b/api/providers/vdb/vdb-weaviate/src/dify_vdb_weaviate/weaviate_vector.py similarity index 100% rename from api/core/rag/datasource/vdb/weaviate/weaviate_vector.py rename to api/providers/vdb/vdb-weaviate/src/dify_vdb_weaviate/weaviate_vector.py diff --git a/api/tests/integration_tests/vdb/weaviate/test_weaviate.py b/api/providers/vdb/vdb-weaviate/tests/integration_tests/test_weaviate.py similarity index 72% rename from api/tests/integration_tests/vdb/weaviate/test_weaviate.py rename to api/providers/vdb/vdb-weaviate/tests/integration_tests/test_weaviate.py index a1d9850979..631d23d653 100644 --- a/api/tests/integration_tests/vdb/weaviate/test_weaviate.py +++ b/api/providers/vdb/vdb-weaviate/tests/integration_tests/test_weaviate.py @@ -1,10 +1,9 @@ -from core.rag.datasource.vdb.weaviate.weaviate_vector import WeaviateConfig, WeaviateVector -from tests.integration_tests.vdb.test_vector_store import ( +from dify_vdb_weaviate.weaviate_vector import WeaviateConfig, WeaviateVector + +from core.rag.datasource.vdb.vector_integration_test_support import ( AbstractVectorTest, ) -pytest_plugins = ("tests.integration_tests.vdb.test_vector_store",) - class WeaviateVectorTest(AbstractVectorTest): def __init__(self): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/weaviate/test_weavaite.py b/api/providers/vdb/vdb-weaviate/tests/unit_tests/test_weavaite.py similarity index 92% rename from 
api/tests/unit_tests/core/rag/datasource/vdb/weaviate/test_weavaite.py rename to api/providers/vdb/vdb-weaviate/tests/unit_tests/test_weavaite.py index baf8c9e5f8..c773e4d552 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/weaviate/test_weavaite.py +++ b/api/providers/vdb/vdb-weaviate/tests/unit_tests/test_weavaite.py @@ -1,6 +1,6 @@ from unittest.mock import MagicMock, patch -from core.rag.datasource.vdb.weaviate.weaviate_vector import WeaviateConfig, WeaviateVector +from dify_vdb_weaviate.weaviate_vector import WeaviateConfig, WeaviateVector def test_init_client_with_valid_config(): diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/weaviate/test_weaviate_vector.py b/api/providers/vdb/vdb-weaviate/tests/unit_tests/test_weaviate_vector.py similarity index 92% rename from api/tests/unit_tests/core/rag/datasource/vdb/weaviate/test_weaviate_vector.py rename to api/providers/vdb/vdb-weaviate/tests/unit_tests/test_weaviate_vector.py index 69d1833001..b43a4a20c8 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/weaviate/test_weaviate_vector.py +++ b/api/providers/vdb/vdb-weaviate/tests/unit_tests/test_weaviate_vector.py @@ -14,9 +14,9 @@ from types import SimpleNamespace from unittest.mock import MagicMock, patch import pytest +from dify_vdb_weaviate import weaviate_vector as weaviate_vector_module +from dify_vdb_weaviate.weaviate_vector import WeaviateConfig, WeaviateVector -from core.rag.datasource.vdb.weaviate import weaviate_vector as weaviate_vector_module -from core.rag.datasource.vdb.weaviate.weaviate_vector import WeaviateConfig, WeaviateVector from core.rag.models.document import Document @@ -40,7 +40,7 @@ class TestWeaviateVector(unittest.TestCase): with pytest.raises(ValueError, match="config WEAVIATE_ENDPOINT is required"): WeaviateConfig(endpoint="") - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate") def _create_weaviate_vector(self, mock_weaviate_module): 
"""Helper to create a WeaviateVector instance with mocked client.""" mock_client = MagicMock() @@ -66,7 +66,7 @@ class TestWeaviateVector(unittest.TestCase): mock_client.close.assert_called_once() mock_debug.assert_called_once() - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate.connect_to_custom") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate.connect_to_custom") def test_init_client_reuses_cached_client_without_reconnect(self, mock_connect): cached_client = MagicMock() cached_client.is_ready.return_value = True @@ -79,7 +79,7 @@ class TestWeaviateVector(unittest.TestCase): assert client is cached_client mock_connect.assert_not_called() - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate.connect_to_custom") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate.connect_to_custom") def test_init_client_reuses_cached_client_after_lock_recheck(self, mock_connect): cached_client = MagicMock() cached_client.is_ready.side_effect = [False, True] @@ -92,8 +92,8 @@ class TestWeaviateVector(unittest.TestCase): assert client is cached_client mock_connect.assert_not_called() - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.Auth.api_key", return_value="auth-token") - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate.connect_to_custom") + @patch("dify_vdb_weaviate.weaviate_vector.Auth.api_key", return_value="auth-token") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate.connect_to_custom") def test_init_client_parses_custom_grpc_endpoint_without_scheme(self, mock_connect, mock_api_key): mock_client = MagicMock() mock_client.is_ready.return_value = True @@ -122,7 +122,7 @@ class TestWeaviateVector(unittest.TestCase): } mock_api_key.assert_called_once_with("test-key") - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate.connect_to_custom") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate.connect_to_custom") def test_init_client_raises_when_database_not_ready(self, mock_connect): mock_client = 
MagicMock() mock_client.is_ready.return_value = False @@ -133,7 +133,7 @@ class TestWeaviateVector(unittest.TestCase): with pytest.raises(ConnectionError, match="Vector database is not ready"): wv._init_client(self.config) - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate") def test_init(self, mock_weaviate_module): """Test WeaviateVector initialization stores attributes including doc_type.""" mock_client = MagicMock() @@ -183,9 +183,9 @@ class TestWeaviateVector(unittest.TestCase): wv._create_collection.assert_called_once() wv.add_texts.assert_called_once_with([doc], [[0.1, 0.2]]) - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.redis_client") - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.dify_config") - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + @patch("dify_vdb_weaviate.weaviate_vector.redis_client") + @patch("dify_vdb_weaviate.weaviate_vector.dify_config") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate") def test_create_collection_includes_doc_type_property(self, mock_weaviate_module, mock_dify_config, mock_redis): """Test that _create_collection defines doc_type in the schema properties.""" # Mock Redis @@ -232,7 +232,7 @@ class TestWeaviateVector(unittest.TestCase): f"doc_type should be in collection schema properties, got: {property_names}" ) - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.redis_client") + @patch("dify_vdb_weaviate.weaviate_vector.redis_client") def test_create_collection_returns_early_when_cache_key_exists(self, mock_redis): mock_lock = MagicMock() mock_lock.__enter__ = MagicMock() @@ -251,7 +251,7 @@ class TestWeaviateVector(unittest.TestCase): wv._ensure_properties.assert_not_called() mock_redis.set.assert_not_called() - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.redis_client") + @patch("dify_vdb_weaviate.weaviate_vector.redis_client") def 
test_create_collection_logs_and_reraises_errors(self, mock_redis): mock_lock = MagicMock() mock_lock.__enter__ = MagicMock() @@ -270,7 +270,7 @@ class TestWeaviateVector(unittest.TestCase): mock_exception.assert_called_once() - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate") def test_ensure_properties_adds_missing_doc_type(self, mock_weaviate_module): """Test that _ensure_properties adds doc_type when it's missing from existing schema.""" mock_client = MagicMock() @@ -305,7 +305,7 @@ class TestWeaviateVector(unittest.TestCase): added_names = [call.args[0].name for call in add_calls] assert "doc_type" in added_names, f"doc_type should be added to existing collection, added: {added_names}" - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate") def test_ensure_properties_adds_all_missing_core_properties(self, mock_weaviate_module): mock_client = MagicMock() mock_client.is_ready.return_value = True @@ -328,7 +328,7 @@ class TestWeaviateVector(unittest.TestCase): added_names = [call.args[0].name for call in add_calls] assert added_names == ["document_id", "doc_id", "doc_type", "chunk_index"] - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate") def test_ensure_properties_skips_existing_doc_type(self, mock_weaviate_module): """Test that _ensure_properties does not add doc_type when it already exists.""" mock_client = MagicMock() @@ -361,7 +361,7 @@ class TestWeaviateVector(unittest.TestCase): # No properties should be added mock_col.config.add_property.assert_not_called() - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate") def test_ensure_properties_logs_warning_when_property_addition_fails(self, mock_weaviate_module): mock_client = MagicMock() mock_client.is_ready.return_value = True @@ 
-385,7 +385,7 @@ class TestWeaviateVector(unittest.TestCase): assert mock_warning.call_count == 4 - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate") def test_search_by_vector_returns_doc_type_in_metadata(self, mock_weaviate_module): """Test that search_by_vector returns doc_type in document metadata. @@ -432,7 +432,7 @@ class TestWeaviateVector(unittest.TestCase): assert len(docs) == 1 assert docs[0].metadata.get("doc_type") == "image" - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate") def test_search_by_vector_uses_document_filter_and_default_distance(self, mock_weaviate_module): mock_client = MagicMock() mock_client.is_ready.return_value = True @@ -469,7 +469,7 @@ class TestWeaviateVector(unittest.TestCase): assert docs[0].metadata["score"] == 0.0 assert mock_col.query.near_vector.call_args.kwargs["filters"] is not None - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate") def test_search_by_vector_returns_empty_when_collection_is_missing(self, mock_weaviate_module): mock_client = MagicMock() mock_client.is_ready.return_value = True @@ -484,7 +484,7 @@ class TestWeaviateVector(unittest.TestCase): assert wv.search_by_vector(query_vector=[0.1] * 3) == [] - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate") def test_search_by_full_text_returns_doc_type_in_metadata(self, mock_weaviate_module): """Test that search_by_full_text also returns doc_type in document metadata.""" mock_client = MagicMock() @@ -526,7 +526,7 @@ class TestWeaviateVector(unittest.TestCase): assert len(docs) == 1 assert docs[0].metadata.get("doc_type") == "image" - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate") def 
test_search_by_full_text_uses_document_filter(self, mock_weaviate_module): mock_client = MagicMock() mock_client.is_ready.return_value = True @@ -554,7 +554,7 @@ class TestWeaviateVector(unittest.TestCase): assert docs[0].vector == [0.3, 0.4] assert mock_col.query.bm25.call_args.kwargs["filters"] is not None - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate") def test_search_by_full_text_returns_empty_when_collection_is_missing(self, mock_weaviate_module): mock_client = MagicMock() mock_client.is_ready.return_value = True @@ -569,7 +569,7 @@ class TestWeaviateVector(unittest.TestCase): assert wv.search_by_full_text(query="missing") == [] - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate") def test_add_texts_stores_doc_type_in_properties(self, mock_weaviate_module): """Test that add_texts includes doc_type from document metadata in stored properties.""" mock_client = MagicMock() @@ -611,7 +611,7 @@ class TestWeaviateVector(unittest.TestCase): stored_props = call_kwargs.kwargs.get("properties") assert stored_props.get("doc_type") == "image", f"doc_type should be stored in properties, got: {stored_props}" - @patch("core.rag.datasource.vdb.weaviate.weaviate_vector.weaviate") + @patch("dify_vdb_weaviate.weaviate_vector.weaviate") def test_add_texts_falls_back_to_random_uuid_and_serializes_datetime_metadata(self, mock_weaviate_module): mock_client = MagicMock() mock_client.is_ready.return_value = True @@ -635,7 +635,7 @@ class TestWeaviateVector(unittest.TestCase): with ( patch.object(wv, "_get_uuids", return_value=["not-a-uuid"]), - patch("core.rag.datasource.vdb.weaviate.weaviate_vector._uuid.uuid4", return_value="fallback-uuid"), + patch("dify_vdb_weaviate.weaviate_vector._uuid.uuid4", return_value="fallback-uuid"), ): ids = wv.add_texts(documents=[doc], embeddings=[[]]) @@ -775,9 +775,7 @@ class 
TestWeaviateVectorFactory(unittest.TestCase): patch.object(weaviate_vector_module.dify_config, "WEAVIATE_GRPC_ENDPOINT", "localhost:50051"), patch.object(weaviate_vector_module.dify_config, "WEAVIATE_API_KEY", "api-key"), patch.object(weaviate_vector_module.dify_config, "WEAVIATE_BATCH_SIZE", 88), - patch( - "core.rag.datasource.vdb.weaviate.weaviate_vector.WeaviateVector", return_value="vector" - ) as mock_vector, + patch("dify_vdb_weaviate.weaviate_vector.WeaviateVector", return_value="vector") as mock_vector, ): factory = weaviate_vector_module.WeaviateVectorFactory() result = factory.init_vector(dataset, attributes, MagicMock()) @@ -806,9 +804,7 @@ class TestWeaviateVectorFactory(unittest.TestCase): "gen_collection_name_by_id", return_value="GeneratedCollection_Node", ), - patch( - "core.rag.datasource.vdb.weaviate.weaviate_vector.WeaviateVector", return_value="vector" - ) as mock_vector, + patch("dify_vdb_weaviate.weaviate_vector.WeaviateVector", return_value="vector") as mock_vector, ): factory = weaviate_vector_module.WeaviateVectorFactory() result = factory.init_vector(dataset, attributes, MagicMock()) diff --git a/api/pyproject.toml b/api/pyproject.toml index fdc6a0f9d7..3b7e5f8e1f 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -86,7 +86,6 @@ dependencies = [ "flask-restx~=1.3.2", "packaging~=26.0", "croniter>=6.2.2", - "weaviate-client==4.20.5", "apscheduler>=3.11.2", "weave>=0.52.36", "fastopenapi[flask]>=0.7.0", @@ -98,8 +97,44 @@ dependencies = [ [tool.setuptools] packages = [] +[tool.uv.workspace] +members = ["providers/vdb/*"] +exclude = ["providers/vdb/__pycache__"] + +[tool.uv.sources] +dify-vdb-alibabacloud-mysql = { workspace = true } +dify-vdb-analyticdb = { workspace = true } +dify-vdb-baidu = { workspace = true } +dify-vdb-chroma = { workspace = true } +dify-vdb-clickzetta = { workspace = true } +dify-vdb-couchbase = { workspace = true } +dify-vdb-elasticsearch = { workspace = true } +dify-vdb-hologres = { workspace = true } 
+dify-vdb-huawei-cloud = { workspace = true } +dify-vdb-iris = { workspace = true } +dify-vdb-lindorm = { workspace = true } +dify-vdb-matrixone = { workspace = true } +dify-vdb-milvus = { workspace = true } +dify-vdb-myscale = { workspace = true } +dify-vdb-oceanbase = { workspace = true } +dify-vdb-opengauss = { workspace = true } +dify-vdb-opensearch = { workspace = true } +dify-vdb-oracle = { workspace = true } +dify-vdb-pgvecto-rs = { workspace = true } +dify-vdb-pgvector = { workspace = true } +dify-vdb-qdrant = { workspace = true } +dify-vdb-relyt = { workspace = true } +dify-vdb-tablestore = { workspace = true } +dify-vdb-tencent = { workspace = true } +dify-vdb-tidb-on-qdrant = { workspace = true } +dify-vdb-tidb-vector = { workspace = true } +dify-vdb-upstash = { workspace = true } +dify-vdb-vastbase = { workspace = true } +dify-vdb-vikingdb = { workspace = true } +dify-vdb-weaviate = { workspace = true } + [tool.uv] -default-groups = ["storage", "tools", "vdb"] +default-groups = ["storage", "tools", "vdb-all"] package = false [dependency-groups] @@ -172,6 +207,7 @@ dev = [ "pytest-timeout>=2.4.0", "pytest-xdist>=3.8.0", "pyrefly>=0.60.0", + "xinference-client~=2.4.0", ] ############################################################ @@ -196,37 +232,74 @@ storage = [ tools = ["cloudscraper~=1.2.71", "nltk~=3.9.1"] ############################################################ -# [ VDB ] dependency group -# Required by vector store clients +# [ VDB ] workspace plugins — hollow packages under providers/vdb/* +# Each declares its own third-party deps and registers dify.vector_backends entry points. 
+# Use: uv sync --group vdb-all | uv sync --group vdb-qdrant ############################################################ -vdb = [ - "alibabacloud_gpdb20160503~=5.2.0", - "alibabacloud_tea_openapi~=0.4.3", - "chromadb==0.5.20", - "clickhouse-connect~=0.15.0", - "clickzetta-connector-python>=0.8.102", - "couchbase~=4.6.0", - "elasticsearch==8.14.0", - "opensearch-py==3.1.0", - "oracledb==3.4.2", - "pgvecto-rs[sqlalchemy]~=0.2.2", - "pgvector==0.4.2", - "pymilvus~=2.6.12", - "pymochow==2.4.0", - "pyobvector~=0.2.17", - "qdrant-client==1.9.0", - "intersystems-irispython>=5.1.0", - "tablestore==6.4.4", - "tcvectordb~=2.1.0", - "tidb-vector==0.0.15", - "upstash-vector==0.8.0", - "volcengine-compat~=1.0.0", - "weaviate-client==4.20.5", - "xinference-client~=2.4.0", - "mo-vector~=0.1.13", - "mysql-connector-python>=9.3.0", - "holo-search-sdk>=0.4.2", +vdb-all = [ + "dify-vdb-alibabacloud-mysql", + "dify-vdb-analyticdb", + "dify-vdb-baidu", + "dify-vdb-chroma", + "dify-vdb-clickzetta", + "dify-vdb-couchbase", + "dify-vdb-elasticsearch", + "dify-vdb-hologres", + "dify-vdb-huawei-cloud", + "dify-vdb-iris", + "dify-vdb-lindorm", + "dify-vdb-matrixone", + "dify-vdb-milvus", + "dify-vdb-myscale", + "dify-vdb-oceanbase", + "dify-vdb-opengauss", + "dify-vdb-opensearch", + "dify-vdb-oracle", + "dify-vdb-pgvecto-rs", + "dify-vdb-pgvector", + "dify-vdb-qdrant", + "dify-vdb-relyt", + "dify-vdb-tablestore", + "dify-vdb-tencent", + "dify-vdb-tidb-on-qdrant", + "dify-vdb-tidb-vector", + "dify-vdb-upstash", + "dify-vdb-vastbase", + "dify-vdb-vikingdb", + "dify-vdb-weaviate", ] +vdb-alibabacloud-mysql = ["dify-vdb-alibabacloud-mysql"] +vdb-analyticdb = ["dify-vdb-analyticdb"] +vdb-baidu = ["dify-vdb-baidu"] +vdb-chroma = ["dify-vdb-chroma"] +vdb-clickzetta = ["dify-vdb-clickzetta"] +vdb-couchbase = ["dify-vdb-couchbase"] +vdb-elasticsearch = ["dify-vdb-elasticsearch"] +vdb-hologres = ["dify-vdb-hologres"] +vdb-huawei-cloud = ["dify-vdb-huawei-cloud"] +vdb-iris = ["dify-vdb-iris"] 
+vdb-lindorm = ["dify-vdb-lindorm"] +vdb-matrixone = ["dify-vdb-matrixone"] +vdb-milvus = ["dify-vdb-milvus"] +vdb-myscale = ["dify-vdb-myscale"] +vdb-oceanbase = ["dify-vdb-oceanbase"] +vdb-opengauss = ["dify-vdb-opengauss"] +vdb-opensearch = ["dify-vdb-opensearch"] +vdb-oracle = ["dify-vdb-oracle"] +vdb-pgvecto-rs = ["dify-vdb-pgvecto-rs"] +vdb-pgvector = ["dify-vdb-pgvector"] +vdb-qdrant = ["dify-vdb-qdrant"] +vdb-relyt = ["dify-vdb-relyt"] +vdb-tablestore = ["dify-vdb-tablestore"] +vdb-tencent = ["dify-vdb-tencent"] +vdb-tidb-on-qdrant = ["dify-vdb-tidb-on-qdrant"] +vdb-tidb-vector = ["dify-vdb-tidb-vector"] +vdb-upstash = ["dify-vdb-upstash"] +vdb-vastbase = ["dify-vdb-vastbase"] +vdb-vikingdb = ["dify-vdb-vikingdb"] +vdb-weaviate = ["dify-vdb-weaviate"] +# Optional client used by some tests / integrations (not a vector backend plugin) +vdb-xinference = ["xinference-client~=2.4.0"] [tool.pyrefly] project-includes = ["."] diff --git a/api/pyrefly-local-excludes.txt b/api/pyrefly-local-excludes.txt index 43f604c2de..3e5ece1fcf 100644 --- a/api/pyrefly-local-excludes.txt +++ b/api/pyrefly-local-excludes.txt @@ -45,31 +45,7 @@ core/plugin/backwards_invocation/model.py core/prompt/utils/extract_thread_messages.py core/rag/datasource/keyword/jieba/jieba.py core/rag/datasource/keyword/jieba/jieba_keyword_table_handler.py -core/rag/datasource/vdb/analyticdb/analyticdb_vector.py -core/rag/datasource/vdb/analyticdb/analyticdb_vector_openapi.py -core/rag/datasource/vdb/baidu/baidu_vector.py -core/rag/datasource/vdb/chroma/chroma_vector.py -core/rag/datasource/vdb/clickzetta/clickzetta_vector.py -core/rag/datasource/vdb/couchbase/couchbase_vector.py -core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py -core/rag/datasource/vdb/huawei/huawei_cloud_vector.py -core/rag/datasource/vdb/lindorm/lindorm_vector.py -core/rag/datasource/vdb/matrixone/matrixone_vector.py -core/rag/datasource/vdb/milvus/milvus_vector.py -core/rag/datasource/vdb/myscale/myscale_vector.py 
-core/rag/datasource/vdb/oceanbase/oceanbase_vector.py -core/rag/datasource/vdb/opensearch/opensearch_vector.py -core/rag/datasource/vdb/oracle/oraclevector.py -core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py -core/rag/datasource/vdb/relyt/relyt_vector.py -core/rag/datasource/vdb/tablestore/tablestore_vector.py -core/rag/datasource/vdb/tencent/tencent_vector.py -core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py -core/rag/datasource/vdb/tidb_on_qdrant/tidb_service.py -core/rag/datasource/vdb/tidb_vector/tidb_vector.py -core/rag/datasource/vdb/upstash/upstash_vector.py -core/rag/datasource/vdb/vikingdb/vikingdb_vector.py -core/rag/datasource/vdb/weaviate/weaviate_vector.py +providers/vdb/** core/rag/extractor/csv_extractor.py core/rag/extractor/excel_extractor.py core/rag/extractor/firecrawl/firecrawl_app.py diff --git a/api/pyrightconfig.json b/api/pyrightconfig.json index 424563bc11..c4582e891d 100644 --- a/api/pyrightconfig.json +++ b/api/pyrightconfig.json @@ -4,7 +4,8 @@ "tests/", ".venv", "migrations/", - "core/rag" + "core/rag", + "providers/", ], "typeCheckingMode": "strict", "allowedUntypedLibraries": [ @@ -36,7 +37,9 @@ "gmpy2", "sendgrid", "sendgrid.helpers.mail", - "holo_search_sdk.types" + "holo_search_sdk.types", + "dify_vdb_qdrant", + "dify_vdb_tidb_on_qdrant" ], "reportUnknownMemberType": "hint", "reportUnknownParameterType": "hint", diff --git a/api/schedule/create_tidb_serverless_task.py b/api/schedule/create_tidb_serverless_task.py index 6ceb3ef856..c4c203c150 100644 --- a/api/schedule/create_tidb_serverless_task.py +++ b/api/schedule/create_tidb_serverless_task.py @@ -1,11 +1,11 @@ import time import click +from dify_vdb_tidb_on_qdrant.tidb_service import TidbService from sqlalchemy import func, select import app from configs import dify_config -from core.rag.datasource.vdb.tidb_on_qdrant.tidb_service import TidbService from extensions.ext_database import db from models.dataset import TidbAuthBinding from models.enums import 
TidbAuthBindingStatus diff --git a/api/schedule/update_tidb_serverless_status_task.py b/api/schedule/update_tidb_serverless_status_task.py index 10003b1b97..46d1b85aa0 100644 --- a/api/schedule/update_tidb_serverless_status_task.py +++ b/api/schedule/update_tidb_serverless_status_task.py @@ -2,11 +2,11 @@ import time from collections.abc import Sequence import click +from dify_vdb_tidb_on_qdrant.tidb_service import TidbService from sqlalchemy import select import app from configs import dify_config -from core.rag.datasource.vdb.tidb_on_qdrant.tidb_service import TidbService from extensions.ext_database import db from models.dataset import TidbAuthBinding from models.enums import TidbAuthBindingStatus diff --git a/api/tests/__init__.py b/api/tests/__init__.py index e69de29bb2..ced6188ce8 100644 --- a/api/tests/__init__.py +++ b/api/tests/__init__.py @@ -0,0 +1 @@ +"""Test suite root package (enables ``import tests.integration_tests...`` with ``pythonpath = .``).""" diff --git a/api/tests/integration_tests/__init__.py b/api/tests/integration_tests/__init__.py index e69de29bb2..c66cd71b7e 100644 --- a/api/tests/integration_tests/__init__.py +++ b/api/tests/integration_tests/__init__.py @@ -0,0 +1 @@ +"""Integration tests package.""" diff --git a/api/tests/integration_tests/vdb/__mock/__init__.py b/api/tests/integration_tests/vdb/__mock/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/analyticdb/__init__.py b/api/tests/integration_tests/vdb/analyticdb/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/baidu/__init__.py b/api/tests/integration_tests/vdb/baidu/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/chroma/__init__.py b/api/tests/integration_tests/vdb/chroma/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git 
a/api/tests/integration_tests/vdb/couchbase/__init__.py b/api/tests/integration_tests/vdb/couchbase/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/elasticsearch/__init__.py b/api/tests/integration_tests/vdb/elasticsearch/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/hologres/__init__.py b/api/tests/integration_tests/vdb/hologres/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/huawei/__init__.py b/api/tests/integration_tests/vdb/huawei/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/iris/__init__.py b/api/tests/integration_tests/vdb/iris/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/lindorm/__init__.py b/api/tests/integration_tests/vdb/lindorm/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/matrixone/__init__.py b/api/tests/integration_tests/vdb/matrixone/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/milvus/__init__.py b/api/tests/integration_tests/vdb/milvus/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/myscale/__init__.py b/api/tests/integration_tests/vdb/myscale/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/oceanbase/__init__.py b/api/tests/integration_tests/vdb/oceanbase/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/opengauss/__init__.py b/api/tests/integration_tests/vdb/opengauss/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/opensearch/__init__.py 
b/api/tests/integration_tests/vdb/opensearch/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/opensearch/test_opensearch.py b/api/tests/integration_tests/vdb/opensearch/test_opensearch.py deleted file mode 100644 index 81ebb1d2f7..0000000000 --- a/api/tests/integration_tests/vdb/opensearch/test_opensearch.py +++ /dev/null @@ -1,235 +0,0 @@ -from unittest.mock import MagicMock, patch - -import pytest - -from core.rag.datasource.vdb.field import Field -from core.rag.datasource.vdb.opensearch.opensearch_vector import OpenSearchConfig, OpenSearchVector -from core.rag.models.document import Document -from extensions import ext_redis - - -def get_example_text() -> str: - return "This is a sample text for testing purposes." - - -@pytest.fixture(scope="module") -def setup_mock_redis(): - ext_redis.redis_client.get = MagicMock(return_value=None) - ext_redis.redis_client.set = MagicMock(return_value=None) - - mock_redis_lock = MagicMock() - mock_redis_lock.__enter__ = MagicMock() - mock_redis_lock.__exit__ = MagicMock() - ext_redis.redis_client.lock = MagicMock(return_value=mock_redis_lock) - - -class TestOpenSearchConfig: - def test_to_opensearch_params(self): - config = OpenSearchConfig( - host="localhost", - port=9200, - secure=True, - user="admin", - password="password", - ) - - params = config.to_opensearch_params() - - assert params["hosts"] == [{"host": "localhost", "port": 9200}] - assert params["use_ssl"] is True - assert params["verify_certs"] is True - assert params["connection_class"].__name__ == "Urllib3HttpConnection" - assert params["http_auth"] == ("admin", "password") - - @patch("boto3.Session", autospec=True) - @patch("core.rag.datasource.vdb.opensearch.opensearch_vector.Urllib3AWSV4SignerAuth", autospec=True) - def test_to_opensearch_params_with_aws_managed_iam( - self, mock_aws_signer_auth: MagicMock, mock_boto_session: MagicMock - ): - mock_credentials = MagicMock() - 
mock_boto_session.return_value.get_credentials.return_value = mock_credentials - - mock_auth_instance = mock_aws_signer_auth.return_value - aws_region = "ap-southeast-2" - aws_service = "aoss" - host = f"aoss-endpoint.{aws_region}.aoss.amazonaws.com" - port = 9201 - - config = OpenSearchConfig( - host=host, - port=port, - secure=True, - auth_method="aws_managed_iam", - aws_region=aws_region, - aws_service=aws_service, - ) - - params = config.to_opensearch_params() - - assert params["hosts"] == [{"host": host, "port": port}] - assert params["use_ssl"] is True - assert params["verify_certs"] is True - assert params["connection_class"].__name__ == "Urllib3HttpConnection" - assert params["http_auth"] is mock_auth_instance - - mock_aws_signer_auth.assert_called_once_with( - credentials=mock_credentials, region=aws_region, service=aws_service - ) - assert mock_boto_session.return_value.get_credentials.called - - -class TestOpenSearchVector: - def setup_method(self): - self.collection_name = "test_collection" - self.example_doc_id = "example_doc_id" - self.vector = OpenSearchVector( - collection_name=self.collection_name, - config=OpenSearchConfig(host="localhost", port=9200, secure=False, user="admin", password="password"), - ) - self.vector._client = MagicMock() - - @pytest.mark.parametrize( - ("search_response", "expected_length", "expected_doc_id"), - [ - ( - { - "hits": { - "total": {"value": 1}, - "hits": [ - { - "_source": { - "page_content": get_example_text(), - "metadata": {"document_id": "example_doc_id"}, - } - } - ], - } - }, - 1, - "example_doc_id", - ), - ({"hits": {"total": {"value": 0}, "hits": []}}, 0, None), - ], - ) - def test_search_by_full_text(self, search_response, expected_length, expected_doc_id): - self.vector._client.search.return_value = search_response - - hits_by_full_text = self.vector.search_by_full_text(query=get_example_text()) - assert len(hits_by_full_text) == expected_length - if expected_length > 0: - assert 
hits_by_full_text[0].metadata["document_id"] == expected_doc_id - - def test_search_by_vector(self): - vector = [0.1] * 128 - mock_response = { - "hits": { - "total": {"value": 1}, - "hits": [ - { - "_source": { - Field.CONTENT_KEY: get_example_text(), - Field.METADATA_KEY: {"document_id": self.example_doc_id}, - }, - "_score": 1.0, - } - ], - } - } - self.vector._client.search.return_value = mock_response - - hits_by_vector = self.vector.search_by_vector(query_vector=vector) - - print("Hits by vector:", hits_by_vector) - print("Expected document ID:", self.example_doc_id) - print("Actual document ID:", hits_by_vector[0].metadata["document_id"] if hits_by_vector else "No hits") - - assert len(hits_by_vector) > 0, f"Expected at least one hit, got {len(hits_by_vector)}" - assert hits_by_vector[0].metadata["document_id"] == self.example_doc_id, ( - f"Expected document ID {self.example_doc_id}, got {hits_by_vector[0].metadata['document_id']}" - ) - - def test_get_ids_by_metadata_field(self): - mock_response = {"hits": {"total": {"value": 1}, "hits": [{"_id": "mock_id"}]}} - self.vector._client.search.return_value = mock_response - - doc = Document(page_content="Test content", metadata={"document_id": self.example_doc_id}) - embedding = [0.1] * 128 - - with patch("opensearchpy.helpers.bulk", autospec=True) as mock_bulk: - mock_bulk.return_value = ([], []) - self.vector.add_texts([doc], [embedding]) - - ids = self.vector.get_ids_by_metadata_field(key="document_id", value=self.example_doc_id) - assert len(ids) == 1 - assert ids[0] == "mock_id" - - def test_add_texts(self): - self.vector._client.index.return_value = {"result": "created"} - - doc = Document(page_content="Test content", metadata={"document_id": self.example_doc_id}) - embedding = [0.1] * 128 - - with patch("opensearchpy.helpers.bulk", autospec=True) as mock_bulk: - mock_bulk.return_value = ([], []) - self.vector.add_texts([doc], [embedding]) - - mock_response = {"hits": {"total": {"value": 1}, "hits": 
[{"_id": "mock_id"}]}} - self.vector._client.search.return_value = mock_response - - ids = self.vector.get_ids_by_metadata_field(key="document_id", value=self.example_doc_id) - assert len(ids) == 1 - assert ids[0] == "mock_id" - - def test_delete_nonexistent_index(self): - """Test deleting a non-existent index.""" - # Create a vector instance with a non-existent collection name - self.vector._client.indices.exists.return_value = False - - # Should not raise an exception - self.vector.delete() - - # Verify that exists was called but delete was not - self.vector._client.indices.exists.assert_called_once_with(index=self.collection_name.lower()) - self.vector._client.indices.delete.assert_not_called() - - def test_delete_existing_index(self): - """Test deleting an existing index.""" - self.vector._client.indices.exists.return_value = True - - self.vector.delete() - - # Verify both exists and delete were called - self.vector._client.indices.exists.assert_called_once_with(index=self.collection_name.lower()) - self.vector._client.indices.delete.assert_called_once_with(index=self.collection_name.lower()) - - -@pytest.mark.usefixtures("setup_mock_redis") -class TestOpenSearchVectorWithRedis: - def setup_method(self): - self.tester = TestOpenSearchVector() - - def test_search_by_full_text(self): - self.tester.setup_method() - search_response = { - "hits": { - "total": {"value": 1}, - "hits": [ - {"_source": {"page_content": get_example_text(), "metadata": {"document_id": "example_doc_id"}}} - ], - } - } - expected_length = 1 - expected_doc_id = "example_doc_id" - self.tester.test_search_by_full_text(search_response, expected_length, expected_doc_id) - - def test_get_ids_by_metadata_field(self): - self.tester.setup_method() - self.tester.test_get_ids_by_metadata_field() - - def test_add_texts(self): - self.tester.setup_method() - self.tester.test_add_texts() - - def test_search_by_vector(self): - self.tester.setup_method() - self.tester.test_search_by_vector() diff --git 
a/api/tests/integration_tests/vdb/oracle/__init__.py b/api/tests/integration_tests/vdb/oracle/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/pgvecto_rs/__init__.py b/api/tests/integration_tests/vdb/pgvecto_rs/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/pgvector/__init__.py b/api/tests/integration_tests/vdb/pgvector/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/pyvastbase/__init__.py b/api/tests/integration_tests/vdb/pyvastbase/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/qdrant/__init__.py b/api/tests/integration_tests/vdb/qdrant/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/tablestore/__init__.py b/api/tests/integration_tests/vdb/tablestore/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/tcvectordb/__init__.py b/api/tests/integration_tests/vdb/tcvectordb/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/tidb_vector/__init__.py b/api/tests/integration_tests/vdb/tidb_vector/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/upstash/__init__.py b/api/tests/integration_tests/vdb/upstash/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/vikingdb/__init__.py b/api/tests/integration_tests/vdb/vikingdb/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/integration_tests/vdb/weaviate/__init__.py b/api/tests/integration_tests/vdb/weaviate/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git 
a/api/tests/unit_tests/core/rag/datasource/vdb/pgvector/__init__.py b/api/tests/unit_tests/core/rag/datasource/vdb/pgvector/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/test_vector_factory.py b/api/tests/unit_tests/core/rag/datasource/vdb/test_vector_factory.py index 5a0e4dcd75..dc21d378a2 100644 --- a/api/tests/unit_tests/core/rag/datasource/vdb/test_vector_factory.py +++ b/api/tests/unit_tests/core/rag/datasource/vdb/test_vector_factory.py @@ -21,6 +21,9 @@ def _register_fake_factory_module(monkeypatch, module_path: str, class_name: str def vector_factory_module(): import importlib + from core.rag.datasource.vdb import vector_backend_registry as reg + + reg.clear_vector_factory_cache() import core.rag.datasource.vdb.vector_factory as module return importlib.reload(module) @@ -41,61 +44,62 @@ def test_gen_index_struct_dict(vector_factory_module): @pytest.mark.parametrize( ("vector_type", "module_path", "class_name"), [ - ("CHROMA", "core.rag.datasource.vdb.chroma.chroma_vector", "ChromaVectorFactory"), - ("MILVUS", "core.rag.datasource.vdb.milvus.milvus_vector", "MilvusVectorFactory"), + ("CHROMA", "dify_vdb_chroma.chroma_vector", "ChromaVectorFactory"), + ("MILVUS", "dify_vdb_milvus.milvus_vector", "MilvusVectorFactory"), ( "ALIBABACLOUD_MYSQL", - "core.rag.datasource.vdb.alibabacloud_mysql.alibabacloud_mysql_vector", + "dify_vdb_alibabacloud_mysql.alibabacloud_mysql_vector", "AlibabaCloudMySQLVectorFactory", ), - ("MYSCALE", "core.rag.datasource.vdb.myscale.myscale_vector", "MyScaleVectorFactory"), - ("PGVECTOR", "core.rag.datasource.vdb.pgvector.pgvector", "PGVectorFactory"), - ("VASTBASE", "core.rag.datasource.vdb.pyvastbase.vastbase_vector", "VastbaseVectorFactory"), - ("PGVECTO_RS", "core.rag.datasource.vdb.pgvecto_rs.pgvecto_rs", "PGVectoRSFactory"), - ("QDRANT", "core.rag.datasource.vdb.qdrant.qdrant_vector", "QdrantVectorFactory"), - ("RELYT", 
"core.rag.datasource.vdb.relyt.relyt_vector", "RelytVectorFactory"), + ("MYSCALE", "dify_vdb_myscale.myscale_vector", "MyScaleVectorFactory"), + ("PGVECTOR", "dify_vdb_pgvector.pgvector", "PGVectorFactory"), + ("VASTBASE", "dify_vdb_vastbase.vastbase_vector", "VastbaseVectorFactory"), + ("PGVECTO_RS", "dify_vdb_pgvecto_rs.pgvecto_rs", "PGVectoRSFactory"), + ("QDRANT", "dify_vdb_qdrant.qdrant_vector", "QdrantVectorFactory"), + ("RELYT", "dify_vdb_relyt.relyt_vector", "RelytVectorFactory"), ( "ELASTICSEARCH", - "core.rag.datasource.vdb.elasticsearch.elasticsearch_vector", + "dify_vdb_elasticsearch.elasticsearch_vector", "ElasticSearchVectorFactory", ), ( "ELASTICSEARCH_JA", - "core.rag.datasource.vdb.elasticsearch.elasticsearch_ja_vector", + "dify_vdb_elasticsearch.elasticsearch_ja_vector", "ElasticSearchJaVectorFactory", ), - ("TIDB_VECTOR", "core.rag.datasource.vdb.tidb_vector.tidb_vector", "TiDBVectorFactory"), - ("WEAVIATE", "core.rag.datasource.vdb.weaviate.weaviate_vector", "WeaviateVectorFactory"), - ("TENCENT", "core.rag.datasource.vdb.tencent.tencent_vector", "TencentVectorFactory"), - ("ORACLE", "core.rag.datasource.vdb.oracle.oraclevector", "OracleVectorFactory"), + ("TIDB_VECTOR", "dify_vdb_tidb_vector.tidb_vector", "TiDBVectorFactory"), + ("WEAVIATE", "dify_vdb_weaviate.weaviate_vector", "WeaviateVectorFactory"), + ("TENCENT", "dify_vdb_tencent.tencent_vector", "TencentVectorFactory"), + ("ORACLE", "dify_vdb_oracle.oraclevector", "OracleVectorFactory"), ( "OPENSEARCH", - "core.rag.datasource.vdb.opensearch.opensearch_vector", + "dify_vdb_opensearch.opensearch_vector", "OpenSearchVectorFactory", ), - ("ANALYTICDB", "core.rag.datasource.vdb.analyticdb.analyticdb_vector", "AnalyticdbVectorFactory"), - ("COUCHBASE", "core.rag.datasource.vdb.couchbase.couchbase_vector", "CouchbaseVectorFactory"), - ("BAIDU", "core.rag.datasource.vdb.baidu.baidu_vector", "BaiduVectorFactory"), - ("VIKINGDB", "core.rag.datasource.vdb.vikingdb.vikingdb_vector", 
"VikingDBVectorFactory"), - ("UPSTASH", "core.rag.datasource.vdb.upstash.upstash_vector", "UpstashVectorFactory"), + ("ANALYTICDB", "dify_vdb_analyticdb.analyticdb_vector", "AnalyticdbVectorFactory"), + ("COUCHBASE", "dify_vdb_couchbase.couchbase_vector", "CouchbaseVectorFactory"), + ("BAIDU", "dify_vdb_baidu.baidu_vector", "BaiduVectorFactory"), + ("VIKINGDB", "dify_vdb_vikingdb.vikingdb_vector", "VikingDBVectorFactory"), + ("UPSTASH", "dify_vdb_upstash.upstash_vector", "UpstashVectorFactory"), ( "TIDB_ON_QDRANT", - "core.rag.datasource.vdb.tidb_on_qdrant.tidb_on_qdrant_vector", + "dify_vdb_tidb_on_qdrant.tidb_on_qdrant_vector", "TidbOnQdrantVectorFactory", ), - ("LINDORM", "core.rag.datasource.vdb.lindorm.lindorm_vector", "LindormVectorStoreFactory"), - ("OCEANBASE", "core.rag.datasource.vdb.oceanbase.oceanbase_vector", "OceanBaseVectorFactory"), - ("SEEKDB", "core.rag.datasource.vdb.oceanbase.oceanbase_vector", "OceanBaseVectorFactory"), - ("OPENGAUSS", "core.rag.datasource.vdb.opengauss.opengauss", "OpenGaussFactory"), - ("TABLESTORE", "core.rag.datasource.vdb.tablestore.tablestore_vector", "TableStoreVectorFactory"), + ("LINDORM", "dify_vdb_lindorm.lindorm_vector", "LindormVectorStoreFactory"), + ("OCEANBASE", "dify_vdb_oceanbase.oceanbase_vector", "OceanBaseVectorFactory"), + ("SEEKDB", "dify_vdb_oceanbase.oceanbase_vector", "OceanBaseVectorFactory"), + ("OPENGAUSS", "dify_vdb_opengauss.opengauss", "OpenGaussFactory"), + ("TABLESTORE", "dify_vdb_tablestore.tablestore_vector", "TableStoreVectorFactory"), ( "HUAWEI_CLOUD", - "core.rag.datasource.vdb.huawei.huawei_cloud_vector", + "dify_vdb_huawei_cloud.huawei_cloud_vector", "HuaweiCloudVectorFactory", ), - ("MATRIXONE", "core.rag.datasource.vdb.matrixone.matrixone_vector", "MatrixoneVectorFactory"), - ("CLICKZETTA", "core.rag.datasource.vdb.clickzetta.clickzetta_vector", "ClickzettaVectorFactory"), - ("IRIS", "core.rag.datasource.vdb.iris.iris_vector", "IrisVectorFactory"), + ("MATRIXONE", 
"dify_vdb_matrixone.matrixone_vector", "MatrixoneVectorFactory"), + ("CLICKZETTA", "dify_vdb_clickzetta.clickzetta_vector", "ClickzettaVectorFactory"), + ("IRIS", "dify_vdb_iris.iris_vector", "IrisVectorFactory"), + ("HOLOGRES", "dify_vdb_hologres.hologres_vector", "HologresVectorFactory"), ], ) def test_get_vector_factory_supported(vector_factory_module, monkeypatch, vector_type, module_path, class_name): @@ -111,6 +115,34 @@ def test_get_vector_factory_unsupported(vector_factory_module): vector_factory_module.Vector.get_vector_factory("unknown") +class _PluginChromaFactory: + """Stub used only for entry-point override test.""" + + +def test_get_vector_factory_entry_point_overrides_builtin(vector_factory_module, monkeypatch): + from importlib.metadata import EntryPoint + + from core.rag.datasource.vdb import vector_backend_registry as reg + + reg.clear_vector_factory_cache() + ep = EntryPoint( + name="chroma", + value=f"{__name__}:_PluginChromaFactory", + group="dify.vector_backends", + ) + + class _FakeGroups: + def select(self, *, group: str): + if group == "dify.vector_backends": + return (ep,) + return () + + monkeypatch.setattr(reg, "entry_points", lambda: _FakeGroups()) + + result_cls = vector_factory_module.Vector.get_vector_factory(vector_factory_module.VectorType.CHROMA) + assert result_cls is _PluginChromaFactory + + def test_vector_init_uses_default_and_custom_attributes(vector_factory_module): dataset = SimpleNamespace(id="dataset-1") diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/tidb_on_qdrant/__init__.py b/api/tests/unit_tests/core/rag/datasource/vdb/tidb_on_qdrant/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/tests/unit_tests/core/rag/datasource/vdb/weaviate/__init__.py b/api/tests/unit_tests/core/rag/datasource/vdb/weaviate/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/uv.lock b/api/uv.lock index e73ecf867d..38a2ea21e2 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ 
-8,6 +8,41 @@ resolution-markers = [ "sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", ] +[manifest] +members = [ + "dify-api", + "dify-vdb-alibabacloud-mysql", + "dify-vdb-analyticdb", + "dify-vdb-baidu", + "dify-vdb-chroma", + "dify-vdb-clickzetta", + "dify-vdb-couchbase", + "dify-vdb-elasticsearch", + "dify-vdb-hologres", + "dify-vdb-huawei-cloud", + "dify-vdb-iris", + "dify-vdb-lindorm", + "dify-vdb-matrixone", + "dify-vdb-milvus", + "dify-vdb-myscale", + "dify-vdb-oceanbase", + "dify-vdb-opengauss", + "dify-vdb-opensearch", + "dify-vdb-oracle", + "dify-vdb-pgvecto-rs", + "dify-vdb-pgvector", + "dify-vdb-qdrant", + "dify-vdb-relyt", + "dify-vdb-tablestore", + "dify-vdb-tencent", + "dify-vdb-tidb-on-qdrant", + "dify-vdb-tidb-vector", + "dify-vdb-upstash", + "dify-vdb-vastbase", + "dify-vdb-vikingdb", + "dify-vdb-weaviate", +] + [[package]] name = "abnf" version = "2.2.0" @@ -1354,7 +1389,6 @@ dependencies = [ { name = "transformers" }, { name = "unstructured", extra = ["docx", "epub", "md", "ppt", "pptx"] }, { name = "weave" }, - { name = "weaviate-client" }, { name = "yarl" }, ] @@ -1422,6 +1456,7 @@ dev = [ { name = "types-tensorflow" }, { name = "types-tqdm" }, { name = "types-ujson" }, + { name = "xinference-client" }, ] storage = [ { name = "azure-storage-blob" }, @@ -1438,32 +1473,129 @@ tools = [ { name = "cloudscraper" }, { name = "nltk" }, ] -vdb = [ - { name = "alibabacloud-gpdb20160503" }, - { name = "alibabacloud-tea-openapi" }, - { name = "chromadb" }, - { name = "clickhouse-connect" }, - { name = "clickzetta-connector-python" }, - { name = "couchbase" }, - { name = "elasticsearch" }, - { name = "holo-search-sdk" }, - { name = "intersystems-irispython" }, - { name = "mo-vector" }, - { name = "mysql-connector-python" }, - { name = "opensearch-py" }, - { name = "oracledb" }, - { name = "pgvecto-rs", extra = ["sqlalchemy"] }, - { name = "pgvector" }, - { name = "pymilvus" }, - { name = "pymochow" }, - { name = 
"pyobvector" }, - { name = "qdrant-client" }, - { name = "tablestore" }, - { name = "tcvectordb" }, - { name = "tidb-vector" }, - { name = "upstash-vector" }, - { name = "volcengine-compat" }, - { name = "weaviate-client" }, +vdb-alibabacloud-mysql = [ + { name = "dify-vdb-alibabacloud-mysql" }, +] +vdb-all = [ + { name = "dify-vdb-alibabacloud-mysql" }, + { name = "dify-vdb-analyticdb" }, + { name = "dify-vdb-baidu" }, + { name = "dify-vdb-chroma" }, + { name = "dify-vdb-clickzetta" }, + { name = "dify-vdb-couchbase" }, + { name = "dify-vdb-elasticsearch" }, + { name = "dify-vdb-hologres" }, + { name = "dify-vdb-huawei-cloud" }, + { name = "dify-vdb-iris" }, + { name = "dify-vdb-lindorm" }, + { name = "dify-vdb-matrixone" }, + { name = "dify-vdb-milvus" }, + { name = "dify-vdb-myscale" }, + { name = "dify-vdb-oceanbase" }, + { name = "dify-vdb-opengauss" }, + { name = "dify-vdb-opensearch" }, + { name = "dify-vdb-oracle" }, + { name = "dify-vdb-pgvecto-rs" }, + { name = "dify-vdb-pgvector" }, + { name = "dify-vdb-qdrant" }, + { name = "dify-vdb-relyt" }, + { name = "dify-vdb-tablestore" }, + { name = "dify-vdb-tencent" }, + { name = "dify-vdb-tidb-on-qdrant" }, + { name = "dify-vdb-tidb-vector" }, + { name = "dify-vdb-upstash" }, + { name = "dify-vdb-vastbase" }, + { name = "dify-vdb-vikingdb" }, + { name = "dify-vdb-weaviate" }, +] +vdb-analyticdb = [ + { name = "dify-vdb-analyticdb" }, +] +vdb-baidu = [ + { name = "dify-vdb-baidu" }, +] +vdb-chroma = [ + { name = "dify-vdb-chroma" }, +] +vdb-clickzetta = [ + { name = "dify-vdb-clickzetta" }, +] +vdb-couchbase = [ + { name = "dify-vdb-couchbase" }, +] +vdb-elasticsearch = [ + { name = "dify-vdb-elasticsearch" }, +] +vdb-hologres = [ + { name = "dify-vdb-hologres" }, +] +vdb-huawei-cloud = [ + { name = "dify-vdb-huawei-cloud" }, +] +vdb-iris = [ + { name = "dify-vdb-iris" }, +] +vdb-lindorm = [ + { name = "dify-vdb-lindorm" }, +] +vdb-matrixone = [ + { name = "dify-vdb-matrixone" }, +] +vdb-milvus = [ + { name = 
"dify-vdb-milvus" }, +] +vdb-myscale = [ + { name = "dify-vdb-myscale" }, +] +vdb-oceanbase = [ + { name = "dify-vdb-oceanbase" }, +] +vdb-opengauss = [ + { name = "dify-vdb-opengauss" }, +] +vdb-opensearch = [ + { name = "dify-vdb-opensearch" }, +] +vdb-oracle = [ + { name = "dify-vdb-oracle" }, +] +vdb-pgvecto-rs = [ + { name = "dify-vdb-pgvecto-rs" }, +] +vdb-pgvector = [ + { name = "dify-vdb-pgvector" }, +] +vdb-qdrant = [ + { name = "dify-vdb-qdrant" }, +] +vdb-relyt = [ + { name = "dify-vdb-relyt" }, +] +vdb-tablestore = [ + { name = "dify-vdb-tablestore" }, +] +vdb-tencent = [ + { name = "dify-vdb-tencent" }, +] +vdb-tidb-on-qdrant = [ + { name = "dify-vdb-tidb-on-qdrant" }, +] +vdb-tidb-vector = [ + { name = "dify-vdb-tidb-vector" }, +] +vdb-upstash = [ + { name = "dify-vdb-upstash" }, +] +vdb-vastbase = [ + { name = "dify-vdb-vastbase" }, +] +vdb-vikingdb = [ + { name = "dify-vdb-vikingdb" }, +] +vdb-weaviate = [ + { name = "dify-vdb-weaviate" }, +] +vdb-xinference = [ { name = "xinference-client" }, ] @@ -1554,7 +1686,6 @@ requires-dist = [ { name = "transformers", specifier = "~=5.3.0" }, { name = "unstructured", extras = ["docx", "epub", "md", "ppt", "pptx"], specifier = "~=0.21.5" }, { name = "weave", specifier = ">=0.52.36" }, - { name = "weaviate-client", specifier = "==4.20.5" }, { name = "yarl", specifier = "~=1.23.0" }, ] @@ -1622,6 +1753,7 @@ dev = [ { name = "types-tensorflow", specifier = ">=2.18.0.20260408" }, { name = "types-tqdm", specifier = ">=4.67.3.20260408" }, { name = "types-ujson", specifier = ">=5.10.0" }, + { name = "xinference-client", specifier = "~=2.4.0" }, ] storage = [ { name = "azure-storage-blob", specifier = "==12.28.0" }, @@ -1638,35 +1770,402 @@ tools = [ { name = "cloudscraper", specifier = "~=1.2.71" }, { name = "nltk", specifier = "~=3.9.1" }, ] -vdb = [ +vdb-alibabacloud-mysql = [{ name = "dify-vdb-alibabacloud-mysql", editable = "providers/vdb/vdb-alibabacloud-mysql" }] +vdb-all = [ + { name = 
"dify-vdb-alibabacloud-mysql", editable = "providers/vdb/vdb-alibabacloud-mysql" }, + { name = "dify-vdb-analyticdb", editable = "providers/vdb/vdb-analyticdb" }, + { name = "dify-vdb-baidu", editable = "providers/vdb/vdb-baidu" }, + { name = "dify-vdb-chroma", editable = "providers/vdb/vdb-chroma" }, + { name = "dify-vdb-clickzetta", editable = "providers/vdb/vdb-clickzetta" }, + { name = "dify-vdb-couchbase", editable = "providers/vdb/vdb-couchbase" }, + { name = "dify-vdb-elasticsearch", editable = "providers/vdb/vdb-elasticsearch" }, + { name = "dify-vdb-hologres", editable = "providers/vdb/vdb-hologres" }, + { name = "dify-vdb-huawei-cloud", editable = "providers/vdb/vdb-huawei-cloud" }, + { name = "dify-vdb-iris", editable = "providers/vdb/vdb-iris" }, + { name = "dify-vdb-lindorm", editable = "providers/vdb/vdb-lindorm" }, + { name = "dify-vdb-matrixone", editable = "providers/vdb/vdb-matrixone" }, + { name = "dify-vdb-milvus", editable = "providers/vdb/vdb-milvus" }, + { name = "dify-vdb-myscale", editable = "providers/vdb/vdb-myscale" }, + { name = "dify-vdb-oceanbase", editable = "providers/vdb/vdb-oceanbase" }, + { name = "dify-vdb-opengauss", editable = "providers/vdb/vdb-opengauss" }, + { name = "dify-vdb-opensearch", editable = "providers/vdb/vdb-opensearch" }, + { name = "dify-vdb-oracle", editable = "providers/vdb/vdb-oracle" }, + { name = "dify-vdb-pgvecto-rs", editable = "providers/vdb/vdb-pgvecto-rs" }, + { name = "dify-vdb-pgvector", editable = "providers/vdb/vdb-pgvector" }, + { name = "dify-vdb-qdrant", editable = "providers/vdb/vdb-qdrant" }, + { name = "dify-vdb-relyt", editable = "providers/vdb/vdb-relyt" }, + { name = "dify-vdb-tablestore", editable = "providers/vdb/vdb-tablestore" }, + { name = "dify-vdb-tencent", editable = "providers/vdb/vdb-tencent" }, + { name = "dify-vdb-tidb-on-qdrant", editable = "providers/vdb/vdb-tidb-on-qdrant" }, + { name = "dify-vdb-tidb-vector", editable = "providers/vdb/vdb-tidb-vector" }, + { name = 
"dify-vdb-upstash", editable = "providers/vdb/vdb-upstash" }, + { name = "dify-vdb-vastbase", editable = "providers/vdb/vdb-vastbase" }, + { name = "dify-vdb-vikingdb", editable = "providers/vdb/vdb-vikingdb" }, + { name = "dify-vdb-weaviate", editable = "providers/vdb/vdb-weaviate" }, +] +vdb-analyticdb = [{ name = "dify-vdb-analyticdb", editable = "providers/vdb/vdb-analyticdb" }] +vdb-baidu = [{ name = "dify-vdb-baidu", editable = "providers/vdb/vdb-baidu" }] +vdb-chroma = [{ name = "dify-vdb-chroma", editable = "providers/vdb/vdb-chroma" }] +vdb-clickzetta = [{ name = "dify-vdb-clickzetta", editable = "providers/vdb/vdb-clickzetta" }] +vdb-couchbase = [{ name = "dify-vdb-couchbase", editable = "providers/vdb/vdb-couchbase" }] +vdb-elasticsearch = [{ name = "dify-vdb-elasticsearch", editable = "providers/vdb/vdb-elasticsearch" }] +vdb-hologres = [{ name = "dify-vdb-hologres", editable = "providers/vdb/vdb-hologres" }] +vdb-huawei-cloud = [{ name = "dify-vdb-huawei-cloud", editable = "providers/vdb/vdb-huawei-cloud" }] +vdb-iris = [{ name = "dify-vdb-iris", editable = "providers/vdb/vdb-iris" }] +vdb-lindorm = [{ name = "dify-vdb-lindorm", editable = "providers/vdb/vdb-lindorm" }] +vdb-matrixone = [{ name = "dify-vdb-matrixone", editable = "providers/vdb/vdb-matrixone" }] +vdb-milvus = [{ name = "dify-vdb-milvus", editable = "providers/vdb/vdb-milvus" }] +vdb-myscale = [{ name = "dify-vdb-myscale", editable = "providers/vdb/vdb-myscale" }] +vdb-oceanbase = [{ name = "dify-vdb-oceanbase", editable = "providers/vdb/vdb-oceanbase" }] +vdb-opengauss = [{ name = "dify-vdb-opengauss", editable = "providers/vdb/vdb-opengauss" }] +vdb-opensearch = [{ name = "dify-vdb-opensearch", editable = "providers/vdb/vdb-opensearch" }] +vdb-oracle = [{ name = "dify-vdb-oracle", editable = "providers/vdb/vdb-oracle" }] +vdb-pgvecto-rs = [{ name = "dify-vdb-pgvecto-rs", editable = "providers/vdb/vdb-pgvecto-rs" }] +vdb-pgvector = [{ name = "dify-vdb-pgvector", editable = 
"providers/vdb/vdb-pgvector" }] +vdb-qdrant = [{ name = "dify-vdb-qdrant", editable = "providers/vdb/vdb-qdrant" }] +vdb-relyt = [{ name = "dify-vdb-relyt", editable = "providers/vdb/vdb-relyt" }] +vdb-tablestore = [{ name = "dify-vdb-tablestore", editable = "providers/vdb/vdb-tablestore" }] +vdb-tencent = [{ name = "dify-vdb-tencent", editable = "providers/vdb/vdb-tencent" }] +vdb-tidb-on-qdrant = [{ name = "dify-vdb-tidb-on-qdrant", editable = "providers/vdb/vdb-tidb-on-qdrant" }] +vdb-tidb-vector = [{ name = "dify-vdb-tidb-vector", editable = "providers/vdb/vdb-tidb-vector" }] +vdb-upstash = [{ name = "dify-vdb-upstash", editable = "providers/vdb/vdb-upstash" }] +vdb-vastbase = [{ name = "dify-vdb-vastbase", editable = "providers/vdb/vdb-vastbase" }] +vdb-vikingdb = [{ name = "dify-vdb-vikingdb", editable = "providers/vdb/vdb-vikingdb" }] +vdb-weaviate = [{ name = "dify-vdb-weaviate", editable = "providers/vdb/vdb-weaviate" }] +vdb-xinference = [{ name = "xinference-client", specifier = "~=2.4.0" }] + +[[package]] +name = "dify-vdb-alibabacloud-mysql" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-alibabacloud-mysql" } +dependencies = [ + { name = "mysql-connector-python" }, +] + +[package.metadata] +requires-dist = [{ name = "mysql-connector-python", specifier = ">=9.3.0" }] + +[[package]] +name = "dify-vdb-analyticdb" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-analyticdb" } +dependencies = [ + { name = "alibabacloud-gpdb20160503" }, + { name = "alibabacloud-tea-openapi" }, + { name = "clickhouse-connect" }, +] + +[package.metadata] +requires-dist = [ { name = "alibabacloud-gpdb20160503", specifier = "~=5.2.0" }, { name = "alibabacloud-tea-openapi", specifier = "~=0.4.3" }, - { name = "chromadb", specifier = "==0.5.20" }, { name = "clickhouse-connect", specifier = "~=0.15.0" }, - { name = "clickzetta-connector-python", specifier = ">=0.8.102" }, - { name = "couchbase", specifier = "~=4.6.0" }, - { name = "elasticsearch", 
specifier = "==8.14.0" }, - { name = "holo-search-sdk", specifier = ">=0.4.2" }, - { name = "intersystems-irispython", specifier = ">=5.1.0" }, - { name = "mo-vector", specifier = "~=0.1.13" }, - { name = "mysql-connector-python", specifier = ">=9.3.0" }, - { name = "opensearch-py", specifier = "==3.1.0" }, - { name = "oracledb", specifier = "==3.4.2" }, - { name = "pgvecto-rs", extras = ["sqlalchemy"], specifier = "~=0.2.2" }, - { name = "pgvector", specifier = "==0.4.2" }, - { name = "pymilvus", specifier = "~=2.6.12" }, - { name = "pymochow", specifier = "==2.4.0" }, - { name = "pyobvector", specifier = "~=0.2.17" }, - { name = "qdrant-client", specifier = "==1.9.0" }, - { name = "tablestore", specifier = "==6.4.4" }, - { name = "tcvectordb", specifier = "~=2.1.0" }, - { name = "tidb-vector", specifier = "==0.0.15" }, - { name = "upstash-vector", specifier = "==0.8.0" }, - { name = "volcengine-compat", specifier = "~=1.0.0" }, - { name = "weaviate-client", specifier = "==4.20.5" }, - { name = "xinference-client", specifier = "~=2.4.0" }, ] +[[package]] +name = "dify-vdb-baidu" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-baidu" } +dependencies = [ + { name = "pymochow" }, +] + +[package.metadata] +requires-dist = [{ name = "pymochow", specifier = "==2.4.0" }] + +[[package]] +name = "dify-vdb-chroma" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-chroma" } +dependencies = [ + { name = "chromadb" }, +] + +[package.metadata] +requires-dist = [{ name = "chromadb", specifier = "==0.5.20" }] + +[[package]] +name = "dify-vdb-clickzetta" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-clickzetta" } +dependencies = [ + { name = "clickzetta-connector-python" }, +] + +[package.metadata] +requires-dist = [{ name = "clickzetta-connector-python", specifier = ">=0.8.102" }] + +[[package]] +name = "dify-vdb-couchbase" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-couchbase" } +dependencies = [ + { name = "couchbase" }, 
+] + +[package.metadata] +requires-dist = [{ name = "couchbase", specifier = "~=4.6.0" }] + +[[package]] +name = "dify-vdb-elasticsearch" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-elasticsearch" } +dependencies = [ + { name = "elasticsearch" }, +] + +[package.metadata] +requires-dist = [{ name = "elasticsearch", specifier = "==8.14.0" }] + +[[package]] +name = "dify-vdb-hologres" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-hologres" } +dependencies = [ + { name = "holo-search-sdk" }, +] + +[package.metadata] +requires-dist = [{ name = "holo-search-sdk", specifier = ">=0.4.2" }] + +[[package]] +name = "dify-vdb-huawei-cloud" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-huawei-cloud" } +dependencies = [ + { name = "elasticsearch" }, +] + +[package.metadata] +requires-dist = [{ name = "elasticsearch", specifier = "==8.14.0" }] + +[[package]] +name = "dify-vdb-iris" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-iris" } +dependencies = [ + { name = "intersystems-irispython" }, +] + +[package.metadata] +requires-dist = [{ name = "intersystems-irispython", specifier = ">=5.1.0" }] + +[[package]] +name = "dify-vdb-lindorm" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-lindorm" } +dependencies = [ + { name = "opensearch-py" }, + { name = "tenacity" }, +] + +[package.metadata] +requires-dist = [ + { name = "opensearch-py", specifier = "==3.1.0" }, + { name = "tenacity", specifier = ">=8.0.0" }, +] + +[[package]] +name = "dify-vdb-matrixone" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-matrixone" } +dependencies = [ + { name = "mo-vector" }, +] + +[package.metadata] +requires-dist = [{ name = "mo-vector", specifier = "~=0.1.13" }] + +[[package]] +name = "dify-vdb-milvus" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-milvus" } +dependencies = [ + { name = "pymilvus" }, +] + +[package.metadata] +requires-dist = [{ name = "pymilvus", specifier = "~=2.6.12" }] + 
+[[package]] +name = "dify-vdb-myscale" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-myscale" } +dependencies = [ + { name = "clickhouse-connect" }, +] + +[package.metadata] +requires-dist = [{ name = "clickhouse-connect", specifier = "~=0.15.0" }] + +[[package]] +name = "dify-vdb-oceanbase" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-oceanbase" } +dependencies = [ + { name = "mysql-connector-python" }, + { name = "pyobvector" }, +] + +[package.metadata] +requires-dist = [ + { name = "mysql-connector-python", specifier = ">=9.3.0" }, + { name = "pyobvector", specifier = "~=0.2.17" }, +] + +[[package]] +name = "dify-vdb-opengauss" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-opengauss" } + +[[package]] +name = "dify-vdb-opensearch" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-opensearch" } +dependencies = [ + { name = "opensearch-py" }, +] + +[package.metadata] +requires-dist = [{ name = "opensearch-py", specifier = "==3.1.0" }] + +[[package]] +name = "dify-vdb-oracle" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-oracle" } +dependencies = [ + { name = "oracledb" }, +] + +[package.metadata] +requires-dist = [{ name = "oracledb", specifier = "==3.4.2" }] + +[[package]] +name = "dify-vdb-pgvecto-rs" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-pgvecto-rs" } +dependencies = [ + { name = "pgvecto-rs", extra = ["sqlalchemy"] }, +] + +[package.metadata] +requires-dist = [{ name = "pgvecto-rs", extras = ["sqlalchemy"], specifier = "~=0.2.2" }] + +[[package]] +name = "dify-vdb-pgvector" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-pgvector" } +dependencies = [ + { name = "pgvector" }, +] + +[package.metadata] +requires-dist = [{ name = "pgvector", specifier = "==0.4.2" }] + +[[package]] +name = "dify-vdb-qdrant" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-qdrant" } +dependencies = [ + { name = "qdrant-client" }, +] + +[package.metadata] 
+requires-dist = [{ name = "qdrant-client", specifier = "==1.9.0" }] + +[[package]] +name = "dify-vdb-relyt" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-relyt" } + +[[package]] +name = "dify-vdb-tablestore" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-tablestore" } +dependencies = [ + { name = "tablestore" }, +] + +[package.metadata] +requires-dist = [{ name = "tablestore", specifier = "==6.4.4" }] + +[[package]] +name = "dify-vdb-tencent" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-tencent" } +dependencies = [ + { name = "tcvectordb" }, +] + +[package.metadata] +requires-dist = [{ name = "tcvectordb", specifier = "~=2.1.0" }] + +[[package]] +name = "dify-vdb-tidb-on-qdrant" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-tidb-on-qdrant" } +dependencies = [ + { name = "qdrant-client" }, +] + +[package.metadata] +requires-dist = [{ name = "qdrant-client", specifier = "==1.9.0" }] + +[[package]] +name = "dify-vdb-tidb-vector" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-tidb-vector" } +dependencies = [ + { name = "tidb-vector" }, +] + +[package.metadata] +requires-dist = [{ name = "tidb-vector", specifier = "==0.0.15" }] + +[[package]] +name = "dify-vdb-upstash" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-upstash" } +dependencies = [ + { name = "upstash-vector" }, +] + +[package.metadata] +requires-dist = [{ name = "upstash-vector", specifier = "==0.8.0" }] + +[[package]] +name = "dify-vdb-vastbase" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-vastbase" } +dependencies = [ + { name = "pyobvector" }, +] + +[package.metadata] +requires-dist = [{ name = "pyobvector", specifier = "~=0.2.17" }] + +[[package]] +name = "dify-vdb-vikingdb" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-vikingdb" } +dependencies = [ + { name = "volcengine-compat" }, +] + +[package.metadata] +requires-dist = [{ name = "volcengine-compat", specifier = "~=1.0.0" }] + 
+[[package]] +name = "dify-vdb-weaviate" +version = "0.0.1" +source = { editable = "providers/vdb/vdb-weaviate" } +dependencies = [ + { name = "weaviate-client" }, +] + +[package.metadata] +requires-dist = [{ name = "weaviate-client", specifier = "==4.20.5" }] + [[package]] name = "diskcache-weave" version = "5.6.3.post1" diff --git a/dev/pytest/pytest_unit_tests.sh b/dev/pytest/pytest_unit_tests.sh index 1d4ff4d86f..962532de81 100755 --- a/dev/pytest/pytest_unit_tests.sh +++ b/dev/pytest/pytest_unit_tests.sh @@ -10,7 +10,11 @@ PYTEST_XDIST_ARGS="${PYTEST_XDIST_ARGS:--n auto}" # Run most tests in parallel (excluding controllers which have import conflicts with xdist) # Controller tests have module-level side effects (Flask route registration) that cause # race conditions when imported concurrently by multiple pytest-xdist workers. -pytest --timeout "${PYTEST_TIMEOUT}" ${PYTEST_XDIST_ARGS} api/tests/unit_tests --ignore=api/tests/unit_tests/controllers +pytest --timeout "${PYTEST_TIMEOUT}" ${PYTEST_XDIST_ARGS} \ + api/tests/unit_tests \ + api/providers/vdb/*/tests/unit_tests \ + --ignore=api/tests/unit_tests/controllers # Run controller tests sequentially to avoid import race conditions pytest --timeout "${PYTEST_TIMEOUT}" --cov-append api/tests/unit_tests/controllers + diff --git a/dev/pytest/pytest_vdb.sh b/dev/pytest/pytest_vdb.sh index 126aebf7bd..c1f129bee0 100755 --- a/dev/pytest/pytest_vdb.sh +++ b/dev/pytest/pytest_vdb.sh @@ -6,19 +6,7 @@ cd "$SCRIPT_DIR/../.." 
PYTEST_TIMEOUT="${PYTEST_TIMEOUT:-180}" -pytest --timeout "${PYTEST_TIMEOUT}" api/tests/integration_tests/vdb/chroma \ - api/tests/integration_tests/vdb/milvus \ - api/tests/integration_tests/vdb/pgvecto_rs \ - api/tests/integration_tests/vdb/pgvector \ - api/tests/integration_tests/vdb/qdrant \ - api/tests/integration_tests/vdb/weaviate \ - api/tests/integration_tests/vdb/elasticsearch \ - api/tests/integration_tests/vdb/vikingdb \ - api/tests/integration_tests/vdb/baidu \ - api/tests/integration_tests/vdb/tcvectordb \ - api/tests/integration_tests/vdb/upstash \ - api/tests/integration_tests/vdb/couchbase \ - api/tests/integration_tests/vdb/oceanbase \ - api/tests/integration_tests/vdb/tidb_vector \ - api/tests/integration_tests/vdb/huawei \ - api/tests/integration_tests/vdb/hologres \ +uv sync --project api --group dev + +uv run --project api pytest --timeout "${PYTEST_TIMEOUT}" \ + api/providers/vdb/*/tests/integration_tests \ diff --git a/docker/dify-env-sync.py b/docker/dify-env-sync.py index d7c762748c..afa39d8451 100755 --- a/docker/dify-env-sync.py +++ b/docker/dify-env-sync.py @@ -172,7 +172,10 @@ def analyze_value_change(current: str, recommended: str) -> str | None: return None # Boolean comparison - if current.lower() in {"true", "false"} and recommended.lower() in {"true", "false"}: + if current.lower() in {"true", "false"} and recommended.lower() in { + "true", + "false", + }: if current.lower() != recommended.lower(): return colorize(BLUE, f" -> Boolean value change ({current} -> {recommended})") return None @@ -187,7 +190,10 @@ def analyze_value_change(current: str, recommended: str) -> str | None: # String length if len(current) != len(recommended): - return colorize(YELLOW, f" -> String length change ({len(current)} -> {len(recommended)} characters)") + return colorize( + YELLOW, + f" -> String length change ({len(current)} -> {len(recommended)} characters)", + ) return None @@ -311,7 +317,10 @@ def sync_env_file(work_dir: Path, env_vars: 
dict[str, str], diffs: dict[str, tup env_var_pattern = re.compile(r"^([A-Za-z_][A-Za-z0-9_]*)\s*=") - with example_file.open(encoding="utf-8") as src, new_env_file.open("w", encoding="utf-8") as dst: + with ( + example_file.open(encoding="utf-8") as src, + new_env_file.open("w", encoding="utf-8") as dst, + ): for line in src: raw_line = line.rstrip("\n") match = env_var_pattern.match(raw_line)