From 9d7ea953ea426774c9322e6bf7c21311f84c4679 Mon Sep 17 00:00:00 2001 From: zhangx1n Date: Wed, 25 Mar 2026 11:32:34 +0800 Subject: [PATCH] remove analyticdb --- api/.env.example | 16 +- api/commands/vector.py | 1 - api/configs/middleware/__init__.py | 2 - .../middleware/vdb/analyticdb_config.py | 49 --- api/controllers/console/datasets/datasets.py | 1 - .../rag/datasource/vdb/analyticdb/__init__.py | 0 .../vdb/analyticdb/analyticdb_vector.py | 104 ------ .../analyticdb/analyticdb_vector_openapi.py | 321 ------------------ .../vdb/analyticdb/analyticdb_vector_sql.py | 275 --------------- api/core/rag/datasource/vdb/vector_factory.py | 4 - api/core/rag/datasource/vdb/vector_type.py | 1 - api/pyproject.toml | 2 - api/pyrefly-local-excludes.txt | 2 - .../vdb/analyticdb/test_analyticdb.py | 49 --- api/uv.lock | 175 ---------- docker/.env.example | 16 +- docker/docker-compose.yaml | 12 - 17 files changed, 2 insertions(+), 1028 deletions(-) delete mode 100644 api/configs/middleware/vdb/analyticdb_config.py delete mode 100644 api/core/rag/datasource/vdb/analyticdb/__init__.py delete mode 100644 api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py delete mode 100644 api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_openapi.py delete mode 100644 api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_sql.py delete mode 100644 api/tests/integration_tests/vdb/analyticdb/test_analyticdb.py diff --git a/api/.env.example b/api/.env.example index 40e1c2dfdf..560e2c2294 100644 --- a/api/.env.example +++ b/api/.env.example @@ -180,7 +180,7 @@ CONSOLE_CORS_ALLOW_ORIGINS=http://localhost:3000,* COOKIE_DOMAIN= # Vector database configuration -# Supported values are `weaviate`, `oceanbase`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`, `hologres`. +# Supported values are `weaviate`, `oceanbase`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`, `hologres`. VECTOR_STORE=weaviate # Prefix used to create collection name in vector database VECTOR_INDEX_NAME_PREFIX=Vector_index @@ -321,20 +321,6 @@ CHROMA_DATABASE=default_database CHROMA_AUTH_PROVIDER=chromadb.auth.token_authn.TokenAuthenticationServerProvider CHROMA_AUTH_CREDENTIALS=difyai123456 -# AnalyticDB configuration -ANALYTICDB_KEY_ID=your-ak -ANALYTICDB_KEY_SECRET=your-sk -ANALYTICDB_REGION_ID=cn-hangzhou -ANALYTICDB_INSTANCE_ID=gp-ab123456 -ANALYTICDB_ACCOUNT=testaccount -ANALYTICDB_PASSWORD=testpassword -ANALYTICDB_NAMESPACE=dify -ANALYTICDB_NAMESPACE_PASSWORD=difypassword -ANALYTICDB_HOST=gp-test.aliyuncs.com -ANALYTICDB_PORT=5432 -ANALYTICDB_MIN_CONNECTION=1 -ANALYTICDB_MAX_CONNECTION=5 - # OpenSearch configuration OPENSEARCH_HOST=127.0.0.1 OPENSEARCH_PORT=9200 diff --git a/api/commands/vector.py b/api/commands/vector.py index 5f41d469c8..37add64bf7 100644 --- a/api/commands/vector.py +++ b/api/commands/vector.py @@ -159,7 +159,6 @@ def migrate_knowledge_vector_database(): VectorType.MATRIXONE, } lower_collection_vector_types = { - VectorType.ANALYTICDB, VectorType.HOLOGRES, VectorType.CHROMA, VectorType.MYSCALE, diff --git a/api/configs/middleware/__init__.py b/api/configs/middleware/__init__.py index 723cd82fb1..e280095435 100644 --- a/api/configs/middleware/__init__.py +++ b/api/configs/middleware/__init__.py @@ -19,7 +19,6 @@ from .storage.supabase_storage_config import SupabaseStorageConfig from .storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig from .storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig from .vdb.alibabacloud_mysql_config import AlibabaCloudMySQLConfig -from .vdb.analyticdb_config import AnalyticdbConfig from .vdb.baidu_vector_config import BaiduVectorDBConfig from .vdb.chroma_config import ChromaConfig from .vdb.couchbase_config import CouchbaseConfig @@ -341,7 +340,6 @@ class MiddlewareConfig( VolcengineTOSStorageConfig, # configs of vdb and vdb providers VectorStoreConfig, - AnalyticdbConfig, ChromaConfig, HologresConfig, HuaweiCloudConfig, diff --git a/api/configs/middleware/vdb/analyticdb_config.py b/api/configs/middleware/vdb/analyticdb_config.py deleted file mode 100644 index 539b9c0963..0000000000 --- a/api/configs/middleware/vdb/analyticdb_config.py +++ /dev/null @@ -1,49 +0,0 @@ -from pydantic import Field, PositiveInt -from pydantic_settings import BaseSettings - - -class AnalyticdbConfig(BaseSettings): - """ - Configuration for connecting to Alibaba Cloud AnalyticDB for PostgreSQL. - Refer to the following documentation for details on obtaining credentials: - https://www.alibabacloud.com/help/en/analyticdb-for-postgresql/getting-started/create-an-instance-instances-with-vector-engine-optimization-enabled - """ - - ANALYTICDB_KEY_ID: str | None = Field( - default=None, description="The Access Key ID provided by Alibaba Cloud for API authentication." - ) - ANALYTICDB_KEY_SECRET: str | None = Field( - default=None, description="The Secret Access Key corresponding to the Access Key ID for secure API access." - ) - ANALYTICDB_REGION_ID: str | None = Field( - default=None, - description="The region where the AnalyticDB instance is deployed (e.g., 'cn-hangzhou', 'ap-southeast-1').", - ) - ANALYTICDB_INSTANCE_ID: str | None = Field( - default=None, - description="The unique identifier of the AnalyticDB instance you want to connect to.", - ) - ANALYTICDB_ACCOUNT: str | None = Field( - default=None, - description="The account name used to log in to the AnalyticDB instance" - " (usually the initial account created with the instance).", - ) - ANALYTICDB_PASSWORD: str | None = Field( - default=None, description="The password associated with the AnalyticDB account for database authentication." - ) - ANALYTICDB_NAMESPACE: str | None = Field( - default=None, description="The namespace within AnalyticDB for schema isolation (if using namespace feature)." - ) - ANALYTICDB_NAMESPACE_PASSWORD: str | None = Field( - default=None, - description="The password for accessing the specified namespace within the AnalyticDB instance" - " (if namespace feature is enabled).", - ) - ANALYTICDB_HOST: str | None = Field( - default=None, description="The host of the AnalyticDB instance you want to connect to." - ) - ANALYTICDB_PORT: PositiveInt = Field( - default=5432, description="The port of the AnalyticDB instance you want to connect to." - ) - ANALYTICDB_MIN_CONNECTION: PositiveInt = Field(default=1, description="Min connection of the AnalyticDB database.") - ANALYTICDB_MAX_CONNECTION: PositiveInt = Field(default=5, description="Max connection of the AnalyticDB database.") diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index 459f147d5e..053d502e47 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -242,7 +242,6 @@ def _get_retrieval_methods_by_vector_type(vector_type: str | None, is_mock: bool VectorType.QDRANT, VectorType.WEAVIATE, VectorType.OPENSEARCH, - VectorType.ANALYTICDB, VectorType.MYSCALE, VectorType.ORACLE, VectorType.ELASTICSEARCH, diff --git a/api/core/rag/datasource/vdb/analyticdb/__init__.py b/api/core/rag/datasource/vdb/analyticdb/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py deleted file mode 100644 index ddb549ba9d..0000000000 --- a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py +++ /dev/null @@ -1,104 +0,0 @@ -import json -from typing import Any - -from configs import dify_config -from core.rag.datasource.vdb.analyticdb.analyticdb_vector_openapi import ( - AnalyticdbVectorOpenAPI, - AnalyticdbVectorOpenAPIConfig, -) -from core.rag.datasource.vdb.analyticdb.analyticdb_vector_sql import AnalyticdbVectorBySql, AnalyticdbVectorBySqlConfig -from core.rag.datasource.vdb.vector_base import BaseVector -from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory -from core.rag.datasource.vdb.vector_type import VectorType -from core.rag.embedding.embedding_base import Embeddings -from core.rag.models.document import Document -from models.dataset import Dataset - - -class AnalyticdbVector(BaseVector): - def __init__( - self, - collection_name: str, - api_config: AnalyticdbVectorOpenAPIConfig | None, - sql_config: AnalyticdbVectorBySqlConfig | None, - ): - super().__init__(collection_name) - if api_config is not None: - self.analyticdb_vector: AnalyticdbVectorOpenAPI | AnalyticdbVectorBySql = AnalyticdbVectorOpenAPI( - collection_name, api_config - ) - else: - if sql_config is None: - raise ValueError("Either api_config or sql_config must be provided") - self.analyticdb_vector = AnalyticdbVectorBySql(collection_name, sql_config) - - def get_type(self) -> str: - return VectorType.ANALYTICDB - - def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs): - dimension = len(embeddings[0]) - self.analyticdb_vector._create_collection_if_not_exists(dimension) - self.analyticdb_vector.add_texts(texts, embeddings) - - def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs): - self.analyticdb_vector.add_texts(documents, embeddings) - - def text_exists(self, id: str) -> bool: - return self.analyticdb_vector.text_exists(id) - - def delete_by_ids(self, ids: list[str]): - self.analyticdb_vector.delete_by_ids(ids) - - def delete_by_metadata_field(self, key: str, value: str): - self.analyticdb_vector.delete_by_metadata_field(key, value) - - def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]: - return self.analyticdb_vector.search_by_vector(query_vector, **kwargs) - - def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: - return self.analyticdb_vector.search_by_full_text(query, **kwargs) - - def delete(self): - self.analyticdb_vector.delete() - - -class AnalyticdbVectorFactory(AbstractVectorFactory): - def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> AnalyticdbVector: - if dataset.index_struct_dict: - class_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"] - collection_name = class_prefix.lower() - else: - dataset_id = dataset.id - collection_name = Dataset.gen_collection_name_by_id(dataset_id).lower() - dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.ANALYTICDB, collection_name)) - - if dify_config.ANALYTICDB_HOST is None: - # implemented through OpenAPI - apiConfig = AnalyticdbVectorOpenAPIConfig( - access_key_id=dify_config.ANALYTICDB_KEY_ID or "", - access_key_secret=dify_config.ANALYTICDB_KEY_SECRET or "", - region_id=dify_config.ANALYTICDB_REGION_ID or "", - instance_id=dify_config.ANALYTICDB_INSTANCE_ID or "", - account=dify_config.ANALYTICDB_ACCOUNT or "", - account_password=dify_config.ANALYTICDB_PASSWORD or "", - namespace=dify_config.ANALYTICDB_NAMESPACE or "", - namespace_password=dify_config.ANALYTICDB_NAMESPACE_PASSWORD, - ) - sqlConfig = None - else: - # implemented through sql - sqlConfig = AnalyticdbVectorBySqlConfig( - host=dify_config.ANALYTICDB_HOST, - port=dify_config.ANALYTICDB_PORT, - account=dify_config.ANALYTICDB_ACCOUNT or "", - account_password=dify_config.ANALYTICDB_PASSWORD or "", - min_connection=dify_config.ANALYTICDB_MIN_CONNECTION, - max_connection=dify_config.ANALYTICDB_MAX_CONNECTION, - namespace=dify_config.ANALYTICDB_NAMESPACE or "", - ) - apiConfig = None - return AnalyticdbVector( - collection_name, - apiConfig, - sqlConfig, - ) diff --git a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_openapi.py b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_openapi.py deleted file mode 100644 index 702200e0ac..0000000000 --- a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_openapi.py +++ /dev/null @@ -1,321 +0,0 @@ -import json -from typing import Any - -from pydantic import BaseModel, model_validator - -_import_err_msg = ( - "`alibabacloud_gpdb20160503` and `alibabacloud_tea_openapi` packages not found, " - "please run `pip install alibabacloud_gpdb20160503 alibabacloud_tea_openapi`" -) - -from core.rag.models.document import Document -from extensions.ext_redis import redis_client - - -class AnalyticdbVectorOpenAPIConfig(BaseModel): - access_key_id: str - access_key_secret: str - region_id: str - instance_id: str - account: str - account_password: str - namespace: str = "dify" - namespace_password: str | None = None - metrics: str = "cosine" - read_timeout: int = 60000 - - @model_validator(mode="before") - @classmethod - def validate_config(cls, values: dict): - if not values["access_key_id"]: - raise ValueError("config ANALYTICDB_KEY_ID is required") - if not values["access_key_secret"]: - raise ValueError("config ANALYTICDB_KEY_SECRET is required") - if not values["region_id"]: - raise ValueError("config ANALYTICDB_REGION_ID is required") - if not values["instance_id"]: - raise ValueError("config ANALYTICDB_INSTANCE_ID is required") - if not values["account"]: - raise ValueError("config ANALYTICDB_ACCOUNT is required") - if not values["account_password"]: - raise ValueError("config ANALYTICDB_PASSWORD is required") - if not values["namespace_password"]: - raise ValueError("config ANALYTICDB_NAMESPACE_PASSWORD is required") - return values - - def to_analyticdb_client_params(self): - return { - "access_key_id": self.access_key_id, - "access_key_secret": self.access_key_secret, - "region_id": self.region_id, - "read_timeout": self.read_timeout, - } - - -class AnalyticdbVectorOpenAPI: - def __init__(self, collection_name: str, config: AnalyticdbVectorOpenAPIConfig): - try: - from alibabacloud_gpdb20160503.client import Client # type: ignore - from alibabacloud_tea_openapi import models as open_api_models # type: ignore - except: - raise ImportError(_import_err_msg) - self._collection_name = collection_name.lower() - self.config = config - self._client_config = open_api_models.Config(user_agent="dify", **config.to_analyticdb_client_params()) - self._client = Client(self._client_config) - self._initialize() - - def _initialize(self): - cache_key = f"vector_initialize_{self.config.instance_id}" - lock_name = f"{cache_key}_lock" - with redis_client.lock(lock_name, timeout=20): - database_exist_cache_key = f"vector_initialize_{self.config.instance_id}" - if redis_client.get(database_exist_cache_key): - return - self._initialize_vector_database() - self._create_namespace_if_not_exists() - redis_client.set(database_exist_cache_key, 1, ex=3600) - - def _initialize_vector_database(self): - from alibabacloud_gpdb20160503 import models as gpdb_20160503_models # type: ignore - - request = gpdb_20160503_models.InitVectorDatabaseRequest( - dbinstance_id=self.config.instance_id, - region_id=self.config.region_id, - manager_account=self.config.account, - manager_account_password=self.config.account_password, - ) - self._client.init_vector_database(request) - - def _create_namespace_if_not_exists(self): - from alibabacloud_gpdb20160503 import models as gpdb_20160503_models - from Tea.exceptions import TeaException # type: ignore - - try: - request = gpdb_20160503_models.DescribeNamespaceRequest( - dbinstance_id=self.config.instance_id, - region_id=self.config.region_id, - namespace=self.config.namespace, - manager_account=self.config.account, - manager_account_password=self.config.account_password, - ) - self._client.describe_namespace(request) - except TeaException as e: - if e.statusCode == 404: - request = gpdb_20160503_models.CreateNamespaceRequest( - dbinstance_id=self.config.instance_id, - region_id=self.config.region_id, - manager_account=self.config.account, - manager_account_password=self.config.account_password, - namespace=self.config.namespace, - namespace_password=self.config.namespace_password, - ) - self._client.create_namespace(request) - else: - raise ValueError(f"failed to create namespace {self.config.namespace}: {e}") - - def _create_collection_if_not_exists(self, embedding_dimension: int): - from alibabacloud_gpdb20160503 import models as gpdb_20160503_models - from Tea.exceptions import TeaException - - cache_key = f"vector_indexing_{self._collection_name}" - lock_name = f"{cache_key}_lock" - with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = f"vector_indexing_{self._collection_name}" - if redis_client.get(collection_exist_cache_key): - return - try: - request = gpdb_20160503_models.DescribeCollectionRequest( - dbinstance_id=self.config.instance_id, - region_id=self.config.region_id, - namespace=self.config.namespace, - namespace_password=self.config.namespace_password, - collection=self._collection_name, - ) - self._client.describe_collection(request) - except TeaException as e: - if e.statusCode == 404: - metadata = '{"ref_doc_id":"text","page_content":"text","metadata_":"jsonb"}' - full_text_retrieval_fields = "page_content" - request = gpdb_20160503_models.CreateCollectionRequest( - dbinstance_id=self.config.instance_id, - region_id=self.config.region_id, - manager_account=self.config.account, - manager_account_password=self.config.account_password, - namespace=self.config.namespace, - collection=self._collection_name, - dimension=embedding_dimension, - metrics=self.config.metrics, - metadata=metadata, - full_text_retrieval_fields=full_text_retrieval_fields, - ) - self._client.create_collection(request) - else: - raise ValueError(f"failed to create collection {self._collection_name}: {e}") - redis_client.set(collection_exist_cache_key, 1, ex=3600) - - def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs): - from alibabacloud_gpdb20160503 import models as gpdb_20160503_models - - rows: list[gpdb_20160503_models.UpsertCollectionDataRequestRows] = [] - for doc, embedding in zip(documents, embeddings, strict=True): - if doc.metadata is not None: - metadata = { - "ref_doc_id": doc.metadata["doc_id"], - "page_content": doc.page_content, - "metadata_": json.dumps(doc.metadata), - } - rows.append( - gpdb_20160503_models.UpsertCollectionDataRequestRows( - vector=embedding, - metadata=metadata, - ) - ) - request = gpdb_20160503_models.UpsertCollectionDataRequest( - dbinstance_id=self.config.instance_id, - region_id=self.config.region_id, - namespace=self.config.namespace, - namespace_password=self.config.namespace_password, - collection=self._collection_name, - rows=rows, - ) - self._client.upsert_collection_data(request) - - def text_exists(self, id: str) -> bool: - from alibabacloud_gpdb20160503 import models as gpdb_20160503_models - - request = gpdb_20160503_models.QueryCollectionDataRequest( - dbinstance_id=self.config.instance_id, - region_id=self.config.region_id, - namespace=self.config.namespace, - namespace_password=self.config.namespace_password, - collection=self._collection_name, - metrics=self.config.metrics, - include_values=True, - vector=None, - content=None, - top_k=1, - filter=f"ref_doc_id='{id}'", - ) - response = self._client.query_collection_data(request) - return len(response.body.matches.match) > 0 - - def delete_by_ids(self, ids: list[str]): - from alibabacloud_gpdb20160503 import models as gpdb_20160503_models - - ids_str = ",".join(f"'{id}'" for id in ids) - ids_str = f"({ids_str})" - request = gpdb_20160503_models.DeleteCollectionDataRequest( - dbinstance_id=self.config.instance_id, - region_id=self.config.region_id, - namespace=self.config.namespace, - namespace_password=self.config.namespace_password, - collection=self._collection_name, - collection_data=None, - collection_data_filter=f"ref_doc_id IN {ids_str}", - ) - self._client.delete_collection_data(request) - - def delete_by_metadata_field(self, key: str, value: str): - from alibabacloud_gpdb20160503 import models as gpdb_20160503_models - - request = gpdb_20160503_models.DeleteCollectionDataRequest( - dbinstance_id=self.config.instance_id, - region_id=self.config.region_id, - namespace=self.config.namespace, - namespace_password=self.config.namespace_password, - collection=self._collection_name, - collection_data=None, - collection_data_filter=f"metadata_ ->> '{key}' = '{value}'", - ) - self._client.delete_collection_data(request) - - def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]: - from alibabacloud_gpdb20160503 import models as gpdb_20160503_models - - document_ids_filter = kwargs.get("document_ids_filter") - where_clause = "" - if document_ids_filter: - document_ids = ", ".join(f"'{id}'" for id in document_ids_filter) - where_clause += f"metadata_->>'document_id' IN ({document_ids})" - - score_threshold = kwargs.get("score_threshold") or 0.0 - request = gpdb_20160503_models.QueryCollectionDataRequest( - dbinstance_id=self.config.instance_id, - region_id=self.config.region_id, - namespace=self.config.namespace, - namespace_password=self.config.namespace_password, - collection=self._collection_name, - include_values=kwargs.pop("include_values", True), - metrics=self.config.metrics, - vector=query_vector, - content=None, - top_k=kwargs.get("top_k", 4), - filter=where_clause, - ) - response = self._client.query_collection_data(request) - documents = [] - for match in response.body.matches.match: - if match.score >= score_threshold: - metadata = json.loads(match.metadata.get("metadata_")) - metadata["score"] = match.score - doc = Document( - page_content=match.metadata.get("page_content"), - vector=match.values.value, - metadata=metadata, - ) - documents.append(doc) - documents = sorted(documents, key=lambda x: x.metadata["score"] if x.metadata else 0, reverse=True) - return documents - - def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: - from alibabacloud_gpdb20160503 import models as gpdb_20160503_models - - document_ids_filter = kwargs.get("document_ids_filter") - where_clause = "" - if document_ids_filter: - document_ids = ", ".join(f"'{id}'" for id in document_ids_filter) - where_clause += f"metadata_->>'document_id' IN ({document_ids})" - score_threshold = float(kwargs.get("score_threshold") or 0.0) - request = gpdb_20160503_models.QueryCollectionDataRequest( - dbinstance_id=self.config.instance_id, - region_id=self.config.region_id, - namespace=self.config.namespace, - namespace_password=self.config.namespace_password, - collection=self._collection_name, - include_values=kwargs.pop("include_values", True), - metrics=self.config.metrics, - vector=None, - content=query, - top_k=kwargs.get("top_k", 4), - filter=where_clause, - ) - response = self._client.query_collection_data(request) - documents = [] - for match in response.body.matches.match: - if match.score >= score_threshold: - metadata = json.loads(match.metadata.get("metadata_")) - metadata["score"] = match.score - doc = Document( - page_content=match.metadata.get("page_content"), - vector=match.values.value, - metadata=metadata, - ) - documents.append(doc) - documents = sorted(documents, key=lambda x: x.metadata["score"] if x.metadata else 0, reverse=True) - return documents - - def delete(self): - try: - from alibabacloud_gpdb20160503 import models as gpdb_20160503_models - - request = gpdb_20160503_models.DeleteCollectionRequest( - collection=self._collection_name, - dbinstance_id=self.config.instance_id, - namespace=self.config.namespace, - namespace_password=self.config.namespace_password, - region_id=self.config.region_id, - ) - self._client.delete_collection(request) - except Exception as e: - raise e diff --git a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_sql.py b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_sql.py deleted file mode 100644 index 12126f32d6..0000000000 --- a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector_sql.py +++ /dev/null @@ -1,275 +0,0 @@ -import json -import uuid -from contextlib import contextmanager -from typing import Any - -import psycopg2.extras -import psycopg2.pool -from pydantic import BaseModel, model_validator - -from core.rag.models.document import Document -from extensions.ext_redis import redis_client - - -class AnalyticdbVectorBySqlConfig(BaseModel): - host: str - port: int - account: str - account_password: str - min_connection: int - max_connection: int - namespace: str = "dify" - metrics: str = "cosine" - - @model_validator(mode="before") - @classmethod - def validate_config(cls, values: dict): - if not values["host"]: - raise ValueError("config ANALYTICDB_HOST is required") - if not values["port"]: - raise ValueError("config ANALYTICDB_PORT is required") - if not values["account"]: - raise ValueError("config ANALYTICDB_ACCOUNT is required") - if not values["account_password"]: - raise ValueError("config ANALYTICDB_PASSWORD is required") - if not values["min_connection"]: - raise ValueError("config ANALYTICDB_MIN_CONNECTION is required") - if not values["max_connection"]: - raise ValueError("config ANALYTICDB_MAX_CONNECTION is required") - if values["min_connection"] > values["max_connection"]: - raise ValueError("config ANALYTICDB_MIN_CONNECTION should less than ANALYTICDB_MAX_CONNECTION") - return values - - -class AnalyticdbVectorBySql: - def __init__(self, collection_name: str, config: AnalyticdbVectorBySqlConfig): - self._collection_name = collection_name.lower() - self.databaseName = "knowledgebase" - self.config = config - self.table_name = f"{self.config.namespace}.{self._collection_name}" - self.pool = None - self._initialize() - if not self.pool: - self.pool = self._create_connection_pool() - - def _initialize(self): - cache_key = f"vector_initialize_{self.config.host}" - lock_name = f"{cache_key}_lock" - with redis_client.lock(lock_name, timeout=20): - database_exist_cache_key = f"vector_initialize_{self.config.host}" - if redis_client.get(database_exist_cache_key): - return - self._initialize_vector_database() - redis_client.set(database_exist_cache_key, 1, ex=3600) - - def _create_connection_pool(self): - return psycopg2.pool.SimpleConnectionPool( - self.config.min_connection, - self.config.max_connection, - host=self.config.host, - port=self.config.port, - user=self.config.account, - password=self.config.account_password, - database=self.databaseName, - ) - - @contextmanager - def _get_cursor(self): - assert self.pool is not None, "Connection pool is not initialized" - conn = self.pool.getconn() - cur = conn.cursor() - try: - yield cur - finally: - cur.close() - conn.commit() - self.pool.putconn(conn) - - def _initialize_vector_database(self): - conn = psycopg2.connect( - host=self.config.host, - port=self.config.port, - user=self.config.account, - password=self.config.account_password, - database="postgres", - ) - conn.autocommit = True - cur = conn.cursor() - try: - cur.execute(f"CREATE DATABASE {self.databaseName}") - except Exception as e: - if "already exists" not in str(e): - raise e - finally: - cur.close() - conn.close() - self.pool = self._create_connection_pool() - with self._get_cursor() as cur: - conn = cur.connection - try: - cur.execute("CREATE EXTENSION IF NOT EXISTS zhparser;") - except Exception as e: - conn.rollback() - raise RuntimeError( - "Failed to create zhparser extension. Please ensure it is available in your AnalyticDB." - ) from e - try: - cur.execute("CREATE TEXT SEARCH CONFIGURATION zh_cn (PARSER = zhparser)") - cur.execute("ALTER TEXT SEARCH CONFIGURATION zh_cn ADD MAPPING FOR n,v,a,i,e,l,x WITH simple") - except Exception as e: - conn.rollback() - if "already exists" not in str(e): - raise e - cur.execute( - "CREATE OR REPLACE FUNCTION " - "public.to_tsquery_from_text(txt text, lang regconfig DEFAULT 'english'::regconfig) " - "RETURNS tsquery LANGUAGE sql IMMUTABLE STRICT AS $function$ " - "SELECT to_tsquery(lang, COALESCE(string_agg(split_part(word, ':', 1), ' | '), '')) " - "FROM (SELECT unnest(string_to_array(to_tsvector(lang, txt)::text, ' ')) AS word) " - "AS words_only;$function$" - ) - cur.execute(f"CREATE SCHEMA IF NOT EXISTS {self.config.namespace}") - - def _create_collection_if_not_exists(self, embedding_dimension: int): - cache_key = f"vector_indexing_{self._collection_name}" - lock_name = f"{cache_key}_lock" - with redis_client.lock(lock_name, timeout=20): - collection_exist_cache_key = f"vector_indexing_{self._collection_name}" - if redis_client.get(collection_exist_cache_key): - return - with self._get_cursor() as cur: - cur.execute( - f"CREATE TABLE IF NOT EXISTS {self.table_name}(" - f"id text PRIMARY KEY," - f"vector real[], ref_doc_id text, page_content text, metadata_ jsonb, " - f"to_tsvector TSVECTOR" - f") WITH (fillfactor=70) DISTRIBUTED BY (id);" - ) - if embedding_dimension is not None: - index_name = f"{self._collection_name}_embedding_idx" - try: - cur.execute(f"ALTER TABLE {self.table_name} ALTER COLUMN vector SET STORAGE PLAIN") - cur.execute( - f"CREATE INDEX {index_name} ON {self.table_name} USING ann(vector) " - f"WITH(dim='{embedding_dimension}', distancemeasure='{self.config.metrics}', " - f"pq_enable=0, external_storage=0)" - ) - cur.execute(f"CREATE INDEX ON {self.table_name} USING gin(to_tsvector)") - except Exception as e: - if "already exists" not in str(e): - raise e - redis_client.set(collection_exist_cache_key, 1, ex=3600) - - def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs): - values = [] - id_prefix = str(uuid.uuid4()) + "_" - sql = f""" - INSERT INTO {self.table_name} - (id, ref_doc_id, vector, page_content, metadata_, to_tsvector) - VALUES (%s, %s, %s, %s, %s, to_tsvector('zh_cn', %s)); - """ - for i, doc in enumerate(documents): - if doc.metadata is not None: - values.append( - ( - id_prefix + str(i), - doc.metadata.get("doc_id", str(uuid.uuid4())), - embeddings[i], - doc.page_content, - json.dumps(doc.metadata), - doc.page_content, - ) - ) - with self._get_cursor() as cur: - psycopg2.extras.execute_batch(cur, sql, values) - - def text_exists(self, id: str) -> bool: - with self._get_cursor() as cur: - cur.execute(f"SELECT id FROM {self.table_name} WHERE ref_doc_id = %s", (id,)) - return cur.fetchone() is not None - - def delete_by_ids(self, ids: list[str]): - if not ids: - return - with self._get_cursor() as cur: - try: - cur.execute(f"DELETE FROM {self.table_name} WHERE ref_doc_id = ANY(%s)", (ids,)) - except Exception as e: - if "does not exist" not in str(e): - raise e - - def delete_by_metadata_field(self, key: str, value: str): - with self._get_cursor() as cur: - try: - cur.execute(f"DELETE FROM {self.table_name} WHERE metadata_->>%s = %s", (key, value)) - except Exception as e: - if "does not exist" not in str(e): - raise e - - def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]: - top_k = kwargs.get("top_k", 4) - if not isinstance(top_k, int) or top_k <= 0: - raise ValueError("top_k must be a positive integer") - document_ids_filter = kwargs.get("document_ids_filter") - where_clause = "WHERE 1=1" - if document_ids_filter: - document_ids = ", ".join(f"'{id}'" for id in document_ids_filter) - where_clause += f"AND metadata_->>'document_id' IN ({document_ids})" - score_threshold = float(kwargs.get("score_threshold") or 0.0) - with self._get_cursor() as cur: - query_vector_str = json.dumps(query_vector) - query_vector_str = "{" + query_vector_str[1:-1] + "}" - cur.execute( - f"SELECT t.id AS id, t.vector AS vector, (1.0 - t.score) AS score, " - f"t.page_content as page_content, t.metadata_ AS metadata_ " - f"FROM (SELECT id, vector, page_content, metadata_, vector <=> %s AS score " - f"FROM {self.table_name} {where_clause} ORDER BY score LIMIT {top_k} ) t", - (query_vector_str,), - ) - documents = [] - for record in cur: - _, vector, score, page_content, metadata = record - if score >= score_threshold: - metadata["score"] = score - doc = Document( - page_content=page_content, - vector=vector, - metadata=metadata, - ) - documents.append(doc) - return documents - - def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: - top_k = kwargs.get("top_k", 4) - if not isinstance(top_k, int) or top_k <= 0: - raise ValueError("top_k must be a positive integer") - document_ids_filter = kwargs.get("document_ids_filter") - where_clause = "" - if document_ids_filter: - document_ids = ", ".join(f"'{id}'" for id in document_ids_filter) - where_clause += f"AND metadata_->>'document_id' IN ({document_ids})" - with self._get_cursor() as cur: - cur.execute( - f"""SELECT id, vector, page_content, metadata_, - ts_rank(to_tsvector, to_tsquery_from_text(%s, 'zh_cn'), 32) AS score - FROM {self.table_name} - WHERE to_tsvector@@to_tsquery_from_text(%s, 'zh_cn') {where_clause} - ORDER BY score DESC, id DESC - LIMIT {top_k}""", - (f"'{query}'", f"'{query}'"), - ) - documents = [] - for record in cur: - _, vector, page_content, metadata, score = record - metadata["score"] = score - doc = Document( - page_content=page_content, - vector=vector, - metadata=metadata, - ) - documents.append(doc) - return documents - - def delete(self): - with self._get_cursor() as cur: - cur.execute(f"DROP TABLE IF EXISTS {self.table_name}") diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py index c48437e4a7..9839dbf98d 100644 --- a/api/core/rag/datasource/vdb/vector_factory.py +++ b/api/core/rag/datasource/vdb/vector_factory.py @@ -135,10 +135,6 @@ class Vector: from core.rag.datasource.vdb.opensearch.opensearch_vector import OpenSearchVectorFactory return OpenSearchVectorFactory - case VectorType.ANALYTICDB: - from core.rag.datasource.vdb.analyticdb.analyticdb_vector import AnalyticdbVectorFactory - - return AnalyticdbVectorFactory case VectorType.COUCHBASE: from core.rag.datasource.vdb.couchbase.couchbase_vector import CouchbaseVectorFactory diff --git a/api/core/rag/datasource/vdb/vector_type.py b/api/core/rag/datasource/vdb/vector_type.py index 8872d4ca5b..2dfec13b71 100644 --- a/api/core/rag/datasource/vdb/vector_type.py +++ b/api/core/rag/datasource/vdb/vector_type.py @@ -3,7 +3,6 @@ from enum import StrEnum class VectorType(StrEnum): ALIBABACLOUD_MYSQL = "alibabacloud_mysql" - ANALYTICDB = "analyticdb" CHROMA = "chroma" MILVUS = "milvus" MYSCALE = "myscale" diff --git a/api/pyproject.toml b/api/pyproject.toml index c2db52d5d6..9d940778e2 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -202,8 +202,6 @@ tools = ["cloudscraper~=1.2.71", "nltk~=3.9.1"] # Required by vector store clients ############################################################ vdb = [ - "alibabacloud_gpdb20160503~=3.8.0", - "alibabacloud_tea_openapi~=0.4.3", "chromadb==0.5.20", "clickhouse-connect~=0.14.1", "couchbase~=4.5.0", diff --git a/api/pyrefly-local-excludes.txt b/api/pyrefly-local-excludes.txt index 25ba34e28f..bf1a7a5716 100644 --- a/api/pyrefly-local-excludes.txt +++ b/api/pyrefly-local-excludes.txt @@ -45,8 +45,6 @@ core/plugin/backwards_invocation/model.py core/prompt/utils/extract_thread_messages.py core/rag/datasource/keyword/jieba/jieba.py core/rag/datasource/keyword/jieba/jieba_keyword_table_handler.py -core/rag/datasource/vdb/analyticdb/analyticdb_vector.py -core/rag/datasource/vdb/analyticdb/analyticdb_vector_openapi.py core/rag/datasource/vdb/baidu/baidu_vector.py core/rag/datasource/vdb/chroma/chroma_vector.py core/rag/datasource/vdb/couchbase/couchbase_vector.py diff --git a/api/tests/integration_tests/vdb/analyticdb/test_analyticdb.py b/api/tests/integration_tests/vdb/analyticdb/test_analyticdb.py deleted file mode 100644 index 5dd4754e8e..0000000000 --- a/api/tests/integration_tests/vdb/analyticdb/test_analyticdb.py +++ /dev/null @@ -1,49 +0,0 @@ -from core.rag.datasource.vdb.analyticdb.analyticdb_vector import AnalyticdbVector -from core.rag.datasource.vdb.analyticdb.analyticdb_vector_openapi import AnalyticdbVectorOpenAPIConfig -from core.rag.datasource.vdb.analyticdb.analyticdb_vector_sql import AnalyticdbVectorBySqlConfig -from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, setup_mock_redis - - -class AnalyticdbVectorTest(AbstractVectorTest): - def __init__(self, config_type: str): - super().__init__() - # Analyticdb requires collection_name length less than 60. - # it's ok for normal usage. - self.collection_name = self.collection_name.replace("_test", "") - if config_type == "sql": - self.vector = AnalyticdbVector( - collection_name=self.collection_name, - sql_config=AnalyticdbVectorBySqlConfig( - host="test_host", - port=5432, - account="test_account", - account_password="test_passwd", - namespace="difytest_namespace", - ), - api_config=None, - ) - else: - self.vector = AnalyticdbVector( - collection_name=self.collection_name, - sql_config=None, - api_config=AnalyticdbVectorOpenAPIConfig( - access_key_id="test_key_id", - access_key_secret="test_key_secret", - region_id="test_region", - instance_id="test_id", - account="test_account", - account_password="test_passwd", - namespace="difytest_namespace", - collection="difytest_collection", - namespace_password="test_passwd", - ), - ) - - def run_all_tests(self): - self.vector.delete() - return super().run_all_tests() - - -def test_chroma_vector(setup_mock_redis): - AnalyticdbVectorTest("api").run_all_tests() - AnalyticdbVectorTest("sql").run_all_tests() diff --git a/api/uv.lock b/api/uv.lock index 1fd3dc17f6..2e4115717a 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -148,164 +148,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ba/88/6237e97e3385b57b5f1528647addea5cc03d4d65d5979ab24327d41fb00d/alembic-1.17.2-py3-none-any.whl", hash = "sha256:f483dd1fe93f6c5d49217055e4d15b905b425b6af906746abb35b69c1996c4e6", size = 248554 }, ] -[[package]] -name = "alibabacloud-credentials" -version = "1.0.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiofiles" }, - { name = "alibabacloud-credentials-api" }, - { name = "alibabacloud-tea" }, - { name = "apscheduler" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/df/82/45ec98bd19387507cf058ce47f62d6fea288bf0511c5a101b832e13d3edd/alibabacloud-credentials-1.0.3.tar.gz", hash = "sha256:9d8707e96afc6f348e23f5677ed15a21c2dfce7cfe6669776548ee4c80e1dfaf", size = 35831 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/df/dbd9ae9d531a40d5613573c5a22ef774ecfdcaa0dc43aad42189f89c04ce/alibabacloud_credentials-1.0.3-py3-none-any.whl", hash = "sha256:30c8302f204b663c655d97e1c283ee9f9f84a6257d7901b931477d6cf34445a8", size = 41875 }, -] - -[[package]] -name = "alibabacloud-credentials-api" -version = "1.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a0/87/1d7019d23891897cb076b2f7e3c81ab3c2ba91de3bb067196f675d60d34c/alibabacloud-credentials-api-1.0.0.tar.gz", hash = "sha256:8c340038d904f0218d7214a8f4088c31912bfcf279af2cbc7d9be4897a97dd2f", size = 2330 } - -[[package]] -name = "alibabacloud-endpoint-util" -version = "0.0.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/92/7d/8cc92a95c920e344835b005af6ea45a0db98763ad6ad19299d26892e6c8d/alibabacloud_endpoint_util-0.0.4.tar.gz", hash = "sha256:a593eb8ddd8168d5dc2216cd33111b144f9189fcd6e9ca20e48f358a739bbf90", size = 2813 } - -[[package]] -name = "alibabacloud-gateway-spi" -version = "0.0.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "alibabacloud-credentials" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ab/98/d7111245f17935bf72ee9bea60bbbeff2bc42cdfe24d2544db52bc517e1a/alibabacloud_gateway_spi-0.0.3.tar.gz", hash = "sha256:10d1c53a3fc5f87915fbd6b4985b98338a776e9b44a0263f56643c5048223b8b", size = 4249 } - -[[package]] -name = "alibabacloud-gpdb20160503" -version = "3.8.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "alibabacloud-endpoint-util" }, - { name = "alibabacloud-openapi-util" }, - { name = "alibabacloud-openplatform20191219" }, - { name = "alibabacloud-oss-sdk" }, - { name = "alibabacloud-oss-util" }, - { name = "alibabacloud-tea-fileform" }, - { name = "alibabacloud-tea-openapi" }, - { name = "alibabacloud-tea-util" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/15/6a/cc72e744e95c8f37fa6a84e66ae0b9b57a13ee97a0ef03d94c7127c31d75/alibabacloud_gpdb20160503-3.8.3.tar.gz", hash = "sha256:4dfcc0d9cff5a921d529d76f4bf97e2ceb9dc2fa53f00ab055f08509423d8e30", size = 155092 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/36/bce41704b3bf59d607590ec73a42a254c5dea27c0f707aee11d20512a200/alibabacloud_gpdb20160503-3.8.3-py3-none-any.whl", hash = "sha256:06e1c46ce5e4e9d1bcae76e76e51034196c625799d06b2efec8d46a7df323fe8", size = 156097 }, -] - -[[package]] -name = "alibabacloud-openapi-util" -version = "0.2.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "alibabacloud-tea-util" }, - { name = "cryptography" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f6/50/5f41ab550d7874c623f6e992758429802c4b52a6804db437017e5387de33/alibabacloud_openapi_util-0.2.2.tar.gz", hash = "sha256:ebbc3906f554cb4bf8f513e43e8a33e8b6a3d4a0ef13617a0e14c3dda8ef52a8", size = 7201 } - -[[package]] -name = "alibabacloud-openplatform20191219" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "alibabacloud-endpoint-util" }, - { name = "alibabacloud-openapi-util" }, - { name = "alibabacloud-tea-openapi" }, - { name = "alibabacloud-tea-util" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/4f/bf/f7fa2f3657ed352870f442434cb2f27b7f70dcd52a544a1f3998eeaf6d71/alibabacloud_openplatform20191219-2.0.0.tar.gz", hash = "sha256:e67f4c337b7542538746592c6a474bd4ae3a9edccdf62e11a32ca61fad3c9020", size = 5038 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/94/e5/18c75213551eeca9db1f6b41ddcc0bd87b5b6508c75a67f05cd8671847b4/alibabacloud_openplatform20191219-2.0.0-py3-none-any.whl", hash = "sha256:873821c45bca72a6c6ec7a906c9cb21554c122e88893bbac3986934dab30dd36", size = 5204 }, -] - -[[package]] -name = "alibabacloud-oss-sdk" -version = "0.1.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "alibabacloud-credentials" }, - { name = "alibabacloud-oss-util" }, - { name = "alibabacloud-tea-fileform" }, - { name = "alibabacloud-tea-util" }, - { name = "alibabacloud-tea-xml" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/7e/d1/f442dd026908fcf55340ca694bb1d027aa91e119e76ae2fbea62f2bde4f4/alibabacloud_oss_sdk-0.1.1.tar.gz", hash = "sha256:f51a368020d0964fcc0978f96736006f49f5ab6a4a4bf4f0b8549e2c659e7358", size = 46434 } - -[[package]] -name = "alibabacloud-oss-util" -version = "0.0.6" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "alibabacloud-tea" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/02/7c/d7e812b9968247a302573daebcfef95d0f9a718f7b4bfcca8d3d83e266be/alibabacloud_oss_util-0.0.6.tar.gz", hash = "sha256:d3ecec36632434bd509a113e8cf327dc23e830ac8d9dd6949926f4e334c8b5d6", size = 10008 } - -[[package]] -name = "alibabacloud-tea" -version = "0.4.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiohttp" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9a/7d/b22cb9a0d4f396ee0f3f9d7f26b76b9ed93d4101add7867a2c87ed2534f5/alibabacloud-tea-0.4.3.tar.gz", hash = "sha256:ec8053d0aa8d43ebe1deb632d5c5404339b39ec9a18a0707d57765838418504a", size = 8785 } - -[[package]] -name = "alibabacloud-tea-fileform" -version = "0.0.5" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "alibabacloud-tea" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/22/8a/ef8ddf5ee0350984cad2749414b420369fe943e15e6d96b79be45367630e/alibabacloud_tea_fileform-0.0.5.tar.gz", hash = "sha256:fd00a8c9d85e785a7655059e9651f9e91784678881831f60589172387b968ee8", size = 3961 } - -[[package]] -name = "alibabacloud-tea-openapi" -version = "0.4.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "alibabacloud-credentials" }, - { name = "alibabacloud-gateway-spi" }, - { name = "alibabacloud-tea-util" }, - { name = "cryptography" }, - { name = "darabonba-core" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/91/4f/b5288eea8f4d4b032c9a8f2cd1d926d5017977d10b874956f31e5343f299/alibabacloud_tea_openapi-0.4.3.tar.gz", hash = "sha256:12aef036ed993637b6f141abbd1de9d6199d5516f4a901588bb65d6a3768d41b", size = 21864 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/37/48ee5468ecad19c6d44cf3b9629d77078e836ee3ec760f0366247f307b7c/alibabacloud_tea_openapi-0.4.3-py3-none-any.whl", hash = "sha256:d0b3a373b760ef6278b25fc128c73284301e07888977bf97519e7636d47bdf0a", size = 26159 }, -] - -[[package]] -name = "alibabacloud-tea-util" -version = "0.3.14" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "alibabacloud-tea" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e9/ee/ea90be94ad781a5055db29556744681fc71190ef444ae53adba45e1be5f3/alibabacloud_tea_util-0.3.14.tar.gz", hash = "sha256:708e7c9f64641a3c9e0e566365d2f23675f8d7c2a3e2971d9402ceede0408cdb", size = 7515 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/72/9e/c394b4e2104766fb28a1e44e3ed36e4c7773b4d05c868e482be99d5635c9/alibabacloud_tea_util-0.3.14-py3-none-any.whl", hash = "sha256:10d3e5c340d8f7ec69dd27345eb2fc5a1dab07875742525edf07bbe86db93bfe", size = 6697 }, -] - -[[package]] -name = "alibabacloud-tea-xml" -version = "0.0.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "alibabacloud-tea" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/32/eb/5e82e419c3061823f3feae9b5681588762929dc4da0176667297c2784c1a/alibabacloud_tea_xml-0.0.3.tar.gz", hash = "sha256:979cb51fadf43de77f41c69fc69c12529728919f849723eb0cd24eb7b048a90c", size = 3466 } - [[package]] name = "aliyun-log-python-sdk" version = "0.9.37" @@ -1428,19 +1270,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/fb/1b681635bfd5f2274d0caa8f934b58435db6c091b97f5593738065ddb786/cymem-2.0.13-cp312-cp312-win_arm64.whl", hash = "sha256:6bbd701338df7bf408648191dff52472a9b334f71bcd31a21a41d83821050f67", size = 35959 }, ] -[[package]] -name = "darabonba-core" -version = "1.0.5" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiohttp" }, - { name = "alibabacloud-tea" }, - { name = "requests" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/66/d3/a7daaee544c904548e665829b51a9fa2572acb82c73ad787a8ff90273002/darabonba_core-1.0.5-py3-none-any.whl", hash = "sha256:671ab8dbc4edc2a8f88013da71646839bb8914f1259efc069353243ef52ea27c", size = 24580 }, -] - [[package]] name = "databricks-sdk" version = "0.73.0" @@ -1689,8 +1518,6 @@ tools = [ { name = "nltk" }, ] vdb = [ - { name = "alibabacloud-gpdb20160503" }, - { name = "alibabacloud-tea-openapi" }, { name = "chromadb" }, { name = "clickhouse-connect" }, { name = "couchbase" }, @@ -1890,8 +1717,6 @@ tools = [ { name = "nltk", specifier = "~=3.9.1" }, ] vdb = [ - { name = "alibabacloud-gpdb20160503", specifier = "~=3.8.0" }, - { name = "alibabacloud-tea-openapi", specifier = "~=0.4.3" }, { name = "chromadb", specifier = "==0.5.20" }, { name = "clickhouse-connect", specifier = "~=0.14.1" }, { name = "couchbase", specifier = "~=4.5.0" }, diff --git a/docker/.env.example b/docker/.env.example index 9d6cd65318..45a0fcc53e 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -541,7 +541,7 @@ SUPABASE_URL=your-server-url # ------------------------------ # The type of vector store to use. -# Supported values are `weaviate`, `oceanbase`, `seekdb`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`, `vastbase`, `tidb`, `tidb_on_qdrant`, `baidu`, `lindorm`, `huawei_cloud`, `upstash`, `matrixone`, `clickzetta`, `alibabacloud_mysql`, `iris`, `hologres`. +# Supported values are `weaviate`, `oceanbase`, `seekdb`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`, `vastbase`, `tidb`, `tidb_on_qdrant`, `baidu`, `lindorm`, `huawei_cloud`, `upstash`, `matrixone`, `clickzetta`, `alibabacloud_mysql`, `iris`, `hologres`. VECTOR_STORE=weaviate # Prefix used to create collection name in vector database VECTOR_INDEX_NAME_PREFIX=Vector_index @@ -646,20 +646,6 @@ PGVECTO_RS_USER=postgres PGVECTO_RS_PASSWORD=difyai123456 PGVECTO_RS_DATABASE=dify -# analyticdb configurations, only available when VECTOR_STORE is `analyticdb` -ANALYTICDB_KEY_ID=your-ak -ANALYTICDB_KEY_SECRET=your-sk -ANALYTICDB_REGION_ID=cn-hangzhou -ANALYTICDB_INSTANCE_ID=gp-ab123456 -ANALYTICDB_ACCOUNT=testaccount -ANALYTICDB_PASSWORD=testpassword -ANALYTICDB_NAMESPACE=dify -ANALYTICDB_NAMESPACE_PASSWORD=difypassword -ANALYTICDB_HOST=gp-test.aliyuncs.com -ANALYTICDB_PORT=5432 -ANALYTICDB_MIN_CONNECTION=1 -ANALYTICDB_MAX_CONNECTION=5 - # TiDB vector configurations, only available when VECTOR_STORE is `tidb_vector` TIDB_VECTOR_HOST=tidb TIDB_VECTOR_PORT=4000 diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index bf72a0f623..5eeaf5d9ed 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -247,18 +247,6 @@ x-shared-env: &shared-api-worker-env PGVECTO_RS_USER: ${PGVECTO_RS_USER:-postgres} PGVECTO_RS_PASSWORD: ${PGVECTO_RS_PASSWORD:-difyai123456} PGVECTO_RS_DATABASE: ${PGVECTO_RS_DATABASE:-dify} - ANALYTICDB_KEY_ID: ${ANALYTICDB_KEY_ID:-your-ak} - ANALYTICDB_KEY_SECRET: ${ANALYTICDB_KEY_SECRET:-your-sk} - ANALYTICDB_REGION_ID: ${ANALYTICDB_REGION_ID:-cn-hangzhou} - ANALYTICDB_INSTANCE_ID: ${ANALYTICDB_INSTANCE_ID:-gp-ab123456} - ANALYTICDB_ACCOUNT: ${ANALYTICDB_ACCOUNT:-testaccount} - ANALYTICDB_PASSWORD: ${ANALYTICDB_PASSWORD:-testpassword} - ANALYTICDB_NAMESPACE: ${ANALYTICDB_NAMESPACE:-dify} - ANALYTICDB_NAMESPACE_PASSWORD: ${ANALYTICDB_NAMESPACE_PASSWORD:-difypassword} - ANALYTICDB_HOST: ${ANALYTICDB_HOST:-gp-test.aliyuncs.com} - ANALYTICDB_PORT: ${ANALYTICDB_PORT:-5432} - ANALYTICDB_MIN_CONNECTION: ${ANALYTICDB_MIN_CONNECTION:-1} - ANALYTICDB_MAX_CONNECTION: ${ANALYTICDB_MAX_CONNECTION:-5} TIDB_VECTOR_HOST: ${TIDB_VECTOR_HOST:-tidb} TIDB_VECTOR_PORT: ${TIDB_VECTOR_PORT:-4000} TIDB_VECTOR_USER: ${TIDB_VECTOR_USER:-}