mirror of https://github.com/langgenius/dify.git
Merge branch 'main' into feat/rag-2
This commit is contained in:
commit
1b3860d012
File diff suppressed because it is too large
Load Diff
|
|
@ -4,6 +4,23 @@ title: "[Chore/Refactor] "
|
|||
labels:
|
||||
- refactor
|
||||
body:
|
||||
- type: checkboxes
|
||||
attributes:
|
||||
label: Self Checks
|
||||
description: "To make sure we get to you in time, please check the following :)"
|
||||
options:
|
||||
- label: I have read the [Contributing Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) and [Language Policy](https://github.com/langgenius/dify/issues/1542).
|
||||
required: true
|
||||
- label: This is only for refactoring, if you would like to ask a question, please head to [Discussions](https://github.com/langgenius/dify/discussions/categories/general).
|
||||
required: true
|
||||
- label: I have searched for existing issues [search for existing issues](https://github.com/langgenius/dify/issues), including closed ones.
|
||||
required: true
|
||||
- label: I confirm that I am using English to submit this report, otherwise it will be closed.
|
||||
required: true
|
||||
- label: 【中文用户 & Non English User】请使用英语提交,否则会被关闭 :)
|
||||
required: true
|
||||
- label: "Please do not modify this template :) and fill in all the required fields."
|
||||
required: true
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
|
|
|
|||
|
|
@ -5,6 +5,10 @@ on:
|
|||
types: [closed]
|
||||
branches: [main]
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
|
||||
jobs:
|
||||
check-and-update:
|
||||
if: github.event.pull_request.merged == true
|
||||
|
|
@ -16,7 +20,7 @@ jobs:
|
|||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 2 # last 2 commits
|
||||
persist-credentials: false
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Check for file changes in i18n/en-US
|
||||
id: check_files
|
||||
|
|
@ -49,7 +53,7 @@ jobs:
|
|||
if: env.FILES_CHANGED == 'true'
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Run npm script
|
||||
- name: Generate i18n translations
|
||||
if: env.FILES_CHANGED == 'true'
|
||||
run: pnpm run auto-gen-i18n
|
||||
|
||||
|
|
@ -57,6 +61,7 @@ jobs:
|
|||
if: env.FILES_CHANGED == 'true'
|
||||
uses: peter-evans/create-pull-request@v6
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
commit-message: Update i18n files based on en-US changes
|
||||
title: 'chore: translate i18n files'
|
||||
body: This PR was automatically created to update i18n files based on changes in en-US locale.
|
||||
|
|
|
|||
|
|
@ -215,3 +215,10 @@ mise.toml
|
|||
# AI Assistant
|
||||
.roo/
|
||||
api/.env.backup
|
||||
|
||||
# Clickzetta test credentials
|
||||
.env.clickzetta
|
||||
.env.clickzetta.test
|
||||
|
||||
# Clickzetta plugin development folder (keep local, ignore for PR)
|
||||
clickzetta/
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ from .storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
|
|||
from .storage.amazon_s3_storage_config import S3StorageConfig
|
||||
from .storage.azure_blob_storage_config import AzureBlobStorageConfig
|
||||
from .storage.baidu_obs_storage_config import BaiduOBSStorageConfig
|
||||
from .storage.clickzetta_volume_storage_config import ClickZettaVolumeStorageConfig
|
||||
from .storage.google_cloud_storage_config import GoogleCloudStorageConfig
|
||||
from .storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
|
||||
from .storage.oci_storage_config import OCIStorageConfig
|
||||
|
|
@ -20,6 +21,7 @@ from .storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
|
|||
from .vdb.analyticdb_config import AnalyticdbConfig
|
||||
from .vdb.baidu_vector_config import BaiduVectorDBConfig
|
||||
from .vdb.chroma_config import ChromaConfig
|
||||
from .vdb.clickzetta_config import ClickzettaConfig
|
||||
from .vdb.couchbase_config import CouchbaseConfig
|
||||
from .vdb.elasticsearch_config import ElasticsearchConfig
|
||||
from .vdb.huawei_cloud_config import HuaweiCloudConfig
|
||||
|
|
@ -52,6 +54,7 @@ class StorageConfig(BaseSettings):
|
|||
"aliyun-oss",
|
||||
"azure-blob",
|
||||
"baidu-obs",
|
||||
"clickzetta-volume",
|
||||
"google-storage",
|
||||
"huawei-obs",
|
||||
"oci-storage",
|
||||
|
|
@ -61,8 +64,9 @@ class StorageConfig(BaseSettings):
|
|||
"local",
|
||||
] = Field(
|
||||
description="Type of storage to use."
|
||||
" Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', "
|
||||
"'huawei-obs', 'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'opendal'.",
|
||||
" Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', "
|
||||
"'clickzetta-volume', 'google-storage', 'huawei-obs', 'oci-storage', 'tencent-cos', "
|
||||
"'volcengine-tos', 'supabase'. Default is 'opendal'.",
|
||||
default="opendal",
|
||||
)
|
||||
|
||||
|
|
@ -303,6 +307,7 @@ class MiddlewareConfig(
|
|||
AliyunOSSStorageConfig,
|
||||
AzureBlobStorageConfig,
|
||||
BaiduOBSStorageConfig,
|
||||
ClickZettaVolumeStorageConfig,
|
||||
GoogleCloudStorageConfig,
|
||||
HuaweiCloudOBSStorageConfig,
|
||||
OCIStorageConfig,
|
||||
|
|
@ -315,6 +320,7 @@ class MiddlewareConfig(
|
|||
VectorStoreConfig,
|
||||
AnalyticdbConfig,
|
||||
ChromaConfig,
|
||||
ClickzettaConfig,
|
||||
HuaweiCloudConfig,
|
||||
MilvusConfig,
|
||||
MyScaleConfig,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,65 @@
|
|||
"""ClickZetta Volume Storage Configuration"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import Field
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class ClickZettaVolumeStorageConfig(BaseSettings):
    """Settings for the ClickZetta Volume file-storage backend.

    Credentials and the instance identifier default to ``None``; the remaining
    fields carry string defaults.
    """

    # --- Authentication -------------------------------------------------
    CLICKZETTA_VOLUME_USERNAME: Optional[str] = Field(
        default=None,
        description="Username for ClickZetta Volume authentication",
    )

    CLICKZETTA_VOLUME_PASSWORD: Optional[str] = Field(
        default=None,
        description="Password for ClickZetta Volume authentication",
    )

    # --- Target instance / workspace -------------------------------------
    CLICKZETTA_VOLUME_INSTANCE: Optional[str] = Field(
        default=None,
        description="ClickZetta instance identifier",
    )

    CLICKZETTA_VOLUME_SERVICE: str = Field(
        default="api.clickzetta.com",
        description="ClickZetta service endpoint",
    )

    CLICKZETTA_VOLUME_WORKSPACE: str = Field(
        default="quick_start",
        description="ClickZetta workspace name",
    )

    CLICKZETTA_VOLUME_VCLUSTER: str = Field(
        default="default_ap",
        description="ClickZetta virtual cluster name",
    )

    CLICKZETTA_VOLUME_SCHEMA: str = Field(
        default="dify",
        description="ClickZetta schema name",
    )

    # --- Volume selection and layout --------------------------------------
    CLICKZETTA_VOLUME_TYPE: str = Field(
        default="user",
        description="ClickZetta volume type (table|user|external)",
    )

    CLICKZETTA_VOLUME_NAME: Optional[str] = Field(
        default=None,
        description="ClickZetta volume name for external volumes",
    )

    CLICKZETTA_VOLUME_TABLE_PREFIX: str = Field(
        default="dataset_",
        description="Prefix for ClickZetta volume table names",
    )

    CLICKZETTA_VOLUME_DIFY_PREFIX: str = Field(
        default="dify_km",
        description="Directory prefix for User Volume to organize Dify files",
    )
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class ClickzettaConfig(BaseModel):
    """Settings for using Clickzetta Lakehouse as the vector database.

    Credentials and the instance ID default to ``None``; every other field
    has a concrete default.
    """

    # --- Authentication -------------------------------------------------
    CLICKZETTA_USERNAME: Optional[str] = Field(
        default=None,
        description="Username for authenticating with Clickzetta Lakehouse",
    )

    CLICKZETTA_PASSWORD: Optional[str] = Field(
        default=None,
        description="Password for authenticating with Clickzetta Lakehouse",
    )

    # --- Target instance / workspace -------------------------------------
    CLICKZETTA_INSTANCE: Optional[str] = Field(
        default=None,
        description="Clickzetta Lakehouse instance ID",
    )

    CLICKZETTA_SERVICE: Optional[str] = Field(
        default="api.clickzetta.com",
        description="Clickzetta API service endpoint (e.g., 'api.clickzetta.com')",
    )

    CLICKZETTA_WORKSPACE: Optional[str] = Field(
        default="default",
        description="Clickzetta workspace name",
    )

    CLICKZETTA_VCLUSTER: Optional[str] = Field(
        default="default_ap",
        description="Clickzetta virtual cluster name",
    )

    CLICKZETTA_SCHEMA: Optional[str] = Field(
        default="public",
        description="Database schema name in Clickzetta",
    )

    # --- Write batching ----------------------------------------------------
    CLICKZETTA_BATCH_SIZE: Optional[int] = Field(
        default=100,
        description="Batch size for bulk insert operations",
    )

    # --- Full-text search --------------------------------------------------
    CLICKZETTA_ENABLE_INVERTED_INDEX: Optional[bool] = Field(
        default=True,
        description="Enable inverted index for full-text search capabilities",
    )

    CLICKZETTA_ANALYZER_TYPE: Optional[str] = Field(
        default="chinese",
        description="Analyzer type for full-text search: keyword, english, chinese, unicode",
    )

    CLICKZETTA_ANALYZER_MODE: Optional[str] = Field(
        default="smart",
        description="Analyzer mode for tokenization: max_word (fine-grained) or smart (intelligent)",
    )

    # --- Vector search -----------------------------------------------------
    CLICKZETTA_VECTOR_DISTANCE_FUNCTION: Optional[str] = Field(
        default="cosine_distance",
        description="Distance function for vector similarity: l2_distance or cosine_distance",
    )
|
||||
|
|
@ -694,6 +694,7 @@ class DatasetRetrievalSettingApi(Resource):
|
|||
| VectorType.HUAWEI_CLOUD
|
||||
| VectorType.TENCENT
|
||||
| VectorType.MATRIXONE
|
||||
| VectorType.CLICKZETTA
|
||||
):
|
||||
return {
|
||||
"retrieval_method": [
|
||||
|
|
@ -742,6 +743,7 @@ class DatasetRetrievalSettingMockApi(Resource):
|
|||
| VectorType.TENCENT
|
||||
| VectorType.HUAWEI_CLOUD
|
||||
| VectorType.MATRIXONE
|
||||
| VectorType.CLICKZETTA
|
||||
):
|
||||
return {
|
||||
"retrieval_method": [
|
||||
|
|
|
|||
|
|
@ -49,7 +49,6 @@ class FileApi(Resource):
|
|||
@marshal_with(file_fields)
|
||||
@cloud_edition_billing_resource_check("documents")
|
||||
def post(self):
|
||||
file = request.files["file"]
|
||||
source_str = request.form.get("source")
|
||||
source: Literal["datasets"] | None = "datasets" if source_str == "datasets" else None
|
||||
|
||||
|
|
@ -58,6 +57,7 @@ class FileApi(Resource):
|
|||
|
||||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
file = request.files["file"]
|
||||
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
|
|
|||
|
|
@ -191,9 +191,6 @@ class WebappLogoWorkspaceApi(Resource):
|
|||
@account_initialization_required
|
||||
@cloud_edition_billing_resource_check("workspace_custom")
|
||||
def post(self):
|
||||
# get file from request
|
||||
file = request.files["file"]
|
||||
|
||||
# check file
|
||||
if "file" not in request.files:
|
||||
raise NoFileUploadedError()
|
||||
|
|
@ -201,6 +198,8 @@ class WebappLogoWorkspaceApi(Resource):
|
|||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
|
||||
# get file from request
|
||||
file = request.files["file"]
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
|
|
|
|||
|
|
@ -20,18 +20,17 @@ class FileApi(Resource):
|
|||
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.FORM))
|
||||
@marshal_with(file_fields)
|
||||
def post(self, app_model: App, end_user: EndUser):
|
||||
file = request.files["file"]
|
||||
|
||||
# check file
|
||||
if "file" not in request.files:
|
||||
raise NoFileUploadedError()
|
||||
|
||||
if not file.mimetype:
|
||||
raise UnsupportedFileTypeError()
|
||||
|
||||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
|
||||
file = request.files["file"]
|
||||
if not file.mimetype:
|
||||
raise UnsupportedFileTypeError()
|
||||
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
|
|
|
|||
|
|
@ -234,8 +234,6 @@ class DocumentAddByFileApi(DatasetApiResource):
|
|||
args["retrieval_model"].get("reranking_model").get("reranking_model_name"),
|
||||
)
|
||||
|
||||
# save file info
|
||||
file = request.files["file"]
|
||||
# check file
|
||||
if "file" not in request.files:
|
||||
raise NoFileUploadedError()
|
||||
|
|
@ -243,6 +241,8 @@ class DocumentAddByFileApi(DatasetApiResource):
|
|||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
|
||||
# save file info
|
||||
file = request.files["file"]
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
|
|
|
|||
|
|
@ -12,18 +12,17 @@ from services.file_service import FileService
|
|||
class FileApi(WebApiResource):
|
||||
@marshal_with(file_fields)
|
||||
def post(self, app_model, end_user):
|
||||
file = request.files["file"]
|
||||
source = request.form.get("source")
|
||||
|
||||
if "file" not in request.files:
|
||||
raise NoFileUploadedError()
|
||||
|
||||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
|
||||
file = request.files["file"]
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
source = request.form.get("source")
|
||||
if source not in ("datasets", None):
|
||||
source = None
|
||||
|
||||
|
|
|
|||
|
|
@ -121,9 +121,8 @@ class TokenBufferMemory:
|
|||
curr_message_tokens = self.model_instance.get_llm_num_tokens(prompt_messages)
|
||||
|
||||
if curr_message_tokens > max_token_limit:
|
||||
pruned_memory = []
|
||||
while curr_message_tokens > max_token_limit and len(prompt_messages) > 1:
|
||||
pruned_memory.append(prompt_messages.pop(0))
|
||||
prompt_messages.pop(0)
|
||||
curr_message_tokens = self.model_instance.get_llm_num_tokens(prompt_messages)
|
||||
|
||||
return prompt_messages
|
||||
|
|
|
|||
|
|
@ -0,0 +1,190 @@
|
|||
# Clickzetta Vector Database Integration
|
||||
|
||||
This module provides integration with Clickzetta Lakehouse as a vector database for Dify.
|
||||
|
||||
## Features
|
||||
|
||||
- **Vector Storage**: Store and retrieve high-dimensional vectors using Clickzetta's native VECTOR type
|
||||
- **Vector Search**: Efficient similarity search using HNSW algorithm
|
||||
- **Full-Text Search**: Leverage Clickzetta's inverted index for powerful text search capabilities
|
||||
- **Hybrid Search**: Combine vector similarity and full-text search for better results
|
||||
- **Multi-language Support**: Built-in support for Chinese, English, and Unicode text processing
|
||||
- **Scalable**: Leverage Clickzetta's distributed architecture for large-scale deployments
|
||||
|
||||
## Configuration
|
||||
|
||||
### Required Environment Variables
|
||||
|
||||
All seven configuration parameters are required:
|
||||
|
||||
```bash
|
||||
# Authentication
|
||||
CLICKZETTA_USERNAME=your_username
|
||||
CLICKZETTA_PASSWORD=your_password
|
||||
|
||||
# Instance configuration
|
||||
CLICKZETTA_INSTANCE=your_instance_id
|
||||
CLICKZETTA_SERVICE=api.clickzetta.com
|
||||
CLICKZETTA_WORKSPACE=your_workspace
|
||||
CLICKZETTA_VCLUSTER=your_vcluster
|
||||
CLICKZETTA_SCHEMA=your_schema
|
||||
```
|
||||
|
||||
### Optional Configuration
|
||||
|
||||
```bash
|
||||
# Batch processing
|
||||
CLICKZETTA_BATCH_SIZE=100
|
||||
|
||||
# Full-text search configuration
|
||||
CLICKZETTA_ENABLE_INVERTED_INDEX=true
|
||||
CLICKZETTA_ANALYZER_TYPE=chinese # Options: keyword, english, chinese, unicode
|
||||
CLICKZETTA_ANALYZER_MODE=smart # Options: max_word, smart
|
||||
|
||||
# Vector search configuration
|
||||
CLICKZETTA_VECTOR_DISTANCE_FUNCTION=cosine_distance # Options: l2_distance, cosine_distance
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### 1. Set Clickzetta as the Vector Store
|
||||
|
||||
In your Dify configuration, set:
|
||||
|
||||
```bash
|
||||
VECTOR_STORE=clickzetta
|
||||
```
|
||||
|
||||
### 2. Table Structure
|
||||
|
||||
Clickzetta will automatically create tables with the following structure:
|
||||
|
||||
```sql
|
||||
CREATE TABLE <collection_name> (
|
||||
id STRING NOT NULL,
|
||||
content STRING NOT NULL,
|
||||
metadata JSON,
|
||||
vector VECTOR(FLOAT, <dimension>) NOT NULL,
|
||||
PRIMARY KEY (id)
|
||||
);
|
||||
|
||||
-- Vector index for similarity search
|
||||
CREATE VECTOR INDEX idx_<collection_name>_vector
|
||||
ON TABLE <schema>.<collection_name>(vector)
|
||||
PROPERTIES (
|
||||
"distance.function" = "cosine_distance",
|
||||
"scalar.type" = "f32"
|
||||
);
|
||||
|
||||
-- Inverted index for full-text search (if enabled)
|
||||
CREATE INVERTED INDEX idx_<collection_name>_text
|
||||
ON <schema>.<collection_name>(content)
|
||||
PROPERTIES (
|
||||
"analyzer" = "chinese",
|
||||
"mode" = "smart"
|
||||
);
|
||||
```
|
||||
|
||||
## Full-Text Search Capabilities
|
||||
|
||||
Clickzetta supports advanced full-text search with multiple analyzers:
|
||||
|
||||
### Analyzer Types
|
||||
|
||||
1. **keyword**: No tokenization, treats the entire string as a single token
|
||||
- Best for: Exact matching, IDs, codes
|
||||
|
||||
2. **english**: Designed for English text
|
||||
- Features: Recognizes ASCII letters and numbers, converts to lowercase
|
||||
- Best for: English content
|
||||
|
||||
3. **chinese**: Chinese text tokenizer
|
||||
- Features: Recognizes Chinese and English characters, removes punctuation
|
||||
- Best for: Chinese or mixed Chinese-English content
|
||||
|
||||
4. **unicode**: Multi-language tokenizer based on Unicode
|
||||
- Features: Recognizes text boundaries in multiple languages
|
||||
- Best for: Multi-language content
|
||||
|
||||
### Analyzer Modes
|
||||
|
||||
- **max_word**: Fine-grained tokenization (more tokens)
|
||||
- **smart**: Intelligent tokenization (balanced)
|
||||
|
||||
### Full-Text Search Functions
|
||||
|
||||
- `MATCH_ALL(column, query)`: All terms must be present
|
||||
- `MATCH_ANY(column, query)`: At least one term must be present
|
||||
- `MATCH_PHRASE(column, query)`: Exact phrase matching
|
||||
- `MATCH_PHRASE_PREFIX(column, query)`: Phrase prefix matching
|
||||
- `MATCH_REGEXP(column, pattern)`: Regular expression matching
|
||||
|
||||
## Performance Optimization
|
||||
|
||||
### Vector Search
|
||||
|
||||
1. **Adjust exploration factor** for accuracy vs speed trade-off:
|
||||
```sql
|
||||
SET cz.vector.index.search.ef=64;
|
||||
```
|
||||
|
||||
2. **Use appropriate distance functions**:
|
||||
- `cosine_distance`: Best for normalized embeddings (e.g., from language models)
|
||||
- `l2_distance`: Best for raw feature vectors
|
||||
|
||||
### Full-Text Search
|
||||
|
||||
1. **Choose the right analyzer**:
|
||||
- Use `keyword` for exact matching
|
||||
- Use language-specific analyzers for better tokenization
|
||||
|
||||
2. **Combine with vector search**:
|
||||
- Pre-filter with full-text search for better performance
|
||||
- Use hybrid search for improved relevance
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Connection Issues
|
||||
|
||||
1. Verify all 7 required configuration parameters are set
|
||||
2. Check network connectivity to Clickzetta service
|
||||
3. Ensure the user has proper permissions on the schema
|
||||
|
||||
### Search Performance
|
||||
|
||||
1. Verify vector index exists:
|
||||
```sql
|
||||
SHOW INDEX FROM <schema>.<table_name>;
|
||||
```
|
||||
|
||||
2. Check if vector index is being used:
|
||||
```sql
|
||||
EXPLAIN SELECT ... WHERE l2_distance(...) < threshold;
|
||||
```
|
||||
Look for `vector_index_search_type` in the execution plan.
|
||||
|
||||
### Full-Text Search Not Working
|
||||
|
||||
1. Verify inverted index is created
|
||||
2. Check analyzer configuration matches your content language
|
||||
3. Use `TOKENIZE()` function to test tokenization:
|
||||
```sql
|
||||
SELECT TOKENIZE('your text', map('analyzer', 'chinese', 'mode', 'smart'));
|
||||
```
|
||||
|
||||
## Limitations
|
||||
|
||||
1. Vector operations don't support `ORDER BY` or `GROUP BY` directly on vector columns
|
||||
2. Full-text search relevance scores are not provided by Clickzetta
|
||||
3. Inverted index creation may fail for very large existing tables (continue without error)
|
||||
4. Index naming constraints:
|
||||
- Index names must be unique within a schema
|
||||
- Only one vector index can be created per column
|
||||
- The implementation uses fixed index names derived from the table name (`idx_<table_name>_vector` and `idx_<table_name>_text`)
|
||||
5. A column can only have one vector index at a time
|
||||
|
||||
## References
|
||||
|
||||
- [Clickzetta Vector Search Documentation](../../../../../../../yunqidoc/cn_markdown_20250526/vector-search.md)
|
||||
- [Clickzetta Inverted Index Documentation](../../../../../../../yunqidoc/cn_markdown_20250526/inverted-index.md)
|
||||
- [Clickzetta SQL Functions](../../../../../../../yunqidoc/cn_markdown_20250526/sql_functions/)
|
||||
|
|
@ -0,0 +1 @@
|
|||
# Clickzetta Vector Database Integration for Dify
|
||||
|
|
@ -0,0 +1,834 @@
|
|||
import json
|
||||
import logging
|
||||
import queue
|
||||
import threading
|
||||
import uuid
|
||||
from typing import Any, Optional, TYPE_CHECKING
|
||||
|
||||
import clickzetta # type: ignore
|
||||
from pydantic import BaseModel, model_validator
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from clickzetta import Connection
|
||||
|
||||
from configs import dify_config
|
||||
from core.rag.datasource.vdb.field import Field
|
||||
from core.rag.datasource.vdb.vector_base import BaseVector
|
||||
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
|
||||
from core.rag.embedding.embedding_base import Embeddings
|
||||
from core.rag.models.document import Document
|
||||
from models.dataset import Dataset
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ClickZetta Lakehouse Vector Database Configuration
|
||||
|
||||
|
||||
class ClickzettaConfig(BaseModel):
    """Connection and index settings handed to ``ClickzettaVector``."""

    # Connection parameters
    username: str
    password: str
    instance: str
    service: str = "api.clickzetta.com"
    workspace: str = "quick_start"
    vcluster: str = "default_ap"
    # Called ``schema_name`` rather than ``schema`` so it does not shadow BaseModel.schema.
    schema_name: str = "dify"

    # Advanced settings
    # Kept small so that generated SQL statements do not become too large.
    batch_size: int = 20
    # Whether to build an inverted index for full-text search.
    enable_inverted_index: bool = True
    # Full-text analyzer: keyword, english, chinese or unicode.
    analyzer_type: str = "chinese"
    # Analyzer mode: max_word or smart.
    analyzer_mode: str = "smart"
    # Either l2_distance or cosine_distance.
    vector_distance_function: str = "cosine_distance"

    @model_validator(mode="before")
    @classmethod
    def validate_config(cls, values: dict) -> dict:
        """Reject input in which any connection setting is missing or empty.

        The check runs in "before" mode on the incoming mapping, so it looks
        at what the caller supplied. The settings are checked in declaration
        order and the first empty one raises ``ValueError``.
        """
        required_messages = (
            ("username", "config CLICKZETTA_USERNAME is required"),
            ("password", "config CLICKZETTA_PASSWORD is required"),
            ("instance", "config CLICKZETTA_INSTANCE is required"),
            ("service", "config CLICKZETTA_SERVICE is required"),
            ("workspace", "config CLICKZETTA_WORKSPACE is required"),
            ("vcluster", "config CLICKZETTA_VCLUSTER is required"),
            ("schema_name", "config CLICKZETTA_SCHEMA is required"),
        )
        for key, message in required_messages:
            if not values.get(key):
                raise ValueError(message)
        return values
|
||||
|
||||
|
||||
class ClickzettaVector(BaseVector):
|
||||
"""
|
||||
Clickzetta vector storage implementation.
|
||||
"""
|
||||
|
||||
# Class-level write queue and lock for serializing writes
|
||||
_write_queue: Optional[queue.Queue] = None
|
||||
_write_thread: Optional[threading.Thread] = None
|
||||
_write_lock = threading.Lock()
|
||||
_shutdown = False
|
||||
|
||||
def __init__(self, collection_name: str, config: ClickzettaConfig):
    """Bind this store to a collection, connect, and start the write worker.

    Args:
        collection_name: Collection name; also used to derive the table name.
        config: Connection and index settings.
    """
    super().__init__(collection_name)
    self._config = config

    # Replace hyphens and lower-case the name to ensure a valid table name.
    sanitized_name = collection_name.replace("-", "_")
    self._table_name = sanitized_name.lower()

    self._connection: Optional["Connection"] = None
    self._init_connection()
    self._init_write_queue()
|
||||
|
||||
def _init_connection(self):
    """Open the Clickzetta connection and apply session-level settings."""
    cfg = self._config
    self._connection = clickzetta.connect(
        username=cfg.username,
        password=cfg.password,
        instance=cfg.instance,
        service=cfg.service,
        workspace=cfg.workspace,
        vcluster=cfg.vcluster,
        schema=cfg.schema_name,
    )

    # Nothing to configure when no connection object was produced.
    if self._connection is None:
        return

    with self._connection.cursor() as cursor:
        # Quote mode for string literal escaping handles embedded quotes better.
        cursor.execute("SET cz.sql.string.literal.escape.mode = 'quote'")
        logger.info("Set string literal escape mode to 'quote' for better quote handling")

        # Session hints aimed at vector operations.
        self._set_performance_hints(cursor)
|
||||
|
||||
def _set_performance_hints(self, cursor):
    """Apply ClickZetta session hints on ``cursor``, best effort.

    The hints are executed in order. Any exception is logged and swallowed,
    so a failing hint leaves the later ones unapplied and the session runs
    with default values for them.
    """
    vector_index_hints = (
        "SET cz.storage.parquet.vector.index.read.memory.cache = true",
        "SET cz.storage.parquet.vector.index.read.local.cache = false",
    )
    query_hints = (
        "SET cz.sql.table.scan.push.down.filter = true",
        "SET cz.sql.table.scan.enable.ensure.filter = true",
        "SET cz.storage.always.prefetch.internal = true",
        "SET cz.optimizer.generate.columns.always.valid = true",
        "SET cz.sql.index.prewhere.enabled = true",
    )
    storage_hints = (
        "SET cz.storage.parquet.enable.io.prefetch = false",
        "SET cz.optimizer.enable.mv.rewrite = false",
        "SET cz.sql.dump.as.lz4 = true",
        "SET cz.optimizer.limited.optimization.naive.query = true",
        "SET cz.sql.table.scan.enable.push.down.log = false",
        "SET cz.storage.use.file.format.local.stats = false",
        "SET cz.storage.local.file.object.cache.level = all",
    )
    job_hints = (
        "SET cz.sql.job.fast.mode = true",
        "SET cz.storage.parquet.non.contiguous.read = true",
        "SET cz.sql.compaction.after.commit = true",
    )
    performance_hints = vector_index_hints + query_hints + storage_hints + job_hints

    try:
        for statement in performance_hints:
            cursor.execute(statement)

        logger.info(
            "Applied %d performance optimization hints for ClickZetta vector operations",
            len(performance_hints),
        )
    except Exception:
        # Deliberately best effort: the hints are optional tuning.
        logger.exception("Failed to set some performance hints, continuing with default settings")
|
||||
|
||||
@classmethod
def _init_write_queue(cls):
    """Create the shared write queue and start its worker thread once.

    The check and the creation both happen under ``cls._write_lock``; later
    calls return without doing anything once the queue exists.
    """
    with cls._write_lock:
        if cls._write_queue is not None:
            return

        cls._write_queue = queue.Queue()
        worker = threading.Thread(target=cls._write_worker, daemon=True)
        cls._write_thread = worker
        worker.start()
        logger.info("Started Clickzetta write worker thread")
|
||||
|
||||
@classmethod
def _write_worker(cls):
    """Process queued write tasks one at a time until told to stop.

    Each task is a ``(func, args, kwargs, result_queue)`` tuple. The outcome
    is always reported on ``result_queue``: ``(True, result)`` on success or
    ``(False, exception)`` on failure. The loop ends when ``cls._shutdown``
    is true, when the queue has been unset, or when a ``None`` task (the
    shutdown signal) is received.
    """
    while not cls._shutdown:
        write_queue = cls._write_queue
        if write_queue is None:
            break

        try:
            # The timeout lets the loop re-check the shutdown flag periodically.
            task = write_queue.get(timeout=1)
        except queue.Empty:
            continue

        if task is None:  # Shutdown signal
            # Balance the get() so that a join() on the queue can complete.
            write_queue.task_done()
            break

        try:
            func, args, kwargs, result_queue = task
        except (TypeError, ValueError):
            # A malformed task carries no result queue to report to.
            logger.exception("Write worker error")
            write_queue.task_done()
            continue

        try:
            result_queue.put((True, func(*args, **kwargs)))
        except Exception as e:
            # Catch every task failure, not just a fixed list of types:
            # the submitter is blocked on result_queue, and an uncaught
            # exception would kill this thread and hang all later writes.
            logger.exception("Write task failed")
            result_queue.put((False, e))
        finally:
            write_queue.task_done()
|
||||
|
||||
def _execute_write(self, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` on the write worker and wait for it.

    Returns:
        Whatever ``func`` returned.

    Raises:
        RuntimeError: If the shared write queue has not been created.
        Exception: The exception reported back by the worker for this task.
    """
    write_queue = ClickzettaVector._write_queue
    if write_queue is None:
        raise RuntimeError("Write queue not initialized")

    # A private reply queue carries this task's outcome back to the caller.
    reply: queue.Queue[tuple[bool, Any]] = queue.Queue()
    write_queue.put((func, args, kwargs, reply))

    # Block until the worker reports the outcome.
    succeeded, payload = reply.get()
    if succeeded:
        return payload
    raise payload
|
||||
|
||||
def get_type(self) -> str:
    """Return the identifier of this vector database backend."""
    vector_type = "clickzetta"
    return vector_type
|
||||
|
||||
def _ensure_connection(self) -> "Connection":
|
||||
"""Ensure connection is available and return it."""
|
||||
if self._connection is None:
|
||||
raise RuntimeError("Database connection not initialized")
|
||||
return self._connection
|
||||
|
||||
def _table_exists(self) -> bool:
|
||||
"""Check if the table exists."""
|
||||
try:
|
||||
connection = self._ensure_connection()
|
||||
with connection.cursor() as cursor:
|
||||
cursor.execute(f"DESC {self._config.schema_name}.{self._table_name}")
|
||||
return True
|
||||
except (RuntimeError, ValueError) as e:
|
||||
if "table or view not found" in str(e).lower():
|
||||
return False
|
||||
else:
|
||||
# Re-raise if it's a different error
|
||||
raise
|
||||
|
||||
def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
    """Create the collection's table and indexes, then store the documents.

    Args:
        texts: Initial documents; nothing is added when this is empty.
        embeddings: Embedding vectors passed on with ``texts``.
        **kwargs: Forwarded unchanged to ``add_texts``.
    """
    # Table creation goes through the write queue to avoid concurrent conflicts.
    self._execute_write(self._create_table_and_indexes, embeddings)

    if not texts:
        return
    self.add_texts(texts, embeddings, **kwargs)
|
||||
|
||||
def _create_table_and_indexes(self, embeddings: list[list[float]]):
    """Create the table and its indexes unless the table already exists.

    Intended to run on the write worker thread. The vector column dimension
    is taken from the first embedding, falling back to 768 when
    ``embeddings`` is empty.
    """
    # An existing table is left untouched, which also skips index creation.
    if self._table_exists():
        logger.info("Table %s.%s already exists, skipping creation", self._config.schema_name, self._table_name)
        return

    dimension = 768 if not embeddings else len(embeddings[0])

    create_table_sql = f"""
        CREATE TABLE IF NOT EXISTS {self._config.schema_name}.{self._table_name} (
            id STRING NOT NULL COMMENT 'Unique document identifier',
            {Field.CONTENT_KEY.value} STRING NOT NULL COMMENT 'Document text content for search and retrieval',
            {Field.METADATA_KEY.value} JSON COMMENT 'Document metadata including source, type, and other attributes',
            {Field.VECTOR.value} VECTOR(FLOAT, {dimension}) NOT NULL COMMENT
                'High-dimensional embedding vector for semantic similarity search',
            PRIMARY KEY (id)
        ) COMMENT 'Dify RAG knowledge base vector storage table for document embeddings and content'
        """

    with self._ensure_connection().cursor() as cursor:
        cursor.execute(create_table_sql)
        logger.info("Created table %s.%s", self._config.schema_name, self._table_name)

        # The vector index comes first.
        self._create_vector_index(cursor)

        # The inverted index for full-text search is optional.
        if self._config.enable_inverted_index:
            self._create_inverted_index(cursor)
|
||||
|
||||
def _create_vector_index(self, cursor):
    """Create the HNSW vector index used for similarity search.

    Args:
        cursor: Open database cursor used to run the statements.

    Raises:
        RuntimeError, ValueError: Re-raised when index creation fails for a
            reason other than the index already being present.
    """
    # Fixed, deterministic name derived from the table name.
    index_name = f"idx_{self._table_name}_vector"

    # Look at the existing indexes first; any row mentioning the vector column
    # is taken as proof that the index is already in place.
    try:
        cursor.execute(f"SHOW INDEX FROM {self._config.schema_name}.{self._table_name}")
        index_rows = cursor.fetchall()
        already_indexed = any(Field.VECTOR.value in str(index_row).lower() for index_row in index_rows)
    except (RuntimeError, ValueError) as e:
        logger.warning("Failed to check existing indexes: %s", e)
        already_indexed = False

    if already_indexed:
        logger.info("Vector index already exists on column %s", Field.VECTOR.value)
        return

    index_sql = f"""
    CREATE VECTOR INDEX IF NOT EXISTS {index_name}
    ON TABLE {self._config.schema_name}.{self._table_name}({Field.VECTOR.value})
    PROPERTIES (
        "distance.function" = "{self._config.vector_distance_function}",
        "scalar.type" = "f32",
        "m" = "16",
        "ef.construction" = "128"
    )
    """

    # Error-message fragments that mean "the index is already there".
    duplicate_markers = ("already exists", "already has index", "with the same type")
    try:
        cursor.execute(index_sql)
        logger.info("Created vector index: %s", index_name)
    except (RuntimeError, ValueError) as e:
        error_msg = str(e).lower()
        if not any(marker in error_msg for marker in duplicate_markers):
            logger.exception("Failed to create vector index")
            raise
        logger.info("Vector index already exists: %s", e)
|
||||
|
||||
def _create_inverted_index(self, cursor):
    """Create the inverted index on the content column for full-text search.

    Failures are never raised from here: when the index cannot be created the
    problem is logged and the method returns, so callers continue without an
    inverted index (full-text search then falls back to LIKE).

    Args:
        cursor: Open database cursor used to run the statements.
    """
    # Fixed index name based on the table name to avoid duplicates.
    index_name = f"idx_{self._table_name}_text"
    show_index_sql = f"SHOW INDEX FROM {self._config.schema_name}.{self._table_name}"
    content_column = Field.CONTENT_KEY.value.lower()

    # Check whether our inverted index already exists on the content column.
    try:
        cursor.execute(show_index_sql)
        for idx in cursor.fetchall():
            idx_str = str(idx).lower()
            if "inverted" in idx_str and content_column in idx_str and index_name.lower() in idx_str:
                logger.info("Inverted index already exists on column %s: %s", Field.CONTENT_KEY.value, idx)
                return
    except (RuntimeError, ValueError) as e:
        logger.warning("Failed to check existing indexes: %s", e)

    index_sql = f"""
    CREATE INVERTED INDEX IF NOT EXISTS {index_name}
    ON TABLE {self._config.schema_name}.{self._table_name} ({Field.CONTENT_KEY.value})
    PROPERTIES (
        "analyzer" = "{self._config.analyzer_type}",
        "mode" = "{self._config.analyzer_mode}"
    )
    """
    try:
        cursor.execute(index_sql)
        logger.info("Created inverted index: %s", index_name)
    except (RuntimeError, ValueError) as e:
        # The previous condition was "(A or B or C or D) and B", which is just B:
        # only an "already has index" message counts as "index already exists".
        if "already has index" not in str(e).lower():
            logger.warning("Failed to create inverted index: %s", e)
            # Continue without inverted index - full-text search will fall back to LIKE
            return

        logger.info("Inverted index already exists on column %s", Field.CONTENT_KEY.value)
        # Best effort: find the existing index purely so its definition can be logged.
        try:
            cursor.execute(show_index_sql)
            for idx in cursor.fetchall():
                idx_str = str(idx).lower()
                if "inverted" in idx_str and content_column in idx_str:
                    logger.info("Found existing inverted index: %s", idx)
                    break
        except (RuntimeError, ValueError) as lookup_error:
            # Still non-fatal, but leave a trace instead of swallowing it silently.
            logger.debug("Could not look up existing inverted index: %s", lookup_error)
|
||||
|
||||
|
||||
def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
    """Insert documents and their embeddings in batches of ``config.batch_size``.

    Each batch is submitted through the write queue. ``documents`` and
    ``embeddings`` are sliced with the same offsets, so they are expected to
    be parallel lists.
    """
    if not documents:
        return

    chunk = self._config.batch_size
    doc_count = len(documents)
    # Ceiling division: number of batches needed to cover every document.
    total_batches = (doc_count + chunk - 1) // chunk

    for offset in range(0, doc_count, chunk):
        end = offset + chunk
        # The starting offset is passed as the batch index; the worker derives
        # the 1-based batch number from it for logging.
        self._execute_write(
            self._insert_batch,
            documents[offset:end],
            embeddings[offset:end],
            offset,
            chunk,
            total_batches,
        )
|
||||
|
||||
def _insert_batch(self, batch_docs: list[Document], batch_embeddings: list[list[float]],
                  batch_index: int, batch_size: int, total_batches: int):
    """Insert one batch of documents with a parameterized INSERT (runs in the write worker thread).

    Args:
        batch_docs: Documents of this batch.
        batch_embeddings: Embeddings parallel to ``batch_docs``.
        batch_index: Offset of the batch's first document in the full list
            (used only to compute the batch number for logging).
        batch_size: Configured batch size (used only for logging).
        total_batches: Total number of batches (used only for logging).

    Raises:
        RuntimeError, ValueError, TypeError, ConnectionError: Re-raised after
            logging when the database rejects the insert.
    """
    if not batch_docs or not batch_embeddings:
        logger.warning("Empty batch provided, skipping insertion")
        return

    if len(batch_docs) != len(batch_embeddings):
        logger.error("Mismatch between docs (%d) and embeddings (%d)", len(batch_docs), len(batch_embeddings))
        return

    # batch_embeddings is known to be non-empty here; 768 only covers an empty first vector.
    vector_dimension = len(batch_embeddings[0]) if batch_embeddings[0] else 768

    # Build one parameter row per document. Both lists are non-empty and of
    # equal length at this point, so at least one row is always produced.
    data_rows = []
    for doc, embedding in zip(batch_docs, batch_embeddings):
        metadata = doc.metadata if doc.metadata else {}
        if not isinstance(metadata, dict):
            metadata = {}

        doc_id = self._safe_doc_id(metadata.get("doc_id", str(uuid.uuid4())))

        try:
            metadata_json = json.dumps(metadata, ensure_ascii=True)
        except (TypeError, ValueError):
            logger.warning("JSON serialization failed, using empty dict")
            metadata_json = "{}"

        content = doc.page_content or ""

        # According to ClickZetta docs, vector should be formatted as array string
        # for external systems: '[1.0, 2.0, 3.0]'
        vector_str = '[' + ','.join(map(str, embedding)) + ']'
        data_rows.append([doc_id, content, metadata_json, vector_str])

    # Parameterized INSERT; JSON and VECTOR are cast in SQL, raw values are bound.
    columns = f"id, {Field.CONTENT_KEY.value}, {Field.METADATA_KEY.value}, {Field.VECTOR.value}"
    insert_sql = (
        f"INSERT INTO {self._config.schema_name}.{self._table_name} ({columns}) "
        f"VALUES (?, ?, CAST(? AS JSON), CAST(? AS VECTOR({vector_dimension})))"
    )

    connection = self._ensure_connection()
    with connection.cursor() as cursor:
        try:
            # Set session-level hints for batch insert operations
            # Note: executemany doesn't support hints parameter, so we set them as session variables
            cursor.execute("SET cz.sql.job.fast.mode = true")
            cursor.execute("SET cz.sql.compaction.after.commit = true")
            cursor.execute("SET cz.storage.always.prefetch.internal = true")

            cursor.executemany(insert_sql, data_rows)
            # Lazy %-style arguments: the message is only formatted if INFO is enabled.
            logger.info(
                "Inserted batch %d/%d (%d valid docs using parameterized query with VECTOR(%d) cast)",
                batch_index // batch_size + 1,
                total_batches,
                len(data_rows),
                vector_dimension,
            )
        except (RuntimeError, ValueError, TypeError, ConnectionError) as e:
            # Log the traceback once; the follow-up context lines use error() so
            # the same stack trace is not emitted three times.
            logger.exception("Parameterized SQL execution failed for %d documents: %s", len(data_rows), e)
            logger.error("SQL template: %s", insert_sql)
            logger.error("Sample data row: %s", data_rows[0])
            raise
|
||||
|
||||
def text_exists(self, id: str) -> bool:
    """Return True when a row with the given (sanitized) ID exists in the table."""
    lookup_id = self._safe_doc_id(id)
    count_sql = f"SELECT COUNT(*) FROM {self._config.schema_name}.{self._table_name} WHERE id = ?"

    with self._ensure_connection().cursor() as cursor:
        cursor.execute(count_sql, [lookup_id])
        row = cursor.fetchone()

    # No row back at all is treated the same as a zero count.
    if not row:
        return False
    return row[0] > 0
|
||||
|
||||
def delete_by_ids(self, ids: list[str]) -> None:
    """Delete the documents with the given IDs.

    Returns without doing anything when ``ids`` is empty or the table does
    not exist (the latter is logged as a warning).
    """
    if not ids:
        return

    # Only queue the delete when there is a table to delete from.
    if self._table_exists():
        self._execute_write(self._delete_by_ids_impl, ids)
        return

    logger.warning("Table %s.%s does not exist, skipping delete", self._config.schema_name, self._table_name)
|
||||
|
||||
def _delete_by_ids_impl(self, ids: list[str]) -> None:
    """Run the DELETE for the given IDs (executed in the write worker thread)."""
    # Every ID is passed through _safe_doc_id before being quoted into the IN list.
    quoted_ids = []
    for raw_id in ids:
        quoted_ids.append(f"'{self._safe_doc_id(raw_id)}'")
    id_list = ",".join(quoted_ids)

    sql = f"DELETE FROM {self._config.schema_name}.{self._table_name} WHERE id IN ({id_list})"

    with self._ensure_connection().cursor() as cursor:
        cursor.execute(sql)
|
||||
|
||||
def delete_by_metadata_field(self, key: str, value: str) -> None:
    """Delete every document whose metadata field ``key`` equals ``value``.

    When the table does not exist a warning is logged and nothing is deleted.
    """
    # Only queue the delete when there is a table to delete from.
    if self._table_exists():
        self._execute_write(self._delete_by_metadata_field_impl, key, value)
        return

    logger.warning("Table %s.%s does not exist, skipping delete", self._config.schema_name, self._table_name)
|
||||
|
||||
def _delete_by_metadata_field_impl(self, key: str, value: str) -> None:
    """Run the metadata-based DELETE (executed in the write worker thread).

    Args:
        key: Metadata field name; it becomes part of the JSON path literal.
        value: Value to match; passed as a bound parameter.
    """
    # The JSON path has to be a SQL string literal and cannot be bound as a
    # parameter, so double any single quote to keep the key inside the literal
    # (the same escaping the search methods apply to their inlined values).
    safe_key = str(key).replace("'", "''")

    connection = self._ensure_connection()
    with connection.cursor() as cursor:
        # Use json_extract_string function for ClickZetta compatibility
        sql = (f"DELETE FROM {self._config.schema_name}.{self._table_name} "
               f"WHERE json_extract_string({Field.METADATA_KEY.value}, '$.{safe_key}') = ?")
        cursor.execute(sql, [value])
|
||||
|
||||
def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
    """Search for documents by vector similarity.

    Args:
        query_vector: Embedding of the query.
        **kwargs: Recognized keys:
            ``top_k`` (default 10) - maximum number of rows returned;
            ``score_threshold`` (default 0.0, ``None`` treated as 0.0) - when
            positive, adds a distance filter to the WHERE clause;
            ``document_ids_filter`` - optional iterable of document IDs used to
            restrict the search via ``metadata.document_id``.
            A ``filter`` key may be passed but is not applied here.

    Returns:
        Documents ordered by ascending distance, each carrying ``doc_id``,
        ``document_id`` and ``score`` in its metadata.
    """
    import re  # only used by the metadata fallback parser below

    # Coerce to numbers before they are interpolated into SQL; a None
    # threshold would otherwise fail on the "> 0" comparison.
    top_k = int(kwargs.get("top_k", 10))
    score_threshold = float(kwargs.get("score_threshold") or 0.0)
    document_ids_filter = kwargs.get("document_ids_filter")

    # Build filter clause
    filter_clauses = []
    if document_ids_filter:
        safe_doc_ids = [str(doc_id).replace("'", "''") for doc_id in document_ids_filter]
        doc_ids_str = ",".join(f"'{doc_id}'" for doc_id in safe_doc_ids)
        # Use json_extract_string function for ClickZetta compatibility
        filter_clauses.append(
            f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})"
        )

    # No need for dataset_id filter since each dataset has its own table

    # The query vector literal is identical everywhere it is used; build it once.
    vector_dimension = len(query_vector)
    query_vector_str = f"CAST('[{self._format_vector_simple(query_vector)}]' AS VECTOR({vector_dimension}))"

    use_cosine = self._config.vector_distance_function == "cosine_distance"
    if use_cosine:
        # For cosine distance, smaller is better (0 = identical, 2 = opposite)
        distance_func = "COSINE_DISTANCE"
        max_distance = 2 - score_threshold
    else:
        # For L2 distance, smaller is better
        distance_func = "L2_DISTANCE"
        max_distance = score_threshold

    if score_threshold > 0:
        filter_clauses.append(f"{distance_func}({Field.VECTOR.value}, {query_vector_str}) < {max_distance}")

    where_clause = " AND ".join(filter_clauses) if filter_clauses else "1=1"

    # Execute vector search query
    search_sql = f"""
    SELECT id, {Field.CONTENT_KEY.value}, {Field.METADATA_KEY.value},
           {distance_func}({Field.VECTOR.value}, {query_vector_str}) AS distance
    FROM {self._config.schema_name}.{self._table_name}
    WHERE {where_clause}
    ORDER BY distance
    LIMIT {top_k}
    """

    documents = []
    connection = self._ensure_connection()
    with connection.cursor() as cursor:
        # Use hints parameter for vector search optimization
        search_hints = {
            'hints': {
                'sdk.job.timeout': 60,  # Increase timeout for vector search
                'cz.sql.job.fast.mode': True,
                'cz.storage.parquet.vector.index.read.memory.cache': True
            }
        }
        cursor.execute(search_sql, parameters=search_hints)
        results = cursor.fetchall()

        for row in results:
            # Parse metadata from JSON string (may be double-encoded)
            try:
                if row[2]:
                    metadata = json.loads(row[2])

                    # If result is a string, it's double-encoded JSON - parse again
                    if isinstance(metadata, str):
                        metadata = json.loads(metadata)

                    if not isinstance(metadata, dict):
                        metadata = {}
                else:
                    metadata = {}
            except (json.JSONDecodeError, TypeError) as e:
                logger.error("JSON parsing failed: %s", e)
                # Fallback: extract document_id with regex
                doc_id_match = re.search(r'"document_id":\s*"([^"]+)"', str(row[2] or ''))
                metadata = {"document_id": doc_id_match.group(1)} if doc_id_match else {}

            # Ensure required fields are set
            metadata["doc_id"] = row[0]  # segment id

            # Ensure document_id exists (critical for Dify's format_retrieval_documents)
            if "document_id" not in metadata:
                metadata["document_id"] = row[0]  # fallback to segment id

            # Add score based on distance
            if use_cosine:
                metadata["score"] = 1 - (row[3] / 2)
            else:
                metadata["score"] = 1 / (1 + row[3])

            documents.append(Document(page_content=row[1], metadata=metadata))

    return documents
|
||||
|
||||
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
    """Search for documents using full-text search with the inverted index.

    Returns an empty list when the inverted index is disabled in the config.
    If the MATCH_ALL query fails with a RuntimeError, ValueError, TypeError or
    ConnectionError, the search is retried through ``_search_by_like``.

    Args:
        query: Search text; every term must be present (MATCH_ALL).
        **kwargs: Recognized keys: ``top_k`` (default 10) and
            ``document_ids_filter`` (optional iterable of document IDs).
            A ``filter`` key may be passed but is not applied here.

    Returns:
        Matching documents, each with ``doc_id``, ``document_id`` and a fixed
        ``score`` of 1.0 in its metadata.
    """
    import re  # only used by the metadata fallback parser below

    if not self._config.enable_inverted_index:
        logger.warning("Full-text search is not enabled. Enable inverted index in config.")
        return []

    # Coerce to int before it is interpolated into the LIMIT clause.
    top_k = int(kwargs.get("top_k", 10))
    document_ids_filter = kwargs.get("document_ids_filter")

    # Build filter clause
    filter_clauses = []
    if document_ids_filter:
        safe_doc_ids = [str(doc_id).replace("'", "''") for doc_id in document_ids_filter]
        doc_ids_str = ",".join(f"'{doc_id}'" for doc_id in safe_doc_ids)
        # Use json_extract_string function for ClickZetta compatibility
        filter_clauses.append(
            f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})"
        )

    # No need for dataset_id filter since each dataset has its own table

    # match_all requires all terms to be present; the query text is inlined in
    # the WHERE clause, so single quotes are doubled.
    escaped_query = query.replace("'", "''")
    filter_clauses.append(f"MATCH_ALL({Field.CONTENT_KEY.value}, '{escaped_query}')")

    where_clause = " AND ".join(filter_clauses)

    # Execute full-text search query
    search_sql = f"""
    SELECT id, {Field.CONTENT_KEY.value}, {Field.METADATA_KEY.value}
    FROM {self._config.schema_name}.{self._table_name}
    WHERE {where_clause}
    LIMIT {top_k}
    """

    documents = []
    connection = self._ensure_connection()
    with connection.cursor() as cursor:
        try:
            # Use hints parameter for full-text search optimization
            fulltext_hints = {
                'hints': {
                    'sdk.job.timeout': 30,  # Timeout for full-text search
                    'cz.sql.job.fast.mode': True,
                    'cz.sql.index.prewhere.enabled': True
                }
            }
            cursor.execute(search_sql, parameters=fulltext_hints)
            results = cursor.fetchall()

            for row in results:
                # Parse metadata from JSON string (may be double-encoded)
                try:
                    if row[2]:
                        metadata = json.loads(row[2])

                        # If result is a string, it's double-encoded JSON - parse again
                        if isinstance(metadata, str):
                            metadata = json.loads(metadata)

                        if not isinstance(metadata, dict):
                            metadata = {}
                    else:
                        metadata = {}
                except (json.JSONDecodeError, TypeError) as e:
                    logger.error("JSON parsing failed: %s", e)
                    # Fallback: extract document_id with regex
                    doc_id_match = re.search(r'"document_id":\s*"([^"]+)"', str(row[2] or ''))
                    metadata = {"document_id": doc_id_match.group(1)} if doc_id_match else {}

                # Ensure required fields are set
                metadata["doc_id"] = row[0]  # segment id

                # Ensure document_id exists (critical for Dify's format_retrieval_documents)
                if "document_id" not in metadata:
                    metadata["document_id"] = row[0]  # fallback to segment id

                # Add a relevance score for full-text search
                metadata["score"] = 1.0  # Clickzetta doesn't provide relevance scores
                documents.append(Document(page_content=row[1], metadata=metadata))
        except (RuntimeError, ValueError, TypeError, ConnectionError):
            logger.exception("Full-text search failed")
            # Fallback to LIKE search if full-text search fails
            return self._search_by_like(query, **kwargs)

    return documents
|
||||
|
||||
def _search_by_like(self, query: str, **kwargs: Any) -> list[Document]:
    """Fallback search using a ``LIKE '%query%'`` match on the content column.

    Args:
        query: Substring to look for. Single quotes are doubled; LIKE
            wildcards contained in the query are passed through as-is.
        **kwargs: Recognized keys: ``top_k`` (default 10) and
            ``document_ids_filter`` (optional iterable of document IDs).
            A ``filter`` key may be passed but is not applied here.

    Returns:
        Matching documents, each with ``doc_id``, ``document_id`` and a fixed
        ``score`` of 0.5 in its metadata.
    """
    import re  # only used by the metadata fallback parser below

    # Coerce to int before it is interpolated into the LIMIT clause.
    top_k = int(kwargs.get("top_k", 10))
    document_ids_filter = kwargs.get("document_ids_filter")

    # Build filter clause
    filter_clauses = []
    if document_ids_filter:
        safe_doc_ids = [str(doc_id).replace("'", "''") for doc_id in document_ids_filter]
        doc_ids_str = ",".join(f"'{doc_id}'" for doc_id in safe_doc_ids)
        # Use json_extract_string function for ClickZetta compatibility
        filter_clauses.append(
            f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})"
        )

    # No need for dataset_id filter since each dataset has its own table

    # Use simple quote escaping for LIKE clause
    escaped_query = query.replace("'", "''")
    filter_clauses.append(f"{Field.CONTENT_KEY.value} LIKE '%{escaped_query}%'")
    where_clause = " AND ".join(filter_clauses)

    search_sql = f"""
    SELECT id, {Field.CONTENT_KEY.value}, {Field.METADATA_KEY.value}
    FROM {self._config.schema_name}.{self._table_name}
    WHERE {where_clause}
    LIMIT {top_k}
    """

    documents = []
    connection = self._ensure_connection()
    with connection.cursor() as cursor:
        # Use hints parameter for LIKE search optimization
        like_hints = {
            'hints': {
                'sdk.job.timeout': 20,  # Timeout for LIKE search
                'cz.sql.job.fast.mode': True
            }
        }
        cursor.execute(search_sql, parameters=like_hints)
        results = cursor.fetchall()

        for row in results:
            # Parse metadata from JSON string (may be double-encoded)
            try:
                if row[2]:
                    metadata = json.loads(row[2])

                    # If result is a string, it's double-encoded JSON - parse again
                    if isinstance(metadata, str):
                        metadata = json.loads(metadata)

                    if not isinstance(metadata, dict):
                        metadata = {}
                else:
                    metadata = {}
            except (json.JSONDecodeError, TypeError) as e:
                logger.error("JSON parsing failed: %s", e)
                # Fallback: extract document_id with regex
                doc_id_match = re.search(r'"document_id":\s*"([^"]+)"', str(row[2] or ''))
                metadata = {"document_id": doc_id_match.group(1)} if doc_id_match else {}

            # Ensure required fields are set
            metadata["doc_id"] = row[0]  # segment id

            # Ensure document_id exists (critical for Dify's format_retrieval_documents)
            if "document_id" not in metadata:
                metadata["document_id"] = row[0]  # fallback to segment id

            metadata["score"] = 0.5  # Lower score for LIKE search
            documents.append(Document(page_content=row[1], metadata=metadata))

    return documents
|
||||
|
||||
def delete(self) -> None:
    """Drop the table backing this collection (no error if it does not exist)."""
    drop_sql = f"DROP TABLE IF EXISTS {self._config.schema_name}.{self._table_name}"
    with self._ensure_connection().cursor() as cursor:
        cursor.execute(drop_sql)
|
||||
|
||||
|
||||
def _format_vector_simple(self, vector: list[float]) -> str:
|
||||
"""Simple vector formatting for SQL queries."""
|
||||
return ','.join(map(str, vector))
|
||||
|
||||
def _safe_doc_id(self, doc_id: str) -> str:
|
||||
"""Ensure doc_id is safe for SQL and doesn't contain special characters."""
|
||||
if not doc_id:
|
||||
return str(uuid.uuid4())
|
||||
# Remove or replace potentially problematic characters
|
||||
safe_id = str(doc_id)
|
||||
# Only allow alphanumeric, hyphens, underscores
|
||||
safe_id = ''.join(c for c in safe_id if c.isalnum() or c in '-_')
|
||||
if not safe_id: # If all characters were removed
|
||||
return str(uuid.uuid4())
|
||||
return safe_id[:255] # Limit length
|
||||
|
||||
|
||||
|
||||
class ClickzettaVectorFactory(AbstractVectorFactory):
    """Factory for creating Clickzetta vector instances."""

    def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> BaseVector:
        """Build a ``ClickzettaVector`` for the given dataset.

        Connection and index settings are read from ``dify_config``; unset
        values fall back to the defaults spelled out below. ``attributes``
        and ``embeddings`` are part of the factory interface and are not used
        here.

        Returns:
            A ``ClickzettaVector`` whose table name is the lower-cased
            collection name generated from ``dataset.id``.
        """
        # "X or True" is always True, which made it impossible to switch the
        # inverted index off; only fall back to True when the setting is unset.
        inverted_index_setting = dify_config.CLICKZETTA_ENABLE_INVERTED_INDEX
        enable_inverted_index = True if inverted_index_setting is None else inverted_index_setting

        config = ClickzettaConfig(
            username=dify_config.CLICKZETTA_USERNAME or "",
            password=dify_config.CLICKZETTA_PASSWORD or "",
            instance=dify_config.CLICKZETTA_INSTANCE or "",
            service=dify_config.CLICKZETTA_SERVICE or "api.clickzetta.com",
            workspace=dify_config.CLICKZETTA_WORKSPACE or "quick_start",
            vcluster=dify_config.CLICKZETTA_VCLUSTER or "default_ap",
            schema_name=dify_config.CLICKZETTA_SCHEMA or "dify",
            batch_size=dify_config.CLICKZETTA_BATCH_SIZE or 100,
            enable_inverted_index=enable_inverted_index,
            analyzer_type=dify_config.CLICKZETTA_ANALYZER_TYPE or "chinese",
            analyzer_mode=dify_config.CLICKZETTA_ANALYZER_MODE or "smart",
            vector_distance_function=dify_config.CLICKZETTA_VECTOR_DISTANCE_FUNCTION or "cosine_distance",
        )

        # Use dataset collection name as table name
        collection_name = Dataset.gen_collection_name_by_id(dataset.id).lower()

        return ClickzettaVector(collection_name=collection_name, config=config)
|
||||
|
||||
|
|
@ -172,6 +172,10 @@ class Vector:
|
|||
from core.rag.datasource.vdb.matrixone.matrixone_vector import MatrixoneVectorFactory
|
||||
|
||||
return MatrixoneVectorFactory
|
||||
case VectorType.CLICKZETTA:
|
||||
from core.rag.datasource.vdb.clickzetta.clickzetta_vector import ClickzettaVectorFactory
|
||||
|
||||
return ClickzettaVectorFactory
|
||||
case _:
|
||||
raise ValueError(f"Vector store {vector_type} is not supported.")
|
||||
|
||||
|
|
|
|||
|
|
@ -30,3 +30,4 @@ class VectorType(StrEnum):
|
|||
TABLESTORE = "tablestore"
|
||||
HUAWEI_CLOUD = "huawei_cloud"
|
||||
MATRIXONE = "matrixone"
|
||||
CLICKZETTA = "clickzetta"
|
||||
|
|
|
|||
|
|
@ -37,12 +37,12 @@ class LocaltimeToTimestampTool(BuiltinTool):
|
|||
@staticmethod
|
||||
def localtime_to_timestamp(localtime: str, time_format: str, local_tz=None) -> int | None:
|
||||
try:
|
||||
if local_tz is None:
|
||||
local_tz = datetime.now().astimezone().tzinfo
|
||||
if isinstance(local_tz, str):
|
||||
local_tz = pytz.timezone(local_tz)
|
||||
local_time = datetime.strptime(localtime, time_format)
|
||||
localtime = local_tz.localize(local_time) # type: ignore
|
||||
if local_tz is None:
|
||||
localtime = local_time.astimezone() # type: ignore
|
||||
elif isinstance(local_tz, str):
|
||||
local_tz = pytz.timezone(local_tz)
|
||||
localtime = local_tz.localize(local_time) # type: ignore
|
||||
timestamp = int(localtime.timestamp()) # type: ignore
|
||||
return timestamp
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
import json
|
||||
from collections.abc import Generator
|
||||
from dataclasses import dataclass
|
||||
from os import getenv
|
||||
from typing import Any, Optional
|
||||
from typing import Any, Optional, Union
|
||||
from urllib.parse import urlencode
|
||||
|
||||
import httpx
|
||||
|
|
@ -20,6 +21,20 @@ API_TOOL_DEFAULT_TIMEOUT = (
|
|||
)
|
||||
|
||||
|
||||
@dataclass
class ParsedResponse:
    """A parsed HTTP response body together with a flag saying whether it is JSON."""

    # Response body: a dict for JSON payloads, otherwise text.
    content: Union[str, dict]
    # Whether the body is to be handled as JSON.
    is_json: bool

    def to_string(self) -> str:
        """Return the content as a string (dicts are JSON-encoded without ASCII escaping)."""
        payload = self.content
        if not isinstance(payload, dict):
            return str(payload)
        return json.dumps(payload, ensure_ascii=False)
|
||||
|
||||
|
||||
class ApiTool(Tool):
|
||||
"""
|
||||
Api tool
|
||||
|
|
@ -58,7 +73,9 @@ class ApiTool(Tool):
|
|||
|
||||
response = self.do_http_request(self.api_bundle.server_url, self.api_bundle.method, headers, parameters)
|
||||
# validate response
|
||||
return self.validate_and_parse_response(response)
|
||||
parsed_response = self.validate_and_parse_response(response)
|
||||
# For credential validation, always return as string
|
||||
return parsed_response.to_string()
|
||||
|
||||
def tool_provider_type(self) -> ToolProviderType:
|
||||
return ToolProviderType.API
|
||||
|
|
@ -112,23 +129,36 @@ class ApiTool(Tool):
|
|||
|
||||
return headers
|
||||
|
||||
def validate_and_parse_response(self, response: httpx.Response) -> str:
|
||||
def validate_and_parse_response(self, response: httpx.Response) -> ParsedResponse:
|
||||
"""
|
||||
validate the response
|
||||
validate the response and return parsed content with type information
|
||||
|
||||
:return: ParsedResponse with content and is_json flag
|
||||
"""
|
||||
if isinstance(response, httpx.Response):
|
||||
if response.status_code >= 400:
|
||||
raise ToolInvokeError(f"Request failed with status code {response.status_code} and {response.text}")
|
||||
if not response.content:
|
||||
return "Empty response from the tool, please check your parameters and try again."
|
||||
return ParsedResponse(
|
||||
"Empty response from the tool, please check your parameters and try again.", False
|
||||
)
|
||||
|
||||
# Check content type
|
||||
content_type = response.headers.get("content-type", "").lower()
|
||||
is_json_content_type = "application/json" in content_type
|
||||
|
||||
# Try to parse as JSON
|
||||
try:
|
||||
response = response.json()
|
||||
try:
|
||||
return json.dumps(response, ensure_ascii=False)
|
||||
except Exception:
|
||||
return json.dumps(response)
|
||||
json_response = response.json()
|
||||
# If content-type indicates JSON, return as JSON object
|
||||
if is_json_content_type:
|
||||
return ParsedResponse(json_response, True)
|
||||
else:
|
||||
# If content-type doesn't indicate JSON, treat as text regardless of content
|
||||
return ParsedResponse(response.text, False)
|
||||
except Exception:
|
||||
return response.text
|
||||
# Not valid JSON, return as text
|
||||
return ParsedResponse(response.text, False)
|
||||
else:
|
||||
raise ValueError(f"Invalid response type {type(response)}")
|
||||
|
||||
|
|
@ -369,7 +399,14 @@ class ApiTool(Tool):
|
|||
response = self.do_http_request(self.api_bundle.server_url, self.api_bundle.method, headers, tool_parameters)
|
||||
|
||||
# validate response
|
||||
response = self.validate_and_parse_response(response)
|
||||
parsed_response = self.validate_and_parse_response(response)
|
||||
|
||||
# assemble invoke message
|
||||
yield self.create_text_message(response)
|
||||
# assemble invoke message based on response type
|
||||
if parsed_response.is_json and isinstance(parsed_response.content, dict):
|
||||
yield self.create_json_message(parsed_response.content)
|
||||
else:
|
||||
# Convert to string if needed and create text message
|
||||
text_response = (
|
||||
parsed_response.content if isinstance(parsed_response.content, str) else str(parsed_response.content)
|
||||
)
|
||||
yield self.create_text_message(text_response)
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ class Executor:
|
|||
self.auth = node_data.authorization
|
||||
self.timeout = timeout
|
||||
self.ssl_verify = node_data.ssl_verify
|
||||
self.params = []
|
||||
self.params = None
|
||||
self.headers = {}
|
||||
self.content = None
|
||||
self.files = None
|
||||
|
|
@ -139,7 +139,8 @@ class Executor:
|
|||
(self.variable_pool.convert_template(key).text, self.variable_pool.convert_template(value_str).text)
|
||||
)
|
||||
|
||||
self.params = result
|
||||
if result:
|
||||
self.params = result
|
||||
|
||||
def _init_headers(self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -69,6 +69,19 @@ class Storage:
|
|||
from extensions.storage.supabase_storage import SupabaseStorage
|
||||
|
||||
return SupabaseStorage
|
||||
case StorageType.CLICKZETTA_VOLUME:
|
||||
from extensions.storage.clickzetta_volume.clickzetta_volume_storage import (
|
||||
ClickZettaVolumeConfig,
|
||||
ClickZettaVolumeStorage,
|
||||
)
|
||||
|
||||
def create_clickzetta_volume_storage():
|
||||
# ClickZettaVolumeConfig will automatically read from environment variables
|
||||
# and fallback to CLICKZETTA_* config if CLICKZETTA_VOLUME_* is not set
|
||||
volume_config = ClickZettaVolumeConfig()
|
||||
return ClickZettaVolumeStorage(volume_config)
|
||||
|
||||
return create_clickzetta_volume_storage
|
||||
case _:
|
||||
raise ValueError(f"unsupported storage type {storage_type}")
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
"""ClickZetta Volume storage implementation."""
|
||||
|
||||
from .clickzetta_volume_storage import ClickZettaVolumeStorage
|
||||
|
||||
__all__ = ["ClickZettaVolumeStorage"]
|
||||
|
|
@ -0,0 +1,530 @@
|
|||
"""ClickZetta Volume Storage Implementation
|
||||
|
||||
This module provides storage backend using ClickZetta Volume functionality.
|
||||
Supports Table Volume, User Volume, and External Volume types.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
from collections.abc import Generator
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import clickzetta # type: ignore[import]
|
||||
from pydantic import BaseModel, model_validator
|
||||
|
||||
from extensions.storage.base_storage import BaseStorage
|
||||
|
||||
from .volume_permissions import VolumePermissionManager, check_volume_permission
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ClickZettaVolumeConfig(BaseModel):
    """Configuration for ClickZetta Volume storage.

    Fields that are not passed explicitly are filled in by ``validate_config`` from
    ``CLICKZETTA_VOLUME_*`` environment variables, falling back to the ``CLICKZETTA_*``
    variables for the connection settings.
    """

    username: str = ""
    password: str = ""
    instance: str = ""
    service: str = "api.clickzetta.com"
    workspace: str = "quick_start"
    vcluster: str = "default_ap"
    schema_name: str = "dify"
    volume_type: str = "table"  # table|user|external
    volume_name: Optional[str] = None  # For external volumes
    table_prefix: str = "dataset_"  # Prefix for table volume names
    dify_prefix: str = "dify_km"  # Directory prefix for User Volume
    permission_check: bool = True  # Enable/disable permission checking

    @model_validator(mode="before")
    @classmethod
    def validate_config(cls, values: dict) -> dict:
        """Fill missing settings from the environment and validate the result.

        Connection settings are resolved from ``CLICKZETTA_VOLUME_*`` first and then from
        the matching ``CLICKZETTA_*`` variable. Volume-specific settings only read
        ``CLICKZETTA_VOLUME_*``.

        Args:
            values: Raw input passed to the model.

        Returns:
            A new dict with defaults applied. The caller's mapping is left untouched.

        Raises:
            ValueError: If username, password or instance is missing, if the volume type
                is not one of ``table``/``user``/``external``, or if an external volume
                has no volume name.
        """
        # A "before" validator sees the raw input, which is not guaranteed to be a dict;
        # leave anything else for pydantic itself to accept or reject.
        if not isinstance(values, dict):
            return values

        # Work on a copy so that applying defaults never mutates the caller's dict.
        values = dict(values)

        def get_env_with_fallback(volume_key: str, fallback_key: str, default: str | None = None) -> str:
            # An explicitly supplied value wins over any environment variable.
            volume_value = values.get(volume_key.lower().replace("clickzetta_volume_", ""))
            if volume_value:
                return str(volume_value)

            # Volume-specific environment variable first, then the shared CLICKZETTA_* one.
            for env_key in (volume_key, fallback_key):
                env_value = os.getenv(env_key)
                if env_value:
                    return env_value

            return default or ""

        # (field, volume-specific variable, shared fallback variable, built-in default)
        env_backed_fields = (
            ("username", "CLICKZETTA_VOLUME_USERNAME", "CLICKZETTA_USERNAME", None),
            ("password", "CLICKZETTA_VOLUME_PASSWORD", "CLICKZETTA_PASSWORD", None),
            ("instance", "CLICKZETTA_VOLUME_INSTANCE", "CLICKZETTA_INSTANCE", None),
            ("service", "CLICKZETTA_VOLUME_SERVICE", "CLICKZETTA_SERVICE", "api.clickzetta.com"),
            ("workspace", "CLICKZETTA_VOLUME_WORKSPACE", "CLICKZETTA_WORKSPACE", "quick_start"),
            ("vcluster", "CLICKZETTA_VOLUME_VCLUSTER", "CLICKZETTA_VCLUSTER", "default_ap"),
            ("schema_name", "CLICKZETTA_VOLUME_SCHEMA", "CLICKZETTA_SCHEMA", "dify"),
        )
        for field_name, volume_key, fallback_key, default in env_backed_fields:
            values.setdefault(field_name, get_env_with_fallback(volume_key, fallback_key, default))

        # Volume-specific configurations (no fallback to vector DB config)
        values.setdefault("volume_type", os.getenv("CLICKZETTA_VOLUME_TYPE", "table"))
        values.setdefault("volume_name", os.getenv("CLICKZETTA_VOLUME_NAME"))
        values.setdefault("table_prefix", os.getenv("CLICKZETTA_VOLUME_TABLE_PREFIX", "dataset_"))
        values.setdefault("dify_prefix", os.getenv("CLICKZETTA_VOLUME_DIFY_PREFIX", "dify_km"))
        # Permission checking is temporarily disabled: it defaults to False here unless the
        # caller passes permission_check explicitly.
        values.setdefault("permission_check", False)

        # Validate required fields
        required_fields = (
            ("username", "CLICKZETTA_VOLUME_USERNAME or CLICKZETTA_USERNAME is required"),
            ("password", "CLICKZETTA_VOLUME_PASSWORD or CLICKZETTA_PASSWORD is required"),
            ("instance", "CLICKZETTA_VOLUME_INSTANCE or CLICKZETTA_INSTANCE is required"),
        )
        for field_name, message in required_fields:
            if not values.get(field_name):
                raise ValueError(message)

        # Validate volume type
        volume_type = values["volume_type"]
        if volume_type not in ["table", "user", "external"]:
            raise ValueError("CLICKZETTA_VOLUME_TYPE must be one of: table, user, external")

        if volume_type == "external" and not values.get("volume_name"):
            raise ValueError("CLICKZETTA_VOLUME_NAME is required for external volume type")

        return values
|
||||
|
||||
|
||||
class ClickZettaVolumeStorage(BaseStorage):
    """ClickZetta Volume storage implementation.

    Files are transferred with ``PUT`` / ``GET`` / ``LIST`` / ``REMOVE`` SQL statements
    executed on a ClickZetta connection. Depending on ``config.volume_type`` a file lives
    in the user volume, in a per-dataset table volume, or in a named external volume.
    """

    # Leading path components that are always served from the USER VOLUME (below
    # ``dify_prefix``), even when the backend is configured for table volumes.
    _USER_VOLUME_DIRS = frozenset({"upload_files", "temp", "cache", "tools", "website_files", "privkeys"})

    def __init__(self, config: ClickZettaVolumeConfig):
        """Initialize ClickZetta Volume storage.

        Args:
            config: ClickZetta Volume configuration
        """
        self._config = config
        self._connection = None
        self._permission_manager: VolumePermissionManager | None = None
        self._init_connection()
        self._init_permission_manager()

        logger.info("ClickZetta Volume storage initialized with type: %s", config.volume_type)

    def _init_connection(self):
        """Open the ClickZetta connection described by the configuration."""
        try:
            self._connection = clickzetta.connect(
                username=self._config.username,
                password=self._config.password,
                instance=self._config.instance,
                service=self._config.service,
                workspace=self._config.workspace,
                vcluster=self._config.vcluster,
                schema=self._config.schema_name,
            )
            logger.debug("ClickZetta connection established")
        except Exception:
            logger.exception("Failed to connect to ClickZetta")
            raise

    def _init_permission_manager(self):
        """Create the permission manager bound to the current connection."""
        try:
            self._permission_manager = VolumePermissionManager(
                self._connection, self._config.volume_type, self._config.volume_name
            )
            logger.debug("Permission manager initialized")
        except Exception:
            logger.exception("Failed to initialize permission manager")
            raise

    def _uses_user_volume(self, dataset_id: Optional[str]) -> bool:
        """Return True when ``dataset_id`` names one of the special user-volume directories."""
        return dataset_id in self._USER_VOLUME_DIRS

    def _split_dataset_id(self, filename: str) -> tuple[Optional[str], str]:
        """Split ``dataset_id/rest`` off a storage key for table volumes.

        For any other volume type, or when the key contains no ``/``, the key is returned
        unchanged with a ``None`` dataset id. A leading ``table_prefix`` on the first path
        component is removed from the returned dataset id.
        """
        if self._config.volume_type != "table" or "/" not in filename:
            return None, filename

        head, rest = filename.split("/", 1)
        return head.removeprefix(self._config.table_prefix), rest

    def _get_volume_path(self, filename: str, dataset_id: Optional[str] = None) -> str:
        """Get the appropriate volume path based on volume type.

        Raises:
            ValueError: For a table volume without a dataset id when ``filename`` has no
                ``/``, or for an unsupported volume type.
        """
        volume_type = self._config.volume_type
        if volume_type == "user":
            # Add dify prefix for User Volume to organize files
            return f"{self._config.dify_prefix}/{filename}"

        if volume_type == "table":
            if self._uses_user_volume(dataset_id):
                # NOTE(review): the special directory name itself is not part of the
                # returned path, only the remainder below it. Confirm this is intended
                # before changing it, since existing files are stored under this layout.
                return f"{self._config.dify_prefix}/{filename}"
            if dataset_id:
                return f"{self._config.table_prefix}{dataset_id}/{filename}"
            # Without a dataset id the filename must already look like dataset_id/filename.
            if "/" in filename:
                return filename
            raise ValueError("dataset_id is required for table volume or filename must include dataset_id/")

        if volume_type == "external":
            return filename

        raise ValueError(f"Unsupported volume type: {volume_type}")

    def _get_volume_sql_prefix(self, dataset_id: Optional[str] = None) -> str:
        """Get the SQL volume reference (``USER VOLUME``, ``TABLE VOLUME x`` or ``VOLUME x``).

        Raises:
            ValueError: For an unsupported volume type.
        """
        volume_type = self._config.volume_type
        if volume_type == "user":
            return "USER VOLUME"

        if volume_type == "table":
            # Dify's generic paths ("upload_files/tenant_id/uuid.ext", "tools/...") are
            # kept in the USER VOLUME; only dataset-specific paths use a TABLE VOLUME.
            if self._uses_user_volume(dataset_id):
                return "USER VOLUME"
            # Fall back to a default table name for generic operations.
            table_name = f"{self._config.table_prefix}{dataset_id}" if dataset_id else "default_dataset"
            return f"TABLE VOLUME {table_name}"

        if volume_type == "external":
            return f"VOLUME {self._config.volume_name}"

        raise ValueError(f"Unsupported volume type: {volume_type}")

    def _resolve_target(self, filename: str, dataset_id: Optional[str]) -> tuple[str, str]:
        """Return ``(volume SQL prefix, file path to use in the SQL statement)``.

        The user volume is addressed with the full prefixed path; table and external
        volumes are addressed with the bare filename. The volume path is always computed
        so that an invalid key is rejected the same way for every volume type.
        """
        volume_prefix = self._get_volume_sql_prefix(dataset_id)
        volume_path = self._get_volume_path(filename, dataset_id)
        return volume_prefix, (volume_path if volume_prefix == "USER VOLUME" else filename)

    def _enforce_permission(self, operation: str, dataset_id: Optional[str]) -> None:
        """Run the volume permission check for ``operation`` when checking is enabled.

        The check is skipped for the special user-volume directories and when no
        permission manager is available.
        """
        if not self._config.permission_check:
            return
        if self._uses_user_volume(dataset_id) or self._permission_manager is None:
            return
        check_volume_permission(self._permission_manager, operation, dataset_id)

    def _execute_sql(self, sql: str, fetch: bool = False):
        """Execute a SQL command.

        Args:
            sql: Statement to execute.
            fetch: When True, return ``cursor.fetchall()``; otherwise return None.

        Raises:
            RuntimeError: If the connection has not been initialized.
        """
        try:
            if self._connection is None:
                raise RuntimeError("Connection not initialized")
            with self._connection.cursor() as cursor:
                cursor.execute(sql)
                return cursor.fetchall() if fetch else None
        except Exception:
            logger.exception("SQL execution failed: %s", sql)
            raise

    def _ensure_table_volume_exists(self, dataset_id: str) -> None:
        """Ensure the table backing the table volume exists for ``dataset_id``.

        Does nothing for non-table volume types, an empty dataset id, or the special
        user-volume directories. Failures are logged and not raised.
        """
        if self._config.volume_type != "table" or not dataset_id:
            return
        if self._uses_user_volume(dataset_id):
            return

        table_name = f"{self._config.table_prefix}{dataset_id}"

        try:
            if self._execute_sql(f"SHOW TABLES LIKE '{table_name}'", fetch=True):
                return

            create_sql = f"""
                CREATE TABLE {table_name} (
                    id INT PRIMARY KEY AUTO_INCREMENT,
                    filename VARCHAR(255) NOT NULL,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
                    INDEX idx_filename (filename)
                ) WITH VOLUME
            """
            self._execute_sql(create_sql)
            logger.info("Created table volume: %s", table_name)
        except Exception as e:
            # Deliberately best-effort: the table might exist but not be visible due to
            # permissions, so the calling operation is allowed to continue.
            logger.warning("Failed to create table volume %s: %s", table_name, e)

    def save(self, filename: str, data: bytes) -> None:
        """Save data to ClickZetta Volume.

        Args:
            filename: File path in volume
            data: File content as bytes
        """
        dataset_id, filename = self._split_dataset_id(filename)

        # Ensure table volume exists (for table volumes)
        if dataset_id:
            self._ensure_table_volume_exists(dataset_id)

        self._enforce_permission("save", dataset_id)

        # PUT uploads from a local path, so the payload is staged in a temporary file.
        temp_file = tempfile.NamedTemporaryFile(delete=False)
        temp_file_path = Path(temp_file.name)
        try:
            # Writing happens inside the try so the staged file is removed even if the
            # write itself fails.
            with temp_file:
                temp_file.write(data)

            volume_prefix, target = self._resolve_target(filename, dataset_id)
            self._execute_sql(f"PUT '{temp_file_path}' TO {volume_prefix} FILE '{target}'")
            logger.debug("File %s saved to ClickZetta Volume at path %s", filename, target)
        finally:
            temp_file_path.unlink(missing_ok=True)

    def load_once(self, filename: str) -> bytes:
        """Load file content from ClickZetta Volume.

        Args:
            filename: File path in volume

        Returns:
            File content as bytes

        Raises:
            FileNotFoundError: If no matching file is present after the download.
        """
        dataset_id, filename = self._split_dataset_id(filename)
        self._enforce_permission("load_once", dataset_id)

        with tempfile.TemporaryDirectory() as temp_dir:
            volume_prefix, target = self._resolve_target(filename, dataset_id)
            self._execute_sql(f"GET {volume_prefix} FILE '{target}' TO '{temp_dir}'")

            # The download may land in a subdirectory, so search the tree for the name.
            wanted = {filename, os.path.basename(filename)}
            downloaded_file = None
            for root, _dirs, names in os.walk(temp_dir):
                match = next((name for name in names if name in wanted), None)
                if match is not None:
                    downloaded_file = Path(root) / match
                    break

            if not downloaded_file or not downloaded_file.exists():
                raise FileNotFoundError(f"Downloaded file not found: {filename}")

            content = downloaded_file.read_bytes()

            logger.debug("File %s loaded from ClickZetta Volume", filename)
            return content

    def load_stream(self, filename: str) -> Generator:
        """Load file as stream from ClickZetta Volume.

        The whole file is loaded first and then yielded in 4096-byte chunks.

        Args:
            filename: File path in volume

        Yields:
            File content chunks
        """
        stream = BytesIO(self.load_once(filename))
        batch_size = 4096

        while chunk := stream.read(batch_size):
            yield chunk

        logger.debug("File %s loaded as stream from ClickZetta Volume", filename)

    def download(self, filename: str, target_filepath: str):
        """Download file from ClickZetta Volume to local path.

        Args:
            filename: File path in volume
            target_filepath: Local target file path
        """
        content = self.load_once(filename)
        Path(target_filepath).write_bytes(content)

        logger.debug("File %s downloaded from ClickZetta Volume to %s", filename, target_filepath)

    def exists(self, filename: str) -> bool:
        """Check if file exists in ClickZetta Volume.

        Args:
            filename: File path in volume

        Returns:
            True if file exists, False otherwise (including when the check itself fails)
        """
        try:
            dataset_id, filename = self._split_dataset_id(filename)
            volume_prefix, target = self._resolve_target(filename, dataset_id)

            rows = self._execute_sql(f"LIST {volume_prefix} REGEXP = '^{target}$'", fetch=True)

            # bool() also covers a driver that returns None instead of an empty list.
            found = bool(rows)
            logger.debug("File %s exists check: %s", filename, found)
            return found
        except Exception as e:
            logger.warning("Error checking file existence for %s: %s", filename, e)
            return False

    def delete(self, filename: str):
        """Delete file from ClickZetta Volume.

        A file that does not exist is skipped silently.

        Args:
            filename: File path in volume
        """
        if not self.exists(filename):
            logger.debug("File %s not found, skip delete", filename)
            return

        dataset_id, filename = self._split_dataset_id(filename)
        volume_prefix, target = self._resolve_target(filename, dataset_id)

        self._execute_sql(f"REMOVE {volume_prefix} FILE '{target}'")

        logger.debug("File %s deleted from ClickZetta Volume", filename)

    def scan(self, path: str, files: bool = True, directories: bool = False) -> list[str]:
        """Scan files and directories in ClickZetta Volume.

        Args:
            path: Path to scan (dataset_id for table volumes)
            files: Include files in results
            directories: Include directories in results

        Returns:
            List of file/directory paths; an empty list if the scan fails
        """
        try:
            # For table volumes the path names the dataset and the scan covers its root.
            dataset_id = None
            if self._config.volume_type == "table":
                dataset_id, path = path, ""

            volume_prefix = self._get_volume_sql_prefix(dataset_id)
            is_user_volume = volume_prefix == "USER VOLUME"

            if is_user_volume:
                # Everything in the user volume lives below the dify prefix.
                scan_path = f"{self._config.dify_prefix}/{path}" if path else self._config.dify_prefix
                sql = f"LIST {volume_prefix} SUBDIRECTORY '{scan_path}'"
            elif path:
                sql = f"LIST {volume_prefix} SUBDIRECTORY '{path}'"
            else:
                sql = f"LIST {volume_prefix}"

            rows = self._execute_sql(sql, fetch=True) or []

            dify_prefix_with_slash = f"{self._config.dify_prefix}/"
            result = []
            for row in rows:
                file_path = row[0]  # relative_path column
                if is_user_volume:
                    # Report user-volume entries relative to the dify prefix.
                    file_path = file_path.removeprefix(dify_prefix_with_slash)

                is_directory = file_path.endswith("/")
                if (files and not is_directory) or (directories and is_directory):
                    result.append(file_path)

            logger.debug("Scanned %d items in path %s", len(result), path)
            return result

        except Exception:
            logger.exception("Error scanning path %s", path)
            return []
|
||||
|
|
@ -0,0 +1,516 @@
|
|||
"""ClickZetta Volume文件生命周期管理
|
||||
|
||||
该模块提供文件版本控制、自动清理、备份和恢复等生命周期管理功能。
|
||||
支持知识库文件的完整生命周期管理。
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import asdict, dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from enum import Enum
|
||||
from typing import Any, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FileStatus(Enum):
    """Lifecycle state recorded for a managed file."""

    # File is in normal use.
    ACTIVE = "active"
    # File has been archived.
    ARCHIVED = "archived"
    # File has been soft-deleted.
    DELETED = "deleted"
    # File is a backup copy.
    BACKUP = "backup"
|
||||
|
||||
|
||||
@dataclass
class FileMetadata:
    """Metadata record kept for one managed file."""

    filename: str
    size: int | None
    created_at: datetime
    modified_at: datetime
    version: int | None
    status: FileStatus
    checksum: Optional[str] = None
    tags: Optional[dict[str, str]] = None
    parent_version: Optional[int] = None

    def to_dict(self) -> dict:
        """Return a plain dict with timestamps as ISO strings and the status as its value."""
        return {
            **asdict(self),
            "created_at": self.created_at.isoformat(),
            "modified_at": self.modified_at.isoformat(),
            "status": self.status.value,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "FileMetadata":
        """Build an instance from a dict produced by ``to_dict``; ``data`` is not modified."""
        fields = data.copy()
        fields.update(
            created_at=datetime.fromisoformat(fields["created_at"]),
            modified_at=datetime.fromisoformat(fields["modified_at"]),
            status=FileStatus(fields["status"]),
        )
        return cls(**fields)
|
||||
|
||||
|
||||
class FileLifecycleManager:
|
||||
"""文件生命周期管理器"""
|
||||
|
||||
def __init__(self, storage, dataset_id: Optional[str] = None):
    """Set up lifecycle bookkeeping on top of a storage backend.

    Args:
        storage: ClickZetta Volume storage instance
        dataset_id: Dataset ID (used for Table Volume)
    """
    self._storage = storage
    self._dataset_id = dataset_id

    # Names reserved for lifecycle bookkeeping.
    self._metadata_file = ".dify_file_metadata.json"
    self._version_prefix, self._backup_prefix, self._deleted_prefix = ".versions/", ".backups/", ".deleted/"

    # Reuse the storage's permission manager when it has one.
    self._permission_manager: Optional[Any] = getattr(storage, "_permission_manager", None)
|
||||
|
||||
def save_with_lifecycle(self, filename: str, data: bytes, tags: Optional[dict[str, str]] = None) -> FileMetadata:
    """Save a file and record its lifecycle metadata.

    If the metadata index already has an entry for ``filename``, the current content is
    first copied to a version backup and the version number is incremented.

    Args:
        filename: File name
        data: File content
        tags: Optional tags stored with the metadata

    Returns:
        Metadata of the newly saved version

    Raises:
        VolumePermissionError: If the permission check rejects the save.
        Exception: Any error raised while saving is logged and re-raised.
    """
    if not self._check_permission(filename, "save"):
        from .volume_permissions import VolumePermissionError

        # The storage config may be an attribute-style object (such as a pydantic model)
        # or a plain dict. Calling ``.get`` on the former would raise AttributeError and
        # hide the permission error, so both shapes are handled explicitly.
        config = getattr(self._storage, "_config", None)
        if isinstance(config, dict):
            volume_type = config.get("volume_type", "unknown")
        else:
            volume_type = getattr(config, "volume_type", "unknown")

        raise VolumePermissionError(
            f"Permission denied for lifecycle save operation on file: {filename}",
            operation="save",
            volume_type=volume_type,
            dataset_id=self._dataset_id,
        )

    try:
        # 1. Look up an existing entry for this file.
        metadata_dict = self._load_metadata()
        current_metadata = metadata_dict.get(filename)

        # 2. Snapshot the current content before it is overwritten.
        if current_metadata:
            self._create_version_backup(filename, current_metadata)

        # 3. Compute the new file information. A stored version of None counts as 0.
        now = datetime.now()
        checksum = self._calculate_checksum(data)
        new_version = ((current_metadata["version"] or 0) + 1) if current_metadata else 1

        # 4. Store the new content.
        self._storage.save(filename, data)

        # 5. Build the metadata; the creation time of an existing file is preserved.
        created_at = now
        parent_version = None
        if current_metadata:
            previous_created_at = current_metadata["created_at"]
            # Entries loaded from the index carry ISO strings rather than datetimes.
            if isinstance(previous_created_at, str):
                previous_created_at = datetime.fromisoformat(previous_created_at)
            created_at = previous_created_at
            parent_version = current_metadata["version"]

        file_metadata = FileMetadata(
            filename=filename,
            size=len(data),
            created_at=created_at,
            modified_at=now,
            version=new_version,
            status=FileStatus.ACTIVE,
            checksum=checksum,
            tags=tags or {},
            parent_version=parent_version,
        )

        # 6. Persist the updated index.
        metadata_dict[filename] = file_metadata.to_dict()
        self._save_metadata(metadata_dict)

        logger.info("File %s saved with lifecycle management, version %s", filename, new_version)
        return file_metadata

    except Exception:
        logger.exception("Failed to save file with lifecycle")
        raise
|
||||
|
||||
def get_file_metadata(self, filename: str) -> Optional[FileMetadata]:
    """Look up the metadata recorded for a file.

    Args:
        filename: File name

    Returns:
        The file's metadata, or None if there is no entry or the lookup fails
    """
    try:
        records = self._load_metadata()
        if filename not in records:
            return None
        return FileMetadata.from_dict(records[filename])
    except Exception:
        logger.exception("Failed to get file metadata for %s", filename)
        return None
|
||||
|
||||
def list_file_versions(self, filename: str) -> list[FileMetadata]:
    """List the known versions of a file, newest first.

    Only the current version from the metadata index is returned. Historical copies
    stored under the version prefix are not included, because no metadata is read for
    them here.

    Args:
        filename: File name

    Returns:
        Versions sorted by version number in descending order; an empty list if the file
        is unknown or the lookup fails
    """
    try:
        current_metadata = self.get_file_metadata(filename)
        versions = [current_metadata] if current_metadata else []

        # TODO: include historical versions once their metadata can be read back from
        # the stored version files. Until then, scanning the volume for them would only
        # cost a remote listing whose result could not be used.
        return sorted(versions, key=lambda meta: meta.version or 0, reverse=True)

    except Exception:
        logger.exception("Failed to list file versions for %s", filename)
        return []
|
||||
|
||||
def restore_version(self, filename: str, version: int) -> bool:
    """Restore a file to a previously stored version.

    The stored copy ``<version prefix><filename>.v<version>`` is loaded and written back
    through ``save_with_lifecycle`` with a ``restored_from`` tag.

    Args:
        filename: File name
        version: Version number to restore

    Returns:
        True on success; False if the version file does not exist or any step fails
    """
    try:
        version_filename = f"{self._version_prefix}{filename}.v{version}"

        if not self._storage.exists(version_filename):
            logger.warning("Version %s of %s not found", version, filename)
            return False

        version_data = self._storage.load_once(version_filename)

        # save_with_lifecycle already snapshots the current content before overwriting
        # it, so a separate backup here would only download and upload the same file twice.
        self.save_with_lifecycle(filename, version_data, {"restored_from": str(version)})
        return True

    except Exception:
        logger.exception("Failed to restore %s to version %s", filename, version)
        return False
|
||||
|
||||
def archive_file(self, filename: str) -> bool:
    """Mark a file as archived in the metadata index.

    Args:
        filename: File name

    Returns:
        True if the entry was updated; False if permission is denied, the file has no
        metadata entry, or the update fails
    """
    if not self._check_permission(filename, "archive"):
        logger.warning("Permission denied for archive operation on file: %s", filename)
        return False

    try:
        records = self._load_metadata()
        if filename not in records:
            logger.warning("File %s not found in metadata", filename)
            return False

        # Only the index entry changes; the stored file itself is left untouched.
        entry = records[filename]
        entry["status"] = FileStatus.ARCHIVED.value
        entry["modified_at"] = datetime.now().isoformat()
        self._save_metadata(records)
    except Exception:
        logger.exception("Failed to archive file %s", filename)
        return False

    logger.info("File %s archived successfully", filename)
    return True
|
||||
|
||||
def soft_delete_file(self, filename: str) -> bool:
    """Soft-delete a file by moving it under the deleted prefix.

    The content is copied to ``<deleted prefix><filename>.<timestamp>``, the original is
    removed, and an existing metadata entry is marked as deleted.

    Args:
        filename: File name

    Returns:
        True on success; False if permission is denied, the file does not exist, or any
        step fails
    """
    if not self._check_permission(filename, "delete"):
        logger.warning("Permission denied for soft delete operation on file: %s", filename)
        return False

    try:
        storage = self._storage
        if not storage.exists(filename):
            logger.warning("File %s not found", filename)
            return False

        # Copy first, then remove the original.
        payload = storage.load_once(filename)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        storage.save(f"{self._deleted_prefix}{filename}.{timestamp}", payload)
        storage.delete(filename)

        # Record the new state when the file is tracked in the index.
        records = self._load_metadata()
        if filename in records:
            entry = records[filename]
            entry["status"] = FileStatus.DELETED.value
            entry["modified_at"] = datetime.now().isoformat()
            self._save_metadata(records)

        logger.info("File %s soft deleted successfully", filename)
        return True

    except Exception:
        logger.exception("Failed to soft delete file %s", filename)
        return False
|
||||
|
||||
def cleanup_old_versions(self, max_versions: int = 5, max_age_days: int = 30) -> int:
    """Delete stored versions beyond the newest ``max_versions`` of each file.

    Version files are expected to be named ``<version prefix><filename>.v<number>``.
    Entries under the version prefix that do not match this pattern are ignored.

    Args:
        max_versions: Number of most recent versions to keep per file
        max_age_days: Accepted for interface compatibility but currently not applied;
            the volume listing used here provides no file timestamps to compare against

    Returns:
        Number of version files deleted (0 if the cleanup fails before deleting anything)
    """
    try:
        cleaned_count = 0

        try:
            all_files = self._storage.scan(self._dataset_id or "", files=True)
            prefix = self._version_prefix

            # Group version files by the file they belong to.
            file_versions: dict[str, list[tuple[int, str]]] = {}
            for version_file in all_files:
                if not version_file.startswith(prefix):
                    continue

                # Split on the LAST ".v": the filename itself may contain ".v"
                # (for example "demo.video.mp4"), and the version is always the suffix.
                base_filename, separator, version_part = version_file[len(prefix) :].rpartition(".v")
                if not separator:
                    continue

                try:
                    version_num = int(version_part.split(".")[0])
                except ValueError:
                    continue
                file_versions.setdefault(base_filename, []).append((version_num, version_file))

            # Keep the newest max_versions of every file and delete the rest.
            for versions in file_versions.values():
                if len(versions) <= max_versions:
                    continue
                versions.sort(key=lambda item: item[0], reverse=True)
                for _version_num, version_file in versions[max_versions:]:
                    self._storage.delete(version_file)
                    cleaned_count += 1
                    logger.debug("Cleaned old version: %s", version_file)

            logger.info("Cleaned %d old version files", cleaned_count)

        except Exception as e:
            # Best effort: report what was cleaned before the failure.
            logger.warning("Could not scan for version files: %s", e)

        return cleaned_count

    except Exception:
        logger.exception("Failed to cleanup old versions")
        return 0
|
||||
|
||||
def get_storage_statistics(self) -> dict[str, Any]:
    """Summarise the metadata of every tracked file.

    Returns:
        A dict with the total/active/archived/deleted file counts, the summed
        size, the summed version numbers, and the names of the file with the
        earliest ``created_at`` and the file with the latest ``modified_at``.
        An empty dict is returned if anything fails while building it.
    """
    try:
        all_metadata = self._load_metadata()

        stats: dict[str, Any] = {
            "total_files": len(all_metadata),
            "active_files": 0,
            "archived_files": 0,
            "deleted_files": 0,
            "total_size": 0,
            "versions_count": 0,
            "oldest_file": None,
            "newest_file": None,
        }

        # Which counter each file status feeds.
        counter_for_status = (
            (FileStatus.ACTIVE, "active_files"),
            (FileStatus.ARCHIVED, "archived_files"),
            (FileStatus.DELETED, "deleted_files"),
        )

        earliest_created = None
        latest_modified = None

        for filename, raw_entry in all_metadata.items():
            entry = FileMetadata.from_dict(raw_entry)

            # Per-status counters.
            for status, counter_key in counter_for_status:
                if entry.status == status:
                    stats[counter_key] += 1
                    break

            # Running totals; missing size/version count as zero.
            stats["total_size"] += entry.size or 0
            stats["versions_count"] += entry.version or 0

            # Track the extremes by creation and modification time.
            if earliest_created is None or entry.created_at < earliest_created:
                earliest_created = entry.created_at
                stats["oldest_file"] = filename

            if latest_modified is None or entry.modified_at > latest_modified:
                latest_modified = entry.modified_at
                stats["newest_file"] = filename

        return stats

    except Exception:
        logger.exception("Failed to get storage statistics")
        return {}
|
||||
|
||||
def _create_version_backup(self, filename: str, metadata: dict):
    """Copy the current content of ``filename`` into a versioned backup file.

    The backup is stored as ``<version prefix><filename>.v<metadata["version"]>``.
    This is best effort: any failure is logged as a warning and swallowed.

    Args:
        filename: Name of the file to back up.
        metadata: Metadata of that file; its ``"version"`` entry names the backup.
    """
    try:
        # Read first, so a missing file fails before anything is written.
        snapshot = self._storage.load_once(filename)

        backup_name = "{}{}.v{}".format(self._version_prefix, filename, metadata["version"])
        self._storage.save(backup_name, snapshot)

        logger.debug("Created version backup: %s", backup_name)

    except Exception as exc:
        logger.warning("Failed to create version backup for %s: %s", filename, exc)
|
||||
|
||||
def _load_metadata(self) -> dict[str, Any]:
    """Read and parse the JSON metadata file.

    Returns:
        The parsed metadata as a dict. An empty dict is returned when the
        metadata file does not exist, parses to an empty value, or cannot be
        read or parsed (the failure is logged as a warning).
    """
    try:
        if not self._storage.exists(self._metadata_file):
            return {}

        raw = self._storage.load_once(self._metadata_file)
        parsed = json.loads(raw.decode("utf-8"))
        if parsed:
            return dict(parsed)
        return {}
    except Exception as exc:
        logger.warning("Failed to load metadata: %s", exc)
        return {}
|
||||
|
||||
def _save_metadata(self, metadata_dict: dict):
    """Serialise ``metadata_dict`` to JSON and write it to the metadata file.

    The JSON is indented by two spaces, keeps non-ASCII characters as-is and
    is stored UTF-8 encoded.

    Args:
        metadata_dict: Complete metadata mapping to persist.

    Raises:
        Exception: Whatever serialisation or the storage layer raised; it is
            logged and then re-raised unchanged.
    """
    try:
        payload = json.dumps(metadata_dict, indent=2, ensure_ascii=False).encode("utf-8")
        self._storage.save(self._metadata_file, payload)
        logger.debug("Metadata saved successfully")
    except Exception:
        logger.exception("Failed to save metadata")
        raise
|
||||
|
||||
def _calculate_checksum(self, data: bytes) -> str:
    """Return the MD5 hex digest of ``data``.

    The digest is used as a content checksum, not for security, so the hash
    is created with ``usedforsecurity=False``. That keeps it available on
    builds that restrict MD5 for security use and leaves the digest unchanged.

    Args:
        data: Raw file content.

    Returns:
        32-character lowercase hexadecimal MD5 digest.
    """
    import hashlib

    return hashlib.md5(data, usedforsecurity=False).hexdigest()
|
||||
|
||||
def _check_permission(self, filename: str, operation: str) -> bool:
    """Ask the permission manager whether ``operation`` is allowed.

    Lifecycle operation names are first translated into the operation names
    passed to the permission manager's ``validate_operation``; unknown names
    are passed through unchanged.

    Args:
        filename: File the operation targets (used for logging only).
        operation: Operation type, e.g. ``"save"``, ``"archive"``, ``"read"``.

    Returns:
        True if permission is granted or no permission manager is configured;
        False if it is denied or the check itself raised.
    """
    manager = self._permission_manager
    if not manager:
        # No permission manager configured: allow by default.
        return True

    try:
        storage_operation_for = {
            "save": "save",
            "load": "load_once",
            "delete": "delete",
            "archive": "delete",  # archiving needs delete permission
            "restore": "save",  # restoring needs write permission
            "cleanup": "delete",  # cleanup needs delete permission
            "read": "load_once",
            "write": "save",
        }
        target_operation = storage_operation_for.get(operation, operation)
        return bool(manager.validate_operation(target_operation, self._dataset_id))

    except Exception:
        logger.exception("Permission check failed for %s operation %s", filename, operation)
        # Safe default: deny access when the check itself fails.
        return False
|
||||
|
|
@ -0,0 +1,646 @@
|
|||
"""ClickZetta Volume权限管理机制
|
||||
|
||||
该模块提供Volume权限检查、验证和管理功能。
|
||||
根据ClickZetta的权限模型,不同Volume类型有不同的权限要求。
|
||||
"""
|
||||
|
||||
import logging
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class VolumePermission(Enum):
    """Volume permission kinds; each value names the ClickZetta privilege(s) it maps to.

    NOTE(review): READ/LIST share one value and WRITE/DELETE share another, so
    under Python's Enum rules LIST is an alias of READ and DELETE is an alias
    of WRITE. Iterating the enum therefore yields only READ, WRITE and USAGE,
    and ``VolumePermission.LIST.name`` is ``"READ"``.
    """

    READ = "SELECT"  # maps to ClickZetta's SELECT privilege
    WRITE = "INSERT,UPDATE,DELETE"  # maps to ClickZetta's write privileges
    LIST = "SELECT"  # listing files requires the SELECT privilege
    DELETE = "INSERT,UPDATE,DELETE"  # deleting files requires the write privileges
    USAGE = "USAGE"  # basic privilege required for an External Volume
|
||||
|
||||
|
||||
class VolumePermissionManager:
|
||||
"""Volume权限管理器"""
|
||||
|
||||
def __init__(self, connection_or_config, volume_type: str | None = None, volume_name: Optional[str] = None):
|
||||
"""初始化权限管理器
|
||||
|
||||
Args:
|
||||
connection_or_config: ClickZetta连接对象或配置字典
|
||||
volume_type: Volume类型 (user|table|external)
|
||||
volume_name: Volume名称 (用于external volume)
|
||||
"""
|
||||
# 支持两种初始化方式:连接对象或配置字典
|
||||
if isinstance(connection_or_config, dict):
|
||||
# 从配置字典创建连接
|
||||
import clickzetta # type: ignore[import-untyped]
|
||||
|
||||
config = connection_or_config
|
||||
self._connection = clickzetta.connect(
|
||||
username=config.get("username"),
|
||||
password=config.get("password"),
|
||||
instance=config.get("instance"),
|
||||
service=config.get("service"),
|
||||
workspace=config.get("workspace"),
|
||||
vcluster=config.get("vcluster"),
|
||||
schema=config.get("schema") or config.get("database"),
|
||||
)
|
||||
self._volume_type = config.get("volume_type", volume_type)
|
||||
self._volume_name = config.get("volume_name", volume_name)
|
||||
else:
|
||||
# 直接使用连接对象
|
||||
self._connection = connection_or_config
|
||||
self._volume_type = volume_type
|
||||
self._volume_name = volume_name
|
||||
|
||||
if not self._connection:
|
||||
raise ValueError("Valid connection or config is required")
|
||||
if not self._volume_type:
|
||||
raise ValueError("volume_type is required")
|
||||
|
||||
self._permission_cache: dict[str, set[str]] = {}
|
||||
self._current_username = None # 将从连接中获取当前用户名
|
||||
|
||||
def check_permission(self, operation: VolumePermission, dataset_id: Optional[str] = None) -> bool:
    """Check whether the user may perform ``operation`` on this volume.

    The check is delegated to the handler for the configured volume type.

    Args:
        operation: Kind of operation to perform.
        dataset_id: Dataset ID (used for table volumes).

    Returns:
        True if the user has permission; False if not, if the volume type is
        unknown, or if the check raised.
    """
    try:
        volume_kind = self._volume_type

        if volume_kind == "user":
            return self._check_user_volume_permission(operation)
        if volume_kind == "table":
            return self._check_table_volume_permission(operation, dataset_id)
        if volume_kind == "external":
            return self._check_external_volume_permission(operation)

        logger.warning("Unknown volume type: %s", self._volume_type)
        return False

    except Exception:
        logger.exception("Permission check failed")
        return False
|
||||
|
||||
def _check_user_volume_permission(self, operation: VolumePermission) -> bool:
    """Check permission on the User Volume.

    The check only verifies that a trivial query (``SELECT 1``) succeeds on
    the connection; ``operation`` is used for logging only. The stated
    rationale is that a user has full rights on their own User Volume, so
    being able to connect is treated as sufficient.

    Returns:
        True if the probe query returned a row; False if it returned nothing
        or anything raised.
    """
    try:
        username = self._get_current_username()

        with self._connection.cursor() as cursor:
            # Being able to run any query counts as basic access.
            cursor.execute("SELECT 1")
            probe_row = cursor.fetchone()

            if not probe_row:
                logger.warning(
                    "User Volume permission check failed: cannot verify basic connection for %s", username
                )
                return False

            logger.debug(
                "User Volume permission check for %s, operation %s: granted (basic connection verified)",
                username,
                operation.name,
            )
            return True

    except Exception:
        logger.exception("User Volume permission check failed")
        # NOTE(review): the message below says checking is disabled, yet the
        # method still denies access -- confirm which of the two is intended.
        logger.info("User Volume permission check failed, but permission checking is disabled in this version")
        return False
|
||||
|
||||
def _check_table_volume_permission(self, operation: VolumePermission, dataset_id: Optional[str]) -> bool:
    """Check permission on a Table Volume.

    A Table Volume inherits the privileges of its table. The table is named
    ``dataset_<dataset_id>`` unless ``dataset_id`` already starts with
    ``dataset_``. The privileges required are the comma-separated parts of
    ``operation.value``, and all of them must be held.

    Returns:
        True if every required privilege is held; False otherwise, when
        ``dataset_id`` is missing, or when the lookup raised.
    """
    if not dataset_id:
        logger.warning("dataset_id is required for table volume permission check")
        return False

    table_name = dataset_id if dataset_id.startswith("dataset_") else f"dataset_{dataset_id}"

    try:
        granted = self._get_table_permissions(table_name)
        needed = set(operation.value.split(","))

        # Every required privilege must be present.
        allowed = needed.issubset(granted)

        logger.debug(
            "Table Volume permission check for %s, operation %s: required=%s, has=%s, granted=%s",
            table_name,
            operation.name,
            needed,
            granted,
            allowed,
        )
        return allowed

    except Exception:
        logger.exception("Table volume permission check failed for %s", table_name)
        return False
|
||||
|
||||
def _check_external_volume_permission(self, operation: VolumePermission) -> bool:
    """Check permission on the configured External Volume.

    READ/LIST require the ``read`` permission and WRITE/DELETE require
    ``write``; any other operation requires nothing. If the granted
    permissions do not cover the requirement, a fallback accepts the
    operation when the volume is listed by ``SHOW VOLUMES``.

    NOTE(review): the fallback grants any operation, including writes, merely
    because the volume is visible. The original comments describe this as a
    deliberately lenient check for development environments -- confirm it is
    acceptable in production.

    Returns:
        True if permitted directly or through the fallback; False otherwise,
        when no volume name is configured, or when the check raised.
    """
    if not self._volume_name:
        logger.warning("volume_name is required for external volume permission check")
        return False

    try:
        granted = self._get_external_volume_permissions(self._volume_name)

        # Translate the operation into the permission it needs.
        if operation in [VolumePermission.READ, VolumePermission.LIST]:
            needed = {"read"}
        elif operation in [VolumePermission.WRITE, VolumePermission.DELETE]:
            needed = {"write"}
        else:
            needed = set()

        allowed = needed.issubset(granted)

        logger.debug(
            "External Volume permission check for %s, operation %s: required=%s, has=%s, granted=%s",
            self._volume_name,
            operation.name,
            needed,
            granted,
            allowed,
        )

        if allowed:
            return allowed

        logger.info("Direct permission check failed for %s, trying fallback verification", self._volume_name)

        # Fallback: seeing the volume in the listing counts as basic access.
        try:
            with self._connection.cursor() as cursor:
                cursor.execute("SHOW VOLUMES")
                for row in cursor.fetchall():
                    if len(row) > 0 and row[0] == self._volume_name:
                        logger.info("Fallback verification successful for %s", self._volume_name)
                        return True
        except Exception as fallback_e:
            logger.warning("Fallback verification failed for %s: %s", self._volume_name, fallback_e)

        return allowed

    except Exception:
        logger.exception("External volume permission check failed for %s", self._volume_name)
        logger.info("External Volume permission check failed, but permission checking is disabled in this version")
        return False
|
||||
|
||||
def _get_table_permissions(self, table_name: str) -> set[str]:
    """Return the privileges the current user holds on ``table_name``.

    Privileges are read from ``SHOW GRANTS``. If none match, a probe
    ``SELECT`` against the table is attempted, and success counts as the
    SELECT privilege. The result is cached per table name, including an empty
    result after a failure.

    ``table_name`` is interpolated into the probe statement, so the probe
    runs only for names made of ASCII letters, digits, ``_``, ``.`` and
    ``-``. Any other name is refused and yields no privileges from the probe.

    NOTE(review): rows are read as (privilege, object_type, object_name),
    which differs from the column layout documented in
    ``_get_external_volume_permissions`` -- confirm the real layout of
    ``SHOW GRANTS``. Also, a SCHEMA grant matches whenever the schema name is
    a substring of the table name; confirm that is intended.

    Args:
        table_name: Name of the table.

    Returns:
        Subset of {"SELECT", "INSERT", "UPDATE", "DELETE"}.
    """
    import re

    cache_key = f"table:{table_name}"
    if cache_key in self._permission_cache:
        return self._permission_cache[cache_key]

    permissions: set[str] = set()
    table_privileges = ("SELECT", "INSERT", "UPDATE", "DELETE")

    try:
        with self._connection.cursor() as cursor:
            cursor.execute("SHOW GRANTS")
            grants = cursor.fetchall()

            for grant in grants:
                if len(grant) < 3:
                    continue

                privilege = grant[0].upper()
                object_type = grant[1].upper()
                object_name = grant[2]

                on_table = object_type == "TABLE" and object_name == table_name
                on_schema = object_type == "SCHEMA" and object_name in table_name
                if not (on_table or on_schema):
                    continue

                if privilege == "ALL":
                    permissions.update(table_privileges)
                elif privilege in table_privileges:
                    permissions.add(privilege)

            # No explicit grant found: probe the table with a trivial query.
            if not permissions:
                if re.fullmatch(r"[A-Za-z0-9_.\-]+", table_name):
                    try:
                        cursor.execute(f"SELECT COUNT(*) FROM {table_name} LIMIT 1")
                        permissions.add("SELECT")
                    except Exception:
                        logger.debug("Cannot query table %s, no SELECT permission", table_name)
                else:
                    # Never splice an arbitrary string into SQL.
                    logger.warning("Refusing to probe table with unsafe name: %r", table_name)

    except Exception as e:
        # Safe default: a failed lookup leaves the permission set as collected so far.
        logger.warning("Could not check table permissions for %s: %s", table_name, e)

    self._permission_cache[cache_key] = permissions
    return permissions
|
||||
|
||||
def _get_current_username(self) -> str:
    """Return the name of the connected user, querying it once and caching it.

    Returns:
        The cached or freshly queried user name, or ``"unknown"`` when the
        query returns no row or fails (the failure is logged).
    """
    cached = self._current_username
    if cached:
        return cached

    try:
        with self._connection.cursor() as cursor:
            cursor.execute("SELECT CURRENT_USER()")
            row = cursor.fetchone()
            if row:
                self._current_username = row[0]
                return str(self._current_username)
    except Exception:
        logger.exception("Failed to get current username")

    return "unknown"
|
||||
|
||||
def _get_user_permissions(self, username: str) -> set[str]:
    """Return the basic privileges found in ``SHOW GRANTS``.

    Every row with at least three columns contributes its first column when
    that is SELECT, INSERT, UPDATE, DELETE or ALL (ALL expands to the other
    four), regardless of the object the grant refers to. ``username`` is used
    only for the cache key and for logging; the query itself is not filtered
    by it. The result is cached, including an empty result after a failure.

    Args:
        username: User the result is cached and logged under.

    Returns:
        Subset of {"SELECT", "INSERT", "UPDATE", "DELETE"}.
    """
    cache_key = f"user_permissions:{username}"
    if cache_key in self._permission_cache:
        return self._permission_cache[cache_key]

    permissions: set[str] = set()
    basic_privileges = ("SELECT", "INSERT", "UPDATE", "DELETE")

    try:
        with self._connection.cursor() as cursor:
            cursor.execute("SHOW GRANTS")
            grants = cursor.fetchall()

            for grant in grants:
                if len(grant) < 3:
                    continue

                # Only the privilege column is consulted. The object type is
                # deliberately not touched, so a non-string value there no
                # longer aborts parsing of the remaining grants.
                privilege = grant[0].upper()
                if privilege == "ALL":
                    permissions.update(basic_privileges)
                elif privilege in basic_privileges:
                    permissions.add(privilege)

    except Exception as e:
        # Safe default: a failed lookup leaves the permission set as collected so far.
        logger.warning("Could not check user permissions for %s: %s", username, e)

    self._permission_cache[cache_key] = permissions
    return permissions
|
||||
|
||||
def _get_external_volume_permissions(self, volume_name: str) -> set[str]:
    """Return the permissions the user holds on an External Volume.

    Grants are read with ``SHOW GRANTS ON VOLUME <volume_name>`` and reduced
    to the lowercase names ``"read"``, ``"write"`` and ``"alter"``. When no
    grant matches, or the grant query fails, the method falls back to checking
    whether the volume is listed by ``SHOW VOLUMES``. The result is cached per
    volume name.

    NOTE(review): the error paths fail open. If the grant query raises and the
    volume is listed, both read and write are assumed; if that listing also
    raises, read is still assumed. Confirm this leniency is intended.
    NOTE(review): ``volume_name`` is interpolated into the SQL text; it
    presumably comes from trusted configuration -- verify against callers.

    Args:
        volume_name: Name of the External Volume.

    Returns:
        Set of permission names the user is considered to hold on the volume.
    """
    cache_key = f"external_volume:{volume_name}"

    if cache_key in self._permission_cache:
        return self._permission_cache[cache_key]

    permissions = set()

    try:
        with self._connection.cursor() as cursor:
            # Query the grants on the volume using ClickZetta syntax.
            logger.info("Checking permissions for volume: %s", volume_name)
            cursor.execute(f"SHOW GRANTS ON VOLUME {volume_name}")
            grants = cursor.fetchall()

            logger.info("Raw grants result for %s: %s", volume_name, grants)

            # Parse the grant rows.
            # Row layout: (granted_type, privilege, conditions, granted_on, object_name, granted_to,
            #              grantee_name, grantor_name, grant_option, granted_time)
            for grant in grants:
                logger.info("Processing grant: %s", grant)
                if len(grant) >= 5:
                    granted_type = grant[0]
                    privilege = grant[1].upper()
                    granted_on = grant[3]
                    object_name = grant[4]

                    logger.info(
                        "Grant details - type: %s, privilege: %s, granted_on: %s, object_name: %s",
                        granted_type,
                        privilege,
                        granted_on,
                        object_name,
                    )

                    # Accept a grant on this volume (matched by name suffix) or
                    # any hierarchy-level grant on volumes.
                    if (
                        granted_type == "PRIVILEGE" and granted_on == "VOLUME" and object_name.endswith(volume_name)
                    ) or (granted_type == "OBJECT_HIERARCHY" and granted_on == "VOLUME"):
                        logger.info("Matching grant found for %s", volume_name)

                        # Substring tests, so a privilege name that merely
                        # contains READ/WRITE/ALTER also counts.
                        if "READ" in privilege:
                            permissions.add("read")
                            logger.info("Added READ permission for %s", volume_name)
                        if "WRITE" in privilege:
                            permissions.add("write")
                            logger.info("Added WRITE permission for %s", volume_name)
                        if "ALTER" in privilege:
                            permissions.add("alter")
                            logger.info("Added ALTER permission for %s", volume_name)
                        if privilege == "ALL":
                            permissions.update(["read", "write", "alter"])
                            logger.info("Added ALL permissions for %s", volume_name)

            logger.info("Final permissions for %s: %s", volume_name, permissions)

            # No explicit grant found: being listed by SHOW VOLUMES is taken
            # as evidence of basic access.
            if not permissions:
                try:
                    cursor.execute("SHOW VOLUMES")
                    volumes = cursor.fetchall()
                    for volume in volumes:
                        if len(volume) > 0 and volume[0] == volume_name:
                            permissions.add("read")  # at least read permission
                            logger.debug("Volume %s found in SHOW VOLUMES, assuming read permission", volume_name)
                            break
                except Exception:
                    logger.debug("Cannot access volume %s, no basic permission", volume_name)

    except Exception as e:
        logger.warning("Could not check external volume permissions for %s: %s", volume_name, e)
        # The grant lookup failed: fall back to a basic volume access check.
        try:
            with self._connection.cursor() as cursor:
                cursor.execute("SHOW VOLUMES")
                volumes = cursor.fetchall()
                for volume in volumes:
                    if len(volume) > 0 and volume[0] == volume_name:
                        logger.info("Basic volume access verified for %s", volume_name)
                        permissions.add("read")
                        permissions.add("write")  # write permission is assumed
                        break
        except Exception as basic_e:
            logger.warning("Basic volume access check failed for %s: %s", volume_name, basic_e)
            # Last resort: assume basic (read) permission.
            permissions.add("read")

    # Cache the result, whichever path produced it.
    self._permission_cache[cache_key] = permissions
    return permissions
|
||||
|
||||
def clear_permission_cache(self):
    """Empty the permission cache in place so later checks query again."""
    cache = self._permission_cache
    cache.clear()
    logger.debug("Permission cache cleared")
|
||||
|
||||
def get_permission_summary(self, dataset_id: Optional[str] = None) -> dict[str, bool]:
    """Return, for every permission name, whether it is currently granted.

    ``VolumePermission`` has members that share a value (LIST/READ and
    DELETE/WRITE), which makes LIST and DELETE enum aliases. Iterating the
    enum skips aliases, so the names are taken from ``__members__`` to make
    the summary cover every declared name. Each distinct member is checked
    only once.

    Args:
        dataset_id: Dataset ID (used for table volumes).

    Returns:
        Mapping of lowercase permission name (``read``, ``write``, ``list``,
        ``delete``, ``usage``) to the result of ``check_permission``.
    """
    checked: dict[VolumePermission, bool] = {}
    summary: dict[str, bool] = {}

    for name, member in VolumePermission.__members__.items():
        if member not in checked:
            checked[member] = self.check_permission(member, dataset_id)
        summary[name.lower()] = checked[member]

    return summary
|
||||
|
||||
def check_inherited_permission(self, file_path: str, operation: VolumePermission) -> bool:
    """Check whether ``operation`` is allowed on ``file_path``.

    - Table volume: the first path segment is the dataset ID. The dataset
      permission must be granted, and the path must contain neither a
      traversal pattern nor a sensitive name.
    - User volume: a multi-segment path must start with the current user's
      name; then the basic permission check applies.
    - External volume: only the basic permission check applies.

    NOTE(review): the traversal and sensitive-path checks run for table
    volumes only -- confirm that user and external volumes are meant to skip
    them.

    Args:
        file_path: Path of the file.
        operation: Operation to perform.

    Returns:
        True if the user has permission; False otherwise, for an unknown
        volume type, or when the check raised.
    """
    try:
        segments = file_path.strip("/").split("/")
        if not segments:
            logger.warning("Invalid file path for permission inheritance check")
            return False

        volume_kind = self._volume_type

        if volume_kind == "table":
            # For a Table Volume the first level is the dataset ID.
            dataset_id = segments[0]

            if not self.check_permission(operation, dataset_id):
                logger.debug("Permission denied for dataset %s", dataset_id)
                return False

            if self._contains_path_traversal(file_path):
                logger.warning("Path traversal attack detected: %s", file_path)
                return False

            if self._is_sensitive_path(file_path):
                logger.warning("Access to sensitive path denied: %s", file_path)
                return False

            logger.debug("Permission inherited for path %s", file_path)
            return True

        if volume_kind == "user":
            current_user = self._get_current_username()

            # Reject attempts to reach into another user's directory.
            if len(segments) > 1 and segments[0] != current_user:
                logger.warning("User %s attempted to access %s's directory", current_user, segments[0])
                return False

            return self.check_permission(operation)

        if volume_kind == "external":
            return self.check_permission(operation)

        logger.warning("Unknown volume type for permission inheritance: %s", self._volume_type)
        return False

    except Exception:
        logger.exception("Permission inheritance check failed")
        return False
|
||||
|
||||
def _contains_path_traversal(self, file_path: str) -> bool:
    """Report whether ``file_path`` looks like a path traversal attempt.

    A path is flagged when it
    - contains a known traversal pattern (compared case-insensitively),
    - is absolute (starts with ``/`` or ``\\``),
    - starts with a Windows drive prefix (second character ``:``), or
    - has a ``..`` segment once percent-escapes are decoded and backslashes
      are treated as separators. This last rule also covers a trailing
      ``..`` (``a/..``), a bare ``..``, and mixed encodings such as
      ``%2e%2e/``, which the literal patterns alone miss.

    Percent-escapes are decoded once; doubly encoded input is not unwrapped.

    Args:
        file_path: Path to inspect.

    Returns:
        True if the path is considered unsafe, False otherwise.
    """
    from urllib.parse import unquote

    # Patterns are lowercase because they are matched against the lowercased path.
    traversal_patterns = (
        "../",
        "..\\",
        "..%2f",
        "..%5c",
        "%2e%2e%2f",
        "%2e%2e%5c",
        "....//",
        "....\\\\",
    )

    lowered = file_path.lower()
    if any(pattern in lowered for pattern in traversal_patterns):
        return True

    # Absolute paths.
    if file_path.startswith(("/", "\\")):
        return True

    # Windows drive paths.
    if len(file_path) >= 2 and file_path[1] == ":":
        return True

    # Any ".." path segment after decoding and separator normalisation.
    decoded = unquote(file_path).replace("\\", "/")
    return ".." in decoded.split("/")
|
||||
|
||||
def _is_sensitive_path(self, file_path: str) -> bool:
    """Report whether ``file_path`` contains a sensitive keyword.

    This is a case-insensitive substring match, so a keyword inside a longer
    word also counts (``catalog`` contains ``log``).

    Args:
        file_path: Path to inspect.

    Returns:
        True if any sensitive keyword occurs in the path, False otherwise.
    """
    sensitive_keywords = (
        "passwd",
        "shadow",
        "hosts",
        "config",
        "secrets",
        "private",
        "key",
        "certificate",
        "cert",
        "ssl",
        "database",
        "backup",
        "dump",
        "log",
        "tmp",
    )

    lowered = file_path.lower()
    for keyword in sensitive_keywords:
        if keyword in lowered:
            return True
    return False
|
||||
|
||||
def validate_operation(self, operation: str, dataset_id: Optional[str] = None) -> bool:
    """Validate a storage operation by name.

    Args:
        operation: Operation name: ``save``, ``load``, ``load_once``,
            ``load_stream``, ``download``, ``exists``, ``delete`` or ``scan``.
        dataset_id: Dataset ID.

    Returns:
        True if the operation is allowed; False if it is denied or its name
        is not recognised.
    """
    read = VolumePermission.READ
    permission_for = {
        "save": VolumePermission.WRITE,
        "load": read,
        "load_once": read,
        "load_stream": read,
        "download": read,
        "exists": read,
        "delete": VolumePermission.DELETE,
        "scan": VolumePermission.LIST,
    }

    try:
        required = permission_for[operation]
    except KeyError:
        logger.warning("Unknown operation: %s", operation)
        return False

    return self.check_permission(required, dataset_id)
|
||||
|
||||
|
||||
class VolumePermissionError(Exception):
    """Raised when a volume operation is not permitted.

    The operation name, the volume type and the optional dataset ID are kept
    as attributes alongside the message.
    """

    def __init__(self, message: str, operation: str, volume_type: str, dataset_id: Optional[str] = None):
        super().__init__(message)
        self.operation = operation
        self.volume_type = volume_type
        self.dataset_id = dataset_id
|
||||
|
||||
|
||||
def check_volume_permission(
    permission_manager: VolumePermissionManager, operation: str, dataset_id: Optional[str] = None
) -> None:
    """Raise unless ``operation`` is permitted.

    This is a plain guard function to call before performing an operation;
    it is not a decorator.

    Args:
        permission_manager: Permission manager to consult.
        operation: Operation name.
        dataset_id: Dataset ID.

    Raises:
        VolumePermissionError: If the operation is not permitted.
    """
    if permission_manager.validate_operation(operation, dataset_id):
        return

    message_parts = [f"Permission denied for operation '{operation}' on {permission_manager._volume_type} volume"]
    if dataset_id:
        message_parts.append(f" (dataset: {dataset_id})")

    raise VolumePermissionError(
        "".join(message_parts),
        operation=operation,
        volume_type=permission_manager._volume_type or "unknown",
        dataset_id=dataset_id,
    )
|
||||
|
|
@ -5,6 +5,7 @@ class StorageType(StrEnum):
|
|||
ALIYUN_OSS = "aliyun-oss"
|
||||
AZURE_BLOB = "azure-blob"
|
||||
BAIDU_OBS = "baidu-obs"
|
||||
CLICKZETTA_VOLUME = "clickzetta-volume"
|
||||
GOOGLE_STORAGE = "google-storage"
|
||||
HUAWEI_OBS = "huawei-obs"
|
||||
LOCAL = "local"
|
||||
|
|
|
|||
|
|
@ -194,6 +194,7 @@ vdb = [
|
|||
"alibabacloud_tea_openapi~=0.3.9",
|
||||
"chromadb==0.5.20",
|
||||
"clickhouse-connect~=0.7.16",
|
||||
"clickzetta-connector-python>=0.8.102",
|
||||
"couchbase~=4.3.0",
|
||||
"elasticsearch==8.14.0",
|
||||
"opensearch-py==2.4.0",
|
||||
|
|
@ -213,3 +214,4 @@ vdb = [
|
|||
"xinference-client~=1.2.2",
|
||||
"mo-vector~=0.1.13",
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,168 @@
|
|||
"""Integration tests for ClickZetta Volume Storage."""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
import pytest
|
||||
|
||||
from extensions.storage.clickzetta_volume.clickzetta_volume_storage import (
|
||||
ClickZettaVolumeConfig,
|
||||
ClickZettaVolumeStorage,
|
||||
)
|
||||
|
||||
|
||||
class TestClickZettaVolumeStorage(unittest.TestCase):
    """Checks for the ClickZetta Volume storage backend."""

    def setUp(self):
        """Build a table-volume config from the environment, with placeholder fallbacks."""
        env = os.getenv
        self.config = ClickZettaVolumeConfig(
            username=env("CLICKZETTA_USERNAME", "test_user"),
            password=env("CLICKZETTA_PASSWORD", "test_pass"),
            instance=env("CLICKZETTA_INSTANCE", "test_instance"),
            service=env("CLICKZETTA_SERVICE", "uat-api.clickzetta.com"),
            workspace=env("CLICKZETTA_WORKSPACE", "quick_start"),
            vcluster=env("CLICKZETTA_VCLUSTER", "default_ap"),
            schema_name=env("CLICKZETTA_SCHEMA", "dify"),
            volume_type="table",
            table_prefix="test_dataset_",
        )

    @pytest.mark.skipif(not os.getenv("CLICKZETTA_USERNAME"), reason="ClickZetta credentials not provided")
    def test_user_volume_operations(self):
        """Round-trip a file through a User Volume: save, read, stream, download, scan, delete."""
        self.config.volume_type = "user"
        storage = ClickZettaVolumeStorage(self.config)

        name = "test_file.txt"
        payload = b"Hello, ClickZetta Volume!"

        # Save, then confirm the file is visible.
        storage.save(name, payload)
        assert storage.exists(name)

        # Whole-file read.
        assert storage.load_once(name) == payload

        # Streaming read.
        assert b"".join(storage.load_stream(name)) == payload

        # Download to a local temporary file.
        with tempfile.NamedTemporaryFile() as scratch:
            storage.download(name, scratch.name)
            with open(scratch.name, "rb") as handle:
                assert handle.read() == payload

        # Listing.
        listed = storage.scan("", files=True, directories=False)
        assert name in listed

        # Delete and confirm it is gone.
        storage.delete(name)
        assert not storage.exists(name)

    @pytest.mark.skipif(not os.getenv("CLICKZETTA_USERNAME"), reason="ClickZetta credentials not provided")
    def test_table_volume_operations(self):
        """Round-trip a file through a Table Volume addressed by dataset ID."""
        self.config.volume_type = "table"
        storage = ClickZettaVolumeStorage(self.config)

        dataset_id = "12345"
        name = f"{dataset_id}/test_file.txt"
        payload = b"Hello, Table Volume!"

        storage.save(name, payload)
        assert storage.exists(name)
        assert storage.load_once(name) == payload

        # Scanning the dataset lists the file without the dataset prefix.
        listed = storage.scan(dataset_id, files=True, directories=False)
        assert "test_file.txt" in listed

        storage.delete(name)
        assert not storage.exists(name)

    def test_config_validation(self):
        """Invalid configurations must be rejected with ValueError."""
        invalid_configs = [
            # Empty username.
            {"username": "", "password": "pass", "instance": "instance"},
            # Unsupported volume type.
            {"username": "user", "password": "pass", "instance": "instance", "volume_type": "invalid_type"},
            # External volume without a volume_name.
            {"username": "user", "password": "pass", "instance": "instance", "volume_type": "external"},
        ]

        for kwargs in invalid_configs:
            with pytest.raises(ValueError):
                ClickZettaVolumeConfig(**kwargs)

    def test_volume_path_generation(self):
        """Volume paths depend on the volume type and the dataset ID."""
        storage = ClickZettaVolumeStorage(self.config)

        # Table volume with an explicit dataset ID.
        assert storage._get_volume_path("test.txt", "12345") == "test_dataset_12345/test.txt"

        # A path that already carries the dataset ID is left as it is.
        assert storage._get_volume_path("12345/test.txt") == "12345/test.txt"

        # User volume.
        storage._config.volume_type = "user"
        assert storage._get_volume_path("test.txt") == "test.txt"

    def test_sql_prefix_generation(self):
        """The SQL volume prefix depends on the volume type."""
        storage = ClickZettaVolumeStorage(self.config)

        # Table volume.
        assert storage._get_volume_sql_prefix("12345") == "TABLE VOLUME test_dataset_12345"

        # User volume.
        storage._config.volume_type = "user"
        assert storage._get_volume_sql_prefix() == "USER VOLUME"

        # External volume.
        storage._config.volume_type = "external"
        storage._config.volume_name = "my_external_volume"
        assert storage._get_volume_sql_prefix() == "VOLUME my_external_volume"
|
||||
|
||||
|
||||
# Run the test cases with unittest's runner when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
# Clickzetta Integration Tests
|
||||
|
||||
## Running Tests
|
||||
|
||||
To run the Clickzetta integration tests, you need to set the following environment variables:
|
||||
|
||||
```bash
|
||||
export CLICKZETTA_USERNAME=your_username
|
||||
export CLICKZETTA_PASSWORD=your_password
|
||||
export CLICKZETTA_INSTANCE=your_instance
|
||||
export CLICKZETTA_SERVICE=api.clickzetta.com
|
||||
export CLICKZETTA_WORKSPACE=your_workspace
|
||||
export CLICKZETTA_VCLUSTER=your_vcluster
|
||||
export CLICKZETTA_SCHEMA=dify
|
||||
```
|
||||
|
||||
Then run the tests:
|
||||
|
||||
```bash
|
||||
pytest api/tests/integration_tests/vdb/clickzetta/
|
||||
```
|
||||
|
||||
## Security Note
|
||||
|
||||
Never commit credentials to the repository. Always use environment variables or secure credential management systems.
|
||||
|
|
@ -0,0 +1,237 @@
|
|||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from core.rag.datasource.vdb.clickzetta.clickzetta_vector import ClickzettaConfig, ClickzettaVector
|
||||
from core.rag.models.document import Document
|
||||
from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, get_example_text, setup_mock_redis
|
||||
|
||||
|
||||
class TestClickzettaVector(AbstractVectorTest):
    """
    Test cases for Clickzetta vector database integration.

    The tests talk to a live Clickzetta instance. The ``vector_store`` fixture
    skips them unless CLICKZETTA_USERNAME, CLICKZETTA_PASSWORD and
    CLICKZETTA_INSTANCE are set in the environment.
    """

    @pytest.fixture
    def vector_store(self):
        """Create a Clickzetta vector store instance for testing.

        Yields:
            ClickzettaVector: store bound to a collection named after the
            current process id; the collection is deleted on teardown.
        """
        # Skip test if Clickzetta credentials are not configured
        for required in ("CLICKZETTA_USERNAME", "CLICKZETTA_PASSWORD", "CLICKZETTA_INSTANCE"):
            if not os.getenv(required):
                pytest.skip(f"{required} is not configured")

        config = ClickzettaConfig(
            username=os.getenv("CLICKZETTA_USERNAME", ""),
            password=os.getenv("CLICKZETTA_PASSWORD", ""),
            instance=os.getenv("CLICKZETTA_INSTANCE", ""),
            service=os.getenv("CLICKZETTA_SERVICE", "api.clickzetta.com"),
            workspace=os.getenv("CLICKZETTA_WORKSPACE", "quick_start"),
            vcluster=os.getenv("CLICKZETTA_VCLUSTER", "default_ap"),
            schema=os.getenv("CLICKZETTA_SCHEMA", "dify_test"),
            batch_size=10,  # Small batch size for testing
            enable_inverted_index=True,
            analyzer_type="chinese",
            analyzer_mode="smart",
            vector_distance_function="cosine_distance",
        )

        with setup_mock_redis():
            vector = ClickzettaVector(
                collection_name="test_collection_" + str(os.getpid()),
                config=config,
            )

            yield vector

            # Cleanup is deliberately best effort: a failed drop of the test
            # collection must not turn an otherwise passing test into an error.
            try:
                vector.delete()
            except Exception:
                pass

    def test_clickzetta_vector_basic_operations(self, vector_store):
        """Test basic CRUD operations on Clickzetta vector store."""
        # Prepare test data
        texts = [
            "这是第一个测试文档,包含一些中文内容。",
            "This is the second test document with English content.",
            "第三个文档混合了English和中文内容。",
        ]
        embeddings = [
            [0.1, 0.2, 0.3, 0.4],
            [0.5, 0.6, 0.7, 0.8],
            [0.9, 1.0, 1.1, 1.2],
        ]
        documents = [
            Document(page_content=text, metadata={"doc_id": f"doc_{i}", "source": "test"})
            for i, text in enumerate(texts)
        ]

        # Test create (initial insert)
        vector_store.create(texts=documents, embeddings=embeddings)

        # Test text_exists
        assert vector_store.text_exists("doc_0")
        assert not vector_store.text_exists("doc_999")

        # Test search_by_vector
        query_vector = [0.1, 0.2, 0.3, 0.4]
        results = vector_store.search_by_vector(query_vector, top_k=2)
        assert len(results) > 0
        assert results[0].page_content == texts[0]  # Should match the first document

        # Test search_by_full_text (Chinese)
        results = vector_store.search_by_full_text("中文", top_k=3)
        assert len(results) >= 2  # Should find documents with Chinese content

        # Test search_by_full_text (English)
        results = vector_store.search_by_full_text("English", top_k=3)
        assert len(results) >= 2  # Should find documents with English content

        # Test delete_by_ids
        vector_store.delete_by_ids(["doc_0"])
        assert not vector_store.text_exists("doc_0")
        assert vector_store.text_exists("doc_1")

        # Test delete_by_metadata_field
        vector_store.delete_by_metadata_field("source", "test")
        assert not vector_store.text_exists("doc_1")
        assert not vector_store.text_exists("doc_2")

    def test_clickzetta_vector_advanced_search(self, vector_store):
        """Test advanced search features of Clickzetta vector store."""
        # Prepare test data with more complex metadata
        documents = []
        embeddings = []
        for i in range(10):
            doc = Document(
                page_content=f"Document {i}: " + get_example_text(),
                metadata={
                    "doc_id": f"adv_doc_{i}",
                    "category": "technical" if i % 2 == 0 else "general",
                    "document_id": f"doc_{i // 3}",  # Group documents
                    "importance": i,
                },
            )
            documents.append(doc)
            # Scale by i + 1 so no embedding is the all-zero vector: the store
            # is configured with cosine_distance, which is undefined for it.
            scale = i + 1
            embeddings.append([0.1 * scale, 0.2 * scale, 0.3 * scale, 0.4 * scale])

        vector_store.create(texts=documents, embeddings=embeddings)

        # Test vector search with document filter
        query_vector = [0.5, 1.0, 1.5, 2.0]
        results = vector_store.search_by_vector(
            query_vector,
            top_k=5,
            document_ids_filter=["doc_0", "doc_1"],
        )
        assert len(results) > 0
        # All results should belong to doc_0 or doc_1 groups
        for result in results:
            assert result.metadata["document_id"] in ["doc_0", "doc_1"]

        # Test score threshold
        results = vector_store.search_by_vector(
            query_vector,
            top_k=10,
            score_threshold=0.5,
        )
        # Check that all results have a score above threshold
        for result in results:
            assert result.metadata.get("score", 0) >= 0.5

    def test_clickzetta_batch_operations(self, vector_store):
        """Test batch insertion operations."""
        # 25 documents against the fixture's batch_size of 10, so the insert
        # has to span several batches.
        batch_size = 25
        documents = []
        embeddings = []

        for i in range(batch_size):
            doc = Document(
                page_content=f"Batch document {i}: This is a test document for batch processing.",
                metadata={"doc_id": f"batch_doc_{i}", "batch": "test_batch"},
            )
            documents.append(doc)
            # + 1 keeps every embedding non-zero (see cosine_distance config).
            scale = (i % 10) + 1
            embeddings.append([0.1 * scale, 0.2 * scale, 0.3 * scale, 0.4 * scale])

        # Test batch insert
        vector_store.add_texts(documents=documents, embeddings=embeddings)

        # Verify all documents were inserted
        for i in range(batch_size):
            assert vector_store.text_exists(f"batch_doc_{i}")

        # Clean up
        vector_store.delete_by_metadata_field("batch", "test_batch")

    def test_clickzetta_edge_cases(self, vector_store):
        """Test edge cases and error handling."""
        # Test empty operations: none of these may raise
        vector_store.create(texts=[], embeddings=[])
        vector_store.add_texts(documents=[], embeddings=[])
        vector_store.delete_by_ids([])

        # Test special characters in content
        special_doc = Document(
            page_content="Special chars: 'quotes', \"double\", \\backslash, \n newline",
            metadata={"doc_id": "special_doc", "test": "edge_case"},
        )
        embeddings = [[0.1, 0.2, 0.3, 0.4]]

        vector_store.add_texts(documents=[special_doc], embeddings=embeddings)
        assert vector_store.text_exists("special_doc")

        # Test search with special characters.
        # Full-text search might not be available, so an empty result is
        # accepted; what must hold either way is that top_k is respected.
        results = vector_store.search_by_full_text("quotes", top_k=1)
        assert len(results) <= 1

        # Clean up
        vector_store.delete_by_ids(["special_doc"])

    def test_clickzetta_full_text_search_modes(self, vector_store):
        """Test different full-text search capabilities."""
        # Prepare documents with various language content
        documents = [
            Document(
                page_content="云器科技提供强大的Lakehouse解决方案",
                metadata={"doc_id": "cn_doc_1", "lang": "chinese"},
            ),
            Document(
                page_content="Clickzetta provides powerful Lakehouse solutions",
                metadata={"doc_id": "en_doc_1", "lang": "english"},
            ),
            Document(
                page_content="Lakehouse是现代数据架构的重要组成部分",
                metadata={"doc_id": "cn_doc_2", "lang": "chinese"},
            ),
            Document(
                page_content="Modern data architecture includes Lakehouse technology",
                metadata={"doc_id": "en_doc_2", "lang": "english"},
            ),
        ]

        embeddings = [[0.1, 0.2, 0.3, 0.4] for _ in documents]

        vector_store.create(texts=documents, embeddings=embeddings)

        # Term that appears in both the Chinese and the English documents
        results = vector_store.search_by_full_text("Lakehouse", top_k=4)
        assert len(results) >= 2  # Should find at least documents with "Lakehouse"

        # English-only term
        results = vector_store.search_by_full_text("solutions", top_k=2)
        assert len(results) >= 1  # Should find English documents with "solutions"

        # Chinese-only phrase
        results = vector_store.search_by_full_text("数据架构", top_k=2)
        assert len(results) >= 1  # Should find Chinese documents with this phrase

        # Clean up
        vector_store.delete_by_metadata_field("lang", "chinese")
        vector_store.delete_by_metadata_field("lang", "english")
|
||||
|
|
@ -0,0 +1,165 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test Clickzetta integration in Docker environment
|
||||
"""
|
||||
import os
|
||||
import time
|
||||
|
||||
import requests
|
||||
from clickzetta import connect
|
||||
|
||||
|
||||
def test_clickzetta_connection():
    """Test direct connection to Clickzetta.

    Connects with the CLICKZETTA_* environment variables (placeholder values
    are used for any that are unset), runs a trivial query, lists the tables
    of the configured schema and, when ``collection_test_dataset`` exists,
    prints its columns and indexes.

    Returns:
        bool: True if every step ran without raising, False otherwise.
    """
    print("=== Testing direct Clickzetta connection ===")
    # Resolve the schema once so the SQL below targets the schema the
    # connection was opened with rather than a hard-coded "dify".
    schema = os.getenv("CLICKZETTA_SCHEMA", "dify")
    conn = None
    try:
        conn = connect(
            username=os.getenv("CLICKZETTA_USERNAME", "test_user"),
            password=os.getenv("CLICKZETTA_PASSWORD", "test_password"),
            instance=os.getenv("CLICKZETTA_INSTANCE", "test_instance"),
            service=os.getenv("CLICKZETTA_SERVICE", "api.clickzetta.com"),
            workspace=os.getenv("CLICKZETTA_WORKSPACE", "test_workspace"),
            vcluster=os.getenv("CLICKZETTA_VCLUSTER", "default"),
            database=schema,
        )

        with conn.cursor() as cursor:
            # Test basic connectivity
            cursor.execute("SELECT 1 as test")
            result = cursor.fetchone()
            print(f"✓ Connection test: {result}")

            # Check if our test table exists
            cursor.execute(f"SHOW TABLES IN {schema}")
            tables = cursor.fetchall()
            # NOTE(review): rows are indexed as (schema, table, ...) exactly as
            # before; confirm against the SHOW TABLES result layout.
            table_names = [t[1] for t in tables if t[0] == schema]
            print(f"✓ Existing tables: {table_names}")

            # Check if test collection exists
            test_collection = "collection_test_dataset"
            if test_collection in table_names:
                cursor.execute(f"DESCRIBE {schema}.{test_collection}")
                columns = cursor.fetchall()
                print(f"✓ Table structure for {test_collection}:")
                for col in columns:
                    print(f" - {col[0]}: {col[1]}")

                # Check for indexes
                cursor.execute(f"SHOW INDEXES IN {schema}.{test_collection}")
                indexes = cursor.fetchall()
                print(f"✓ Indexes on {test_collection}:")
                for idx in indexes:
                    print(f" - {idx}")

        return True
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and signal failure.
        print(f"✗ Connection test failed: {e}")
        return False
    finally:
        # Release the connection on every path; a failing close must not
        # replace the result computed above.
        if conn is not None:
            try:
                conn.close()
            except Exception as close_error:
                print(f"✗ Failed to close connection: {close_error}")
|
||||
|
||||
def test_dify_api():
    """Test Dify API with Clickzetta backend.

    Polls the console health endpoint on localhost:5001 up to 30 times,
    pausing 2 seconds between attempts.

    Returns:
        bool: True once the endpoint answers with HTTP 200, False if it never
        does (connection errors, timeouts and non-200 answers all count as
        "not ready").
    """
    print("\n=== Testing Dify API ===")

    base_url = "http://localhost:5001"

    # Wait for API to be ready
    max_retries = 30
    for attempt in range(max_retries):
        try:
            # A timeout keeps a hung server from blocking the script forever.
            response = requests.get(f"{base_url}/console/api/health", timeout=5)
        except requests.RequestException:
            response = None

        if response is not None and response.status_code == 200:
            print("✓ Dify API is ready")
            break

        # Not ready yet: wait before the next attempt, but not after the last.
        if attempt < max_retries - 1:
            time.sleep(2)
    else:
        # Loop finished without a 200 answer.
        print("✗ Dify API is not responding")
        return False

    # Check vector store configuration
    # This is a simplified check - in production, you'd use proper auth
    print("✓ Dify is configured to use Clickzetta as vector store")
    return True
|
||||
|
||||
def verify_table_structure():
    """Print the table layout Dify expects from a Clickzetta collection.

    Purely informational: nothing is queried or compared. The expected
    columns, metadata fields and index recommendations are written to stdout.

    Returns:
        bool: Always True.
    """
    print("\n=== Verifying Table Structure ===")

    column_types = {
        "id": "VARCHAR",
        "page_content": "VARCHAR",
        "metadata": "VARCHAR",  # JSON stored as VARCHAR in Clickzetta
        "vector": "ARRAY<FLOAT>",
    }
    metadata_keys = ["doc_id", "doc_hash", "document_id", "dataset_id"]
    index_notes = (
        "Vector index (HNSW) on 'vector' column",
        "Full-text index on 'page_content' (optional)",
        "Functional index on metadata->>'$.doc_id' (recommended)",
        "Functional index on metadata->>'$.document_id' (recommended)",
    )

    print("✓ Expected table structure:")
    for column in column_types:
        print(f" - {column}: {column_types[column]}")

    print("\n✓ Required metadata fields:")
    for key in metadata_keys:
        print(f" - {key}")

    print("\n✓ Index requirements:")
    for note in index_notes:
        print(f" - {note}")

    return True
|
||||
|
||||
def main():
    """Run every check in order and print a pass/fail summary.

    Returns:
        int: 0 when all checks passed, 1 otherwise (usable as an exit code).
    """
    print("Starting Clickzetta integration tests for Dify Docker\n")

    checks = [
        ("Direct Clickzetta Connection", test_clickzetta_connection),
        ("Dify API Status", test_dify_api),
        ("Table Structure Verification", verify_table_structure),
    ]

    outcomes = []
    for label, check in checks:
        try:
            ok = check()
        except Exception as e:
            # A crashing check counts as a failure; keep running the others.
            print(f"\n✗ {label} crashed: {e}")
            outcomes.append((label, False))
        else:
            outcomes.append((label, ok))

    # Summary
    separator = "=" * 50
    print("\n" + separator)
    print("Test Summary:")
    print(separator)

    total = len(outcomes)
    passed = len([label for label, ok in outcomes if ok])

    for label, ok in outcomes:
        if ok:
            status = "✅ PASSED"
        else:
            status = "❌ FAILED"
        print(f"{label}: {status}")

    print(f"\nTotal: {passed}/{total} tests passed")

    if passed != total:
        print("\n⚠️ Some tests failed. Please check the errors above.")
        return 1

    print("\n🎉 All tests passed! Clickzetta is ready for Dify Docker deployment.")
    print("\nNext steps:")
    print("1. Run: cd docker && docker-compose -f docker-compose.yaml -f docker-compose.clickzetta.yaml up -d")
    print("2. Access Dify at http://localhost:3000")
    print("3. Create a dataset and test vector storage with Clickzetta")
    return 0
|
||||
|
||||
# Script entry point: propagate main()'s result as the process exit status.
# SystemExit is raised directly because the bare exit() helper is injected by
# the site module and is not guaranteed to exist in every interpreter setup.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
|
|
@ -0,0 +1,928 @@
|
|||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from faker import Faker
|
||||
|
||||
from constants.model_template import default_app_templates
|
||||
from models.model import App, Site
|
||||
from services.account_service import AccountService, TenantService
|
||||
from services.app_service import AppService
|
||||
|
||||
|
||||
class TestAppService:
|
||||
"""Integration tests for AppService using testcontainers."""
|
||||
|
||||
@pytest.fixture
def mock_external_service_dependencies(self):
    """Patch the external collaborators of the app and account services.

    Yields:
        dict: the four active mocks, keyed "feature_service",
        "enterprise_service", "model_manager" and "account_feature_service".
    """
    with (
        patch("services.app_service.FeatureService") as app_features,
        patch("services.app_service.EnterpriseService") as enterprise,
        patch("services.app_service.ModelManager") as model_manager_cls,
        patch("services.account_service.FeatureService") as account_features,
    ):
        # Account service: registration is allowed.
        account_features.get_system_features.return_value.is_allow_register = True

        # App service: web-app auth disabled, enterprise hooks return None.
        app_features.get_system_features.return_value.webapp_auth.enabled = False
        web_app_auth = enterprise.WebAppAuth
        web_app_auth.update_app_access_mode.return_value = None
        web_app_auth.cleanup_webapp.return_value = None

        # ModelManager(): no default model instance, fixed provider/model pair.
        manager = model_manager_cls.return_value
        manager.get_default_model_instance.return_value = None
        manager.get_default_provider_model_name.return_value = ("openai", "gpt-3.5-turbo")

        mocks = {
            "feature_service": app_features,
            "enterprise_service": enterprise,
            "model_manager": model_manager_cls,
            "account_feature_service": account_features,
        }
        yield mocks
|
||||
|
||||
def test_create_app_success(self, db_session_with_containers, mock_external_service_dependencies):
    """
    Create a chat app and check every supplied and defaulted field.
    """
    fake = Faker()

    # An owner account with its tenant is the precondition for any app.
    account = AccountService.create_account(
        email=fake.email(),
        name=fake.name(),
        interface_language="en-US",
        password=fake.password(length=12),
    )
    TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
    tenant = account.current_tenant

    app_args = {
        "name": fake.company(),
        "description": fake.text(max_nb_chars=100),
        "mode": "chat",
        "icon_type": "emoji",
        "icon": "🤖",
        "icon_background": "#FF6B6B",
        "api_rph": 100,
        "api_rpm": 10,
    }

    app = AppService().create_app(tenant.id, app_args, account)

    # Every argument must be stored on the attribute of the same name.
    for field, expected in app_args.items():
        assert getattr(app, field) == expected

    # Ownership and audit columns.
    assert app.tenant_id == tenant.id
    assert app.created_by == account.id
    assert app.updated_by == account.id

    # Values the service fills in on its own.
    assert app.status == "normal"
    for enabled_flag in ("enable_site", "enable_api"):
        assert getattr(app, enabled_flag) is True
    for disabled_flag in ("is_demo", "is_public", "is_universal"):
        assert getattr(app, disabled_flag) is False
|
||||
|
||||
def test_create_app_with_different_modes(self, db_session_with_containers, mock_external_service_dependencies):
    """
    Create one app per mode listed in ``default_app_templates``.
    """
    fake = Faker()

    # An owner account with its tenant is the precondition for any app.
    account = AccountService.create_account(
        email=fake.email(),
        name=fake.name(),
        interface_language="en-US",
        password=fake.password(length=12),
    )
    TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
    tenant = account.current_tenant

    service = AppService()

    # Modes come from the entries of default_app_templates (their ``.value``).
    modes = [template_mode.value for template_mode in default_app_templates]

    shared_args = {"icon_type": "emoji", "icon": "🚀", "icon_background": "#4ECDC4"}
    for mode in modes:
        app_args = {
            "name": f"{fake.company()} {mode}",
            "description": f"Test app for {mode} mode",
            "mode": mode,
            **shared_args,
        }

        created = service.create_app(tenant.id, app_args, account)

        # The requested mode and name are stored, under the right owner.
        assert created.mode == mode
        assert created.name == app_args["name"]
        assert created.tenant_id == tenant.id
        assert created.created_by == account.id
|
||||
|
||||
def test_get_app_success(self, db_session_with_containers, mock_external_service_dependencies):
    """
    Fetch a freshly created app through ``get_app`` and compare the two.
    """
    fake = Faker()

    # An owner account with its tenant is the precondition for any app.
    account = AccountService.create_account(
        email=fake.email(),
        name=fake.name(),
        interface_language="en-US",
        password=fake.password(length=12),
    )
    TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
    tenant = account.current_tenant

    service = AppService()
    created_app = service.create_app(
        tenant.id,
        {
            "name": fake.company(),
            "description": fake.text(max_nb_chars=100),
            "mode": "chat",
            "icon_type": "emoji",
            "icon": "🎯",
            "icon_background": "#45B7D1",
        },
        account,
    )

    retrieved_app = service.get_app(created_app)

    # The retrieved app must agree with the created one on each of these.
    for field in ("id", "name", "description", "mode", "tenant_id", "created_by"):
        assert getattr(retrieved_app, field) == getattr(created_app, field)
|
||||
|
||||
def test_get_paginate_apps_success(self, db_session_with_containers, mock_external_service_dependencies):
    """
    Create five chat apps and read them back as one page.
    """
    fake = Faker()

    # An owner account with its tenant is the precondition for any app.
    account = AccountService.create_account(
        email=fake.email(),
        name=fake.name(),
        interface_language="en-US",
        password=fake.password(length=12),
    )
    TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
    tenant = account.current_tenant

    service = AppService()

    # Five chat apps that differ only in name and description.
    names = [fake.company() for _ in range(5)]
    fixed_args = {"mode": "chat", "icon_type": "emoji", "icon": "📱", "icon_background": "#96CEB4"}
    for name in names:
        service.create_app(
            tenant.id,
            {"name": name, "description": fake.text(max_nb_chars=100), **fixed_args},
            account,
        )

    page = service.get_paginate_apps(account.id, tenant.id, {"page": 1, "limit": 10, "mode": "chat"})

    # Page metadata and size.
    assert page is not None
    assert len(page.items) >= 5  # Should have at least 5 apps
    assert page.page == 1
    assert page.per_page == 10

    # Every listed app is a chat app of this tenant.
    for listed in page.items:
        assert listed.tenant_id == tenant.id
        assert listed.mode == "chat"
|
||||
|
||||
def test_get_paginate_apps_with_filters(self, db_session_with_containers, mock_external_service_dependencies):
    """
    Test paginated app list with various filters.

    Creates one chat app and one completion app for the same tenant, then
    checks that the mode, name and ``is_created_by_me`` filters each return
    exactly the expected app.
    """
    fake = Faker()

    # Create account and tenant first
    account = AccountService.create_account(
        email=fake.email(),
        name=fake.name(),
        interface_language="en-US",
        password=fake.password(length=12),
    )
    TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
    tenant = account.current_tenant

    app_service = AppService()

    # Create apps with different modes
    chat_app_args = {
        "name": "Chat App",
        "description": "A chat application",
        "mode": "chat",
        "icon_type": "emoji",
        "icon": "💬",
        "icon_background": "#FF6B6B",
    }
    completion_app_args = {
        "name": "Completion App",
        "description": "A completion application",
        "mode": "completion",
        "icon_type": "emoji",
        "icon": "✍️",
        "icon_background": "#4ECDC4",
    }

    chat_app = app_service.create_app(tenant.id, chat_app_args, account)
    completion_app = app_service.create_app(tenant.id, completion_app_args, account)

    # Test filter by mode
    chat_args = {
        "page": 1,
        "limit": 10,
        "mode": "chat",
    }
    chat_apps = app_service.get_paginate_apps(account.id, tenant.id, chat_args)
    assert len(chat_apps.items) == 1
    assert chat_apps.items[0].mode == "chat"
    # Pin the identity as well, not just the count.
    assert chat_apps.items[0].id == chat_app.id

    # Test filter by name
    name_args = {
        "page": 1,
        "limit": 10,
        "mode": "chat",
        "name": "Chat",
    }
    filtered_apps = app_service.get_paginate_apps(account.id, tenant.id, name_args)
    assert len(filtered_apps.items) == 1
    assert "Chat" in filtered_apps.items[0].name
    assert filtered_apps.items[0].id == chat_app.id

    # Test filter by created_by_me
    created_by_me_args = {
        "page": 1,
        "limit": 10,
        "mode": "completion",
        "is_created_by_me": True,
    }
    my_apps = app_service.get_paginate_apps(account.id, tenant.id, created_by_me_args)
    assert len(my_apps.items) == 1
    assert my_apps.items[0].id == completion_app.id
|
||||
|
||||
def test_get_paginate_apps_with_tag_filters(self, db_session_with_containers, mock_external_service_dependencies):
    """
    List apps by tag with the tag lookup patched: one hit, then no hit.
    """
    fake = Faker()

    # An owner account with its tenant is the precondition for any app.
    account = AccountService.create_account(
        email=fake.email(),
        name=fake.name(),
        interface_language="en-US",
        password=fake.password(length=12),
    )
    TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
    tenant = account.current_tenant

    service = AppService()

    app = service.create_app(
        tenant.id,
        {
            "name": fake.company(),
            "description": fake.text(max_nb_chars=100),
            "mode": "chat",
            "icon_type": "emoji",
            "icon": "🏷️",
            "icon_background": "#FFEAA7",
        },
        account,
    )

    tag_lookup_target = "services.app_service.TagService.get_target_ids_by_tag_ids"

    # Case 1: the tag lookup resolves to the app created above.
    with patch(tag_lookup_target) as tag_lookup:
        tag_lookup.return_value = [app.id]

        query = {"page": 1, "limit": 10, "mode": "chat", "tag_ids": ["tag1", "tag2"]}
        page = service.get_paginate_apps(account.id, tenant.id, query)

        # The lookup received the target type, the tenant and the tag ids.
        tag_lookup.assert_called_once_with("app", tenant.id, ["tag1", "tag2"])

        assert page is not None
        assert len(page.items) == 1
        assert page.items[0].id == app.id

    # Case 2: the tag lookup resolves to nothing.
    with patch(tag_lookup_target) as tag_lookup:
        tag_lookup.return_value = []

        query = {"page": 1, "limit": 10, "mode": "chat", "tag_ids": ["nonexistent_tag"]}
        page = service.get_paginate_apps(account.id, tenant.id, query)

        # Should return None when no apps match tag filter
        assert page is None
|
||||
|
||||
def test_update_app_success(self, db_session_with_containers, mock_external_service_dependencies):
    """
    Test successful app update with all fields.

    Updates name, description, icon, icon background and the answer-icon flag
    and checks that mode, tenant and creator keep their pre-update values.
    """
    fake = Faker()

    # Create account and tenant first
    account = AccountService.create_account(
        email=fake.email(),
        name=fake.name(),
        interface_language="en-US",
        password=fake.password(length=12),
    )
    TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
    tenant = account.current_tenant

    # Create app first
    app_args = {
        "name": fake.company(),
        "description": fake.text(max_nb_chars=100),
        "mode": "chat",
        "icon_type": "emoji",
        "icon": "🎯",
        "icon_background": "#45B7D1",
    }

    app_service = AppService()
    app = app_service.create_app(tenant.id, app_args, account)

    # Snapshot the fields that must survive the update BEFORE calling it.
    # Comparing against ``app`` afterwards proves nothing if update_app
    # mutates and returns that same object.
    untouched_fields = ("mode", "tenant_id", "created_by")
    before_update = {field: getattr(app, field) for field in untouched_fields}

    # Update app
    update_args = {
        "name": "Updated App Name",
        "description": "Updated app description",
        "icon_type": "emoji",
        "icon": "🔄",
        "icon_background": "#FF8C42",
        "use_icon_as_answer_icon": True,
    }

    # update_app reads the current user through flask_login; supply the account.
    with patch("flask_login.utils._get_user", return_value=account):
        updated_app = app_service.update_app(app, update_args)

    # Verify updated fields
    assert updated_app.name == update_args["name"]
    assert updated_app.description == update_args["description"]
    assert updated_app.icon == update_args["icon"]
    assert updated_app.icon_background == update_args["icon_background"]
    assert updated_app.use_icon_as_answer_icon is True
    assert updated_app.updated_by == account.id

    # Verify other fields remain unchanged
    for field, expected in before_update.items():
        assert getattr(updated_app, field) == expected
|
||||
|
||||
def test_update_app_name_success(self, db_session_with_containers, mock_external_service_dependencies):
    """
    Test successful app name update.

    Renames an app and checks that description, mode, tenant and creator keep
    their pre-update values.
    """
    fake = Faker()

    # Create account and tenant first
    account = AccountService.create_account(
        email=fake.email(),
        name=fake.name(),
        interface_language="en-US",
        password=fake.password(length=12),
    )
    TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
    tenant = account.current_tenant

    # Create app first
    app_args = {
        "name": fake.company(),
        "description": fake.text(max_nb_chars=100),
        "mode": "chat",
        "icon_type": "emoji",
        "icon": "🎯",
        "icon_background": "#45B7D1",
    }

    app_service = AppService()
    app = app_service.create_app(tenant.id, app_args, account)

    # Snapshot the fields that must survive the rename BEFORE calling it.
    # Comparing against ``app`` afterwards proves nothing if update_app_name
    # mutates and returns that same object.
    untouched_fields = ("description", "mode", "tenant_id", "created_by")
    before_update = {field: getattr(app, field) for field in untouched_fields}

    # Update app name
    new_name = "New App Name"
    # update_app_name reads the current user through flask_login.
    with patch("flask_login.utils._get_user", return_value=account):
        updated_app = app_service.update_app_name(app, new_name)

    assert updated_app.name == new_name
    assert updated_app.updated_by == account.id

    # Verify other fields remain unchanged
    for field, expected in before_update.items():
        assert getattr(updated_app, field) == expected
|
||||
|
||||
def test_update_app_icon_success(self, db_session_with_containers, mock_external_service_dependencies):
|
||||
"""
|
||||
Test successful app icon update.
|
||||
"""
|
||||
fake = Faker()
|
||||
|
||||
# Create account and tenant first
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create app first
|
||||
app_args = {
|
||||
"name": fake.company(),
|
||||
"description": fake.text(max_nb_chars=100),
|
||||
"mode": "chat",
|
||||
"icon_type": "emoji",
|
||||
"icon": "🎯",
|
||||
"icon_background": "#45B7D1",
|
||||
}
|
||||
|
||||
app_service = AppService()
|
||||
app = app_service.create_app(tenant.id, app_args, account)
|
||||
|
||||
# Store original values
|
||||
original_icon = app.icon
|
||||
original_icon_background = app.icon_background
|
||||
|
||||
# Update app icon
|
||||
new_icon = "🌟"
|
||||
new_icon_background = "#FFD93D"
|
||||
with patch("flask_login.utils._get_user", return_value=account):
|
||||
updated_app = app_service.update_app_icon(app, new_icon, new_icon_background)
|
||||
|
||||
assert updated_app.icon == new_icon
|
||||
assert updated_app.icon_background == new_icon_background
|
||||
assert updated_app.updated_by == account.id
|
||||
|
||||
# Verify other fields remain unchanged
|
||||
assert updated_app.name == app.name
|
||||
assert updated_app.description == app.description
|
||||
assert updated_app.mode == app.mode
|
||||
assert updated_app.tenant_id == app.tenant_id
|
||||
assert updated_app.created_by == app.created_by
|
||||
|
||||
def test_update_app_site_status_success(self, db_session_with_containers, mock_external_service_dependencies):
|
||||
"""
|
||||
Test successful app site status update.
|
||||
"""
|
||||
fake = Faker()
|
||||
|
||||
# Create account and tenant first
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create app first
|
||||
app_args = {
|
||||
"name": fake.company(),
|
||||
"description": fake.text(max_nb_chars=100),
|
||||
"mode": "chat",
|
||||
"icon_type": "emoji",
|
||||
"icon": "🌐",
|
||||
"icon_background": "#74B9FF",
|
||||
}
|
||||
|
||||
app_service = AppService()
|
||||
app = app_service.create_app(tenant.id, app_args, account)
|
||||
|
||||
# Store original site status
|
||||
original_site_status = app.enable_site
|
||||
|
||||
# Update site status to disabled
|
||||
with patch("flask_login.utils._get_user", return_value=account):
|
||||
updated_app = app_service.update_app_site_status(app, False)
|
||||
assert updated_app.enable_site is False
|
||||
assert updated_app.updated_by == account.id
|
||||
|
||||
# Update site status back to enabled
|
||||
with patch("flask_login.utils._get_user", return_value=account):
|
||||
updated_app = app_service.update_app_site_status(updated_app, True)
|
||||
assert updated_app.enable_site is True
|
||||
assert updated_app.updated_by == account.id
|
||||
|
||||
# Verify other fields remain unchanged
|
||||
assert updated_app.name == app.name
|
||||
assert updated_app.description == app.description
|
||||
assert updated_app.mode == app.mode
|
||||
assert updated_app.tenant_id == app.tenant_id
|
||||
assert updated_app.created_by == app.created_by
|
||||
|
||||
def test_update_app_api_status_success(self, db_session_with_containers, mock_external_service_dependencies):
|
||||
"""
|
||||
Test successful app API status update.
|
||||
"""
|
||||
fake = Faker()
|
||||
|
||||
# Create account and tenant first
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create app first
|
||||
app_args = {
|
||||
"name": fake.company(),
|
||||
"description": fake.text(max_nb_chars=100),
|
||||
"mode": "chat",
|
||||
"icon_type": "emoji",
|
||||
"icon": "🔌",
|
||||
"icon_background": "#A29BFE",
|
||||
}
|
||||
|
||||
app_service = AppService()
|
||||
app = app_service.create_app(tenant.id, app_args, account)
|
||||
|
||||
# Store original API status
|
||||
original_api_status = app.enable_api
|
||||
|
||||
# Update API status to disabled
|
||||
with patch("flask_login.utils._get_user", return_value=account):
|
||||
updated_app = app_service.update_app_api_status(app, False)
|
||||
assert updated_app.enable_api is False
|
||||
assert updated_app.updated_by == account.id
|
||||
|
||||
# Update API status back to enabled
|
||||
with patch("flask_login.utils._get_user", return_value=account):
|
||||
updated_app = app_service.update_app_api_status(updated_app, True)
|
||||
assert updated_app.enable_api is True
|
||||
assert updated_app.updated_by == account.id
|
||||
|
||||
# Verify other fields remain unchanged
|
||||
assert updated_app.name == app.name
|
||||
assert updated_app.description == app.description
|
||||
assert updated_app.mode == app.mode
|
||||
assert updated_app.tenant_id == app.tenant_id
|
||||
assert updated_app.created_by == app.created_by
|
||||
|
||||
def test_update_app_site_status_no_change(self, db_session_with_containers, mock_external_service_dependencies):
|
||||
"""
|
||||
Test app site status update when status doesn't change.
|
||||
"""
|
||||
fake = Faker()
|
||||
|
||||
# Create account and tenant first
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create app first
|
||||
app_args = {
|
||||
"name": fake.company(),
|
||||
"description": fake.text(max_nb_chars=100),
|
||||
"mode": "chat",
|
||||
"icon_type": "emoji",
|
||||
"icon": "🔄",
|
||||
"icon_background": "#FD79A8",
|
||||
}
|
||||
|
||||
app_service = AppService()
|
||||
app = app_service.create_app(tenant.id, app_args, account)
|
||||
|
||||
# Store original values
|
||||
original_site_status = app.enable_site
|
||||
original_updated_at = app.updated_at
|
||||
|
||||
# Update site status to the same value (no change)
|
||||
updated_app = app_service.update_app_site_status(app, original_site_status)
|
||||
|
||||
# Verify app is returned unchanged
|
||||
assert updated_app.id == app.id
|
||||
assert updated_app.enable_site == original_site_status
|
||||
assert updated_app.updated_at == original_updated_at
|
||||
|
||||
# Verify other fields remain unchanged
|
||||
assert updated_app.name == app.name
|
||||
assert updated_app.description == app.description
|
||||
assert updated_app.mode == app.mode
|
||||
assert updated_app.tenant_id == app.tenant_id
|
||||
assert updated_app.created_by == app.created_by
|
||||
|
||||
def test_delete_app_success(self, db_session_with_containers, mock_external_service_dependencies):
|
||||
"""
|
||||
Test successful app deletion.
|
||||
"""
|
||||
fake = Faker()
|
||||
|
||||
# Create account and tenant first
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create app first
|
||||
app_args = {
|
||||
"name": fake.company(),
|
||||
"description": fake.text(max_nb_chars=100),
|
||||
"mode": "chat",
|
||||
"icon_type": "emoji",
|
||||
"icon": "🗑️",
|
||||
"icon_background": "#E17055",
|
||||
}
|
||||
|
||||
app_service = AppService()
|
||||
app = app_service.create_app(tenant.id, app_args, account)
|
||||
|
||||
# Store app ID for verification
|
||||
app_id = app.id
|
||||
|
||||
# Mock the async deletion task
|
||||
with patch("services.app_service.remove_app_and_related_data_task") as mock_delete_task:
|
||||
mock_delete_task.delay.return_value = None
|
||||
|
||||
# Delete app
|
||||
app_service.delete_app(app)
|
||||
|
||||
# Verify async deletion task was called
|
||||
mock_delete_task.delay.assert_called_once_with(tenant_id=tenant.id, app_id=app_id)
|
||||
|
||||
# Verify app was deleted from database
|
||||
from extensions.ext_database import db
|
||||
|
||||
deleted_app = db.session.query(App).filter_by(id=app_id).first()
|
||||
assert deleted_app is None
|
||||
|
||||
def test_delete_app_with_related_data(self, db_session_with_containers, mock_external_service_dependencies):
|
||||
"""
|
||||
Test app deletion with related data cleanup.
|
||||
"""
|
||||
fake = Faker()
|
||||
|
||||
# Create account and tenant first
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create app first
|
||||
app_args = {
|
||||
"name": fake.company(),
|
||||
"description": fake.text(max_nb_chars=100),
|
||||
"mode": "chat",
|
||||
"icon_type": "emoji",
|
||||
"icon": "🧹",
|
||||
"icon_background": "#00B894",
|
||||
}
|
||||
|
||||
app_service = AppService()
|
||||
app = app_service.create_app(tenant.id, app_args, account)
|
||||
|
||||
# Store app ID for verification
|
||||
app_id = app.id
|
||||
|
||||
# Mock webapp auth cleanup
|
||||
mock_external_service_dependencies[
|
||||
"feature_service"
|
||||
].get_system_features.return_value.webapp_auth.enabled = True
|
||||
|
||||
# Mock the async deletion task
|
||||
with patch("services.app_service.remove_app_and_related_data_task") as mock_delete_task:
|
||||
mock_delete_task.delay.return_value = None
|
||||
|
||||
# Delete app
|
||||
app_service.delete_app(app)
|
||||
|
||||
# Verify webapp auth cleanup was called
|
||||
mock_external_service_dependencies["enterprise_service"].WebAppAuth.cleanup_webapp.assert_called_once_with(
|
||||
app_id
|
||||
)
|
||||
|
||||
# Verify async deletion task was called
|
||||
mock_delete_task.delay.assert_called_once_with(tenant_id=tenant.id, app_id=app_id)
|
||||
|
||||
# Verify app was deleted from database
|
||||
from extensions.ext_database import db
|
||||
|
||||
deleted_app = db.session.query(App).filter_by(id=app_id).first()
|
||||
assert deleted_app is None
|
||||
|
||||
def test_get_app_meta_success(self, db_session_with_containers, mock_external_service_dependencies):
|
||||
"""
|
||||
Test successful app metadata retrieval.
|
||||
"""
|
||||
fake = Faker()
|
||||
|
||||
# Create account and tenant first
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create app first
|
||||
app_args = {
|
||||
"name": fake.company(),
|
||||
"description": fake.text(max_nb_chars=100),
|
||||
"mode": "chat",
|
||||
"icon_type": "emoji",
|
||||
"icon": "📊",
|
||||
"icon_background": "#6C5CE7",
|
||||
}
|
||||
|
||||
app_service = AppService()
|
||||
app = app_service.create_app(tenant.id, app_args, account)
|
||||
|
||||
# Get app metadata
|
||||
app_meta = app_service.get_app_meta(app)
|
||||
|
||||
# Verify metadata contains expected fields
|
||||
assert "tool_icons" in app_meta
|
||||
# Note: get_app_meta currently only returns tool_icons
|
||||
|
||||
def test_get_app_code_by_id_success(self, db_session_with_containers, mock_external_service_dependencies):
|
||||
"""
|
||||
Test successful app code retrieval by app ID.
|
||||
"""
|
||||
fake = Faker()
|
||||
|
||||
# Create account and tenant first
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create app first
|
||||
app_args = {
|
||||
"name": fake.company(),
|
||||
"description": fake.text(max_nb_chars=100),
|
||||
"mode": "chat",
|
||||
"icon_type": "emoji",
|
||||
"icon": "🔗",
|
||||
"icon_background": "#FDCB6E",
|
||||
}
|
||||
|
||||
app_service = AppService()
|
||||
app = app_service.create_app(tenant.id, app_args, account)
|
||||
|
||||
# Get app code by ID
|
||||
app_code = AppService.get_app_code_by_id(app.id)
|
||||
|
||||
# Verify app code was retrieved correctly
|
||||
# Note: Site would be created when App is created, site.code is auto-generated
|
||||
assert app_code is not None
|
||||
assert len(app_code) > 0
|
||||
|
||||
def test_get_app_id_by_code_success(self, db_session_with_containers, mock_external_service_dependencies):
|
||||
"""
|
||||
Test successful app ID retrieval by app code.
|
||||
"""
|
||||
fake = Faker()
|
||||
|
||||
# Create account and tenant first
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Create app first
|
||||
app_args = {
|
||||
"name": fake.company(),
|
||||
"description": fake.text(max_nb_chars=100),
|
||||
"mode": "chat",
|
||||
"icon_type": "emoji",
|
||||
"icon": "🆔",
|
||||
"icon_background": "#E84393",
|
||||
}
|
||||
|
||||
app_service = AppService()
|
||||
app = app_service.create_app(tenant.id, app_args, account)
|
||||
|
||||
# Create a site for the app
|
||||
site = Site()
|
||||
site.app_id = app.id
|
||||
site.code = fake.postalcode()
|
||||
site.title = fake.company()
|
||||
site.status = "normal"
|
||||
site.default_language = "en-US"
|
||||
site.customize_token_strategy = "uuid"
|
||||
from extensions.ext_database import db
|
||||
|
||||
db.session.add(site)
|
||||
db.session.commit()
|
||||
|
||||
# Get app ID by code
|
||||
app_id = AppService.get_app_id_by_code(site.code)
|
||||
|
||||
# Verify app ID was retrieved correctly
|
||||
assert app_id == app.id
|
||||
|
||||
def test_create_app_invalid_mode(self, db_session_with_containers, mock_external_service_dependencies):
|
||||
"""
|
||||
Test app creation with invalid mode.
|
||||
"""
|
||||
fake = Faker()
|
||||
|
||||
# Create account and tenant first
|
||||
account = AccountService.create_account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
password=fake.password(length=12),
|
||||
)
|
||||
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
|
||||
tenant = account.current_tenant
|
||||
|
||||
# Setup app creation arguments with invalid mode
|
||||
app_args = {
|
||||
"name": fake.company(),
|
||||
"description": fake.text(max_nb_chars=100),
|
||||
"mode": "invalid_mode", # Invalid mode
|
||||
"icon_type": "emoji",
|
||||
"icon": "❌",
|
||||
"icon_background": "#D63031",
|
||||
}
|
||||
|
||||
app_service = AppService()
|
||||
|
||||
# Attempt to create app with invalid mode
|
||||
with pytest.raises(ValueError, match="invalid mode value"):
|
||||
app_service.create_app(tenant.id, app_args, account)
|
||||
|
|
@ -49,7 +49,7 @@ def test_executor_with_json_body_and_number_variable():
|
|||
assert executor.method == "post"
|
||||
assert executor.url == "https://api.example.com/data"
|
||||
assert executor.headers == {"Content-Type": "application/json"}
|
||||
assert executor.params == []
|
||||
assert executor.params is None
|
||||
assert executor.json == {"number": 42}
|
||||
assert executor.data is None
|
||||
assert executor.files is None
|
||||
|
|
@ -102,7 +102,7 @@ def test_executor_with_json_body_and_object_variable():
|
|||
assert executor.method == "post"
|
||||
assert executor.url == "https://api.example.com/data"
|
||||
assert executor.headers == {"Content-Type": "application/json"}
|
||||
assert executor.params == []
|
||||
assert executor.params is None
|
||||
assert executor.json == {"name": "John Doe", "age": 30, "email": "john@example.com"}
|
||||
assert executor.data is None
|
||||
assert executor.files is None
|
||||
|
|
@ -157,7 +157,7 @@ def test_executor_with_json_body_and_nested_object_variable():
|
|||
assert executor.method == "post"
|
||||
assert executor.url == "https://api.example.com/data"
|
||||
assert executor.headers == {"Content-Type": "application/json"}
|
||||
assert executor.params == []
|
||||
assert executor.params is None
|
||||
assert executor.json == {"object": {"name": "John Doe", "age": 30, "email": "john@example.com"}}
|
||||
assert executor.data is None
|
||||
assert executor.files is None
|
||||
|
|
@ -245,7 +245,7 @@ def test_executor_with_form_data():
|
|||
assert executor.url == "https://api.example.com/upload"
|
||||
assert "Content-Type" in executor.headers
|
||||
assert "multipart/form-data" in executor.headers["Content-Type"]
|
||||
assert executor.params == []
|
||||
assert executor.params is None
|
||||
assert executor.json is None
|
||||
# '__multipart_placeholder__' is expected when no file inputs exist,
|
||||
# to ensure the request is treated as multipart/form-data by the backend.
|
||||
|
|
|
|||
58
api/uv.lock
58
api/uv.lock
|
|
@ -983,6 +983,25 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/42/1f/935d0810b73184a1d306f92458cb0a2e9b0de2377f536da874e063b8e422/clickhouse_connect-0.7.19-cp312-cp312-win_amd64.whl", hash = "sha256:b771ca6a473d65103dcae82810d3a62475c5372fc38d8f211513c72b954fb020", size = 239584, upload-time = "2024-08-21T21:36:22.105Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clickzetta-connector-python"
|
||||
version = "0.8.102"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "future" },
|
||||
{ name = "numpy" },
|
||||
{ name = "packaging" },
|
||||
{ name = "pandas" },
|
||||
{ name = "pyarrow" },
|
||||
{ name = "python-dateutil" },
|
||||
{ name = "requests" },
|
||||
{ name = "sqlalchemy" },
|
||||
{ name = "urllib3" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c6/e5/23dcc950e873127df0135cf45144062a3207f5d2067259c73854e8ce7228/clickzetta_connector_python-0.8.102-py3-none-any.whl", hash = "sha256:c45486ae77fd82df7113ec67ec50e772372588d79c23757f8ee6291a057994a7", size = 77861, upload-time = "2025-07-17T03:11:59.543Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cloudscraper"
|
||||
version = "1.2.71"
|
||||
|
|
@ -1383,6 +1402,7 @@ vdb = [
|
|||
{ name = "alibabacloud-tea-openapi" },
|
||||
{ name = "chromadb" },
|
||||
{ name = "clickhouse-connect" },
|
||||
{ name = "clickzetta-connector-python" },
|
||||
{ name = "couchbase" },
|
||||
{ name = "elasticsearch" },
|
||||
{ name = "mo-vector" },
|
||||
|
|
@ -1568,6 +1588,7 @@ vdb = [
|
|||
{ name = "alibabacloud-tea-openapi", specifier = "~=0.3.9" },
|
||||
{ name = "chromadb", specifier = "==0.5.20" },
|
||||
{ name = "clickhouse-connect", specifier = "~=0.7.16" },
|
||||
{ name = "clickzetta-connector-python", specifier = ">=0.8.102" },
|
||||
{ name = "couchbase", specifier = "~=4.3.0" },
|
||||
{ name = "elasticsearch", specifier = "==8.14.0" },
|
||||
{ name = "mo-vector", specifier = "~=0.1.13" },
|
||||
|
|
@ -2111,7 +2132,7 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "google-cloud-bigquery"
|
||||
version = "3.34.0"
|
||||
version = "3.30.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "google-api-core", extra = ["grpc"] },
|
||||
|
|
@ -2122,9 +2143,9 @@ dependencies = [
|
|||
{ name = "python-dateutil" },
|
||||
{ name = "requests" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/24/f9/e9da2d56d7028f05c0e2f5edf6ce43c773220c3172666c3dd925791d763d/google_cloud_bigquery-3.34.0.tar.gz", hash = "sha256:5ee1a78ba5c2ccb9f9a8b2bf3ed76b378ea68f49b6cac0544dc55cc97ff7c1ce", size = 489091, upload-time = "2025-05-29T17:18:06.03Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f1/2f/3dda76b3ec029578838b1fe6396e6b86eb574200352240e23dea49265bb7/google_cloud_bigquery-3.30.0.tar.gz", hash = "sha256:7e27fbafc8ed33cc200fe05af12ecd74d279fe3da6692585a3cef7aee90575b6", size = 474389, upload-time = "2025-02-27T18:49:45.416Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b1/7e/7115c4f67ca0bc678f25bff1eab56cc37d06eb9a3978940b2ebd0705aa0a/google_cloud_bigquery-3.34.0-py3-none-any.whl", hash = "sha256:de20ded0680f8136d92ff5256270b5920dfe4fae479f5d0f73e90e5df30b1cf7", size = 253555, upload-time = "2025-05-29T17:18:02.904Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0c/6d/856a6ca55c1d9d99129786c929a27dd9d31992628ebbff7f5d333352981f/google_cloud_bigquery-3.30.0-py2.py3-none-any.whl", hash = "sha256:f4d28d846a727f20569c9b2d2f4fa703242daadcb2ec4240905aa485ba461877", size = 247885, upload-time = "2025-02-27T18:49:43.454Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -3918,11 +3939,11 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
version = "24.2"
|
||||
version = "23.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950, upload-time = "2024-11-08T09:47:47.202Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/fb/2b/9b9c33ffed44ee921d0967086d653047286054117d584f1b1a7c22ceaf7b/packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5", size = 146714, upload-time = "2023-10-01T13:50:05.279Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451, upload-time = "2024-11-08T09:47:44.722Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ec/1a/610693ac4ee14fcdf2d9bf3c493370e4f2ef7ae2e19217d7a237ff42367d/packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7", size = 53011, upload-time = "2023-10-01T13:50:03.745Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -4302,6 +4323,31 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyarrow"
|
||||
version = "14.0.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "numpy" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d7/8b/d18b7eb6fb22e5ed6ffcbc073c85dae635778dbd1270a6cf5d750b031e84/pyarrow-14.0.2.tar.gz", hash = "sha256:36cef6ba12b499d864d1def3e990f97949e0b79400d08b7cf74504ffbd3eb025", size = 1063645, upload-time = "2023-12-18T15:43:41.625Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/94/8a/411ef0b05483076b7f548c74ccaa0f90c1e60d3875db71a821f6ffa8cf42/pyarrow-14.0.2-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:87482af32e5a0c0cce2d12eb3c039dd1d853bd905b04f3f953f147c7a196915b", size = 26904455, upload-time = "2023-12-18T15:40:43.477Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6c/6c/882a57798877e3a49ba54d8e0540bea24aed78fb42e1d860f08c3449c75e/pyarrow-14.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:059bd8f12a70519e46cd64e1ba40e97eae55e0cbe1695edd95384653d7626b23", size = 23997116, upload-time = "2023-12-18T15:40:48.533Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ec/3f/ef47fe6192ce4d82803a073db449b5292135406c364a7fc49dfbcd34c987/pyarrow-14.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f16111f9ab27e60b391c5f6d197510e3ad6654e73857b4e394861fc79c37200", size = 35944575, upload-time = "2023-12-18T15:40:55.128Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1a/90/2021e529d7f234a3909f419d4341d53382541ef77d957fa274a99c533b18/pyarrow-14.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06ff1264fe4448e8d02073f5ce45a9f934c0f3db0a04460d0b01ff28befc3696", size = 38079719, upload-time = "2023-12-18T15:41:02.565Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/30/a9/474caf5fd54a6d5315aaf9284c6e8f5d071ca825325ad64c53137b646e1f/pyarrow-14.0.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6dd4f4b472ccf4042f1eab77e6c8bce574543f54d2135c7e396f413046397d5a", size = 35429706, upload-time = "2023-12-18T15:41:09.955Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d9/f8/cfba56f5353e51c19b0c240380ce39483f4c76e5c4aee5a000f3d75b72da/pyarrow-14.0.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:32356bfb58b36059773f49e4e214996888eeea3a08893e7dbde44753799b2a02", size = 38001476, upload-time = "2023-12-18T15:41:16.372Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/43/3f/7bdf7dc3b3b0cfdcc60760e7880954ba99ccd0bc1e0df806f3dd61bc01cd/pyarrow-14.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:52809ee69d4dbf2241c0e4366d949ba035cbcf48409bf404f071f624ed313a2b", size = 24576230, upload-time = "2023-12-18T15:41:22.561Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/69/5b/d8ab6c20c43b598228710e4e4a6cba03a01f6faa3d08afff9ce76fd0fd47/pyarrow-14.0.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:c87824a5ac52be210d32906c715f4ed7053d0180c1060ae3ff9b7e560f53f944", size = 26819585, upload-time = "2023-12-18T15:41:27.59Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2d/29/bed2643d0dd5e9570405244a61f6db66c7f4704a6e9ce313f84fa5a3675a/pyarrow-14.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a25eb2421a58e861f6ca91f43339d215476f4fe159eca603c55950c14f378cc5", size = 23965222, upload-time = "2023-12-18T15:41:32.449Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2a/34/da464632e59a8cdd083370d69e6c14eae30221acb284f671c6bc9273fadd/pyarrow-14.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c1da70d668af5620b8ba0a23f229030a4cd6c5f24a616a146f30d2386fec422", size = 35942036, upload-time = "2023-12-18T15:41:38.767Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a8/ff/cbed4836d543b29f00d2355af67575c934999ff1d43e3f438ab0b1b394f1/pyarrow-14.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2cc61593c8e66194c7cdfae594503e91b926a228fba40b5cf25cc593563bcd07", size = 38089266, upload-time = "2023-12-18T15:41:47.617Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/38/41/345011cb831d3dbb2dab762fc244c745a5df94b199223a99af52a5f7dff6/pyarrow-14.0.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:78ea56f62fb7c0ae8ecb9afdd7893e3a7dbeb0b04106f5c08dbb23f9c0157591", size = 35404468, upload-time = "2023-12-18T15:41:54.49Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fd/af/2fc23ca2068ff02068d8dabf0fb85b6185df40ec825973470e613dbd8790/pyarrow-14.0.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:37c233ddbce0c67a76c0985612fef27c0c92aef9413cf5aa56952f359fcb7379", size = 38003134, upload-time = "2023-12-18T15:42:01.593Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/95/1f/9d912f66a87e3864f694e000977a6a70a644ea560289eac1d733983f215d/pyarrow-14.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:e4b123ad0f6add92de898214d404e488167b87b5dd86e9a434126bc2b7a5578d", size = 25043754, upload-time = "2023-12-18T15:42:07.108Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyasn1"
|
||||
version = "0.6.1"
|
||||
|
|
|
|||
|
|
@ -333,6 +333,25 @@ OPENDAL_SCHEME=fs
|
|||
# Configurations for OpenDAL Local File System.
|
||||
OPENDAL_FS_ROOT=storage
|
||||
|
||||
# ClickZetta Volume Configuration (for storage backend)
|
||||
# To use ClickZetta Volume as storage backend, set STORAGE_TYPE=clickzetta-volume
|
||||
# Note: ClickZetta Volume will reuse the existing CLICKZETTA_* connection parameters
|
||||
|
||||
# Volume type selection (three types available):
|
||||
# - user: Personal/small team use, simple config, user-level permissions
|
||||
# - table: Enterprise multi-tenant, smart routing, table-level + user-level permissions
|
||||
# - external: Data lake integration, external storage connection, volume-level + storage-level permissions
|
||||
CLICKZETTA_VOLUME_TYPE=user
|
||||
|
||||
# External Volume name (required only when TYPE=external)
|
||||
CLICKZETTA_VOLUME_NAME=
|
||||
|
||||
# Table Volume table prefix (used only when TYPE=table)
|
||||
CLICKZETTA_VOLUME_TABLE_PREFIX=dataset_
|
||||
|
||||
# Dify file directory prefix (isolates from other apps, recommended to keep default)
|
||||
CLICKZETTA_VOLUME_DIFY_PREFIX=dify_km
|
||||
|
||||
# S3 Configuration
|
||||
#
|
||||
S3_ENDPOINT=
|
||||
|
|
@ -416,7 +435,7 @@ SUPABASE_URL=your-server-url
|
|||
# ------------------------------
|
||||
|
||||
# The type of vector store to use.
|
||||
# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`.
|
||||
# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`, `clickzetta`.
|
||||
VECTOR_STORE=weaviate
|
||||
# Prefix used to create collection name in vector database
|
||||
VECTOR_INDEX_NAME_PREFIX=Vector_index
|
||||
|
|
@ -655,6 +674,20 @@ TABLESTORE_ACCESS_KEY_ID=xxx
|
|||
TABLESTORE_ACCESS_KEY_SECRET=xxx
|
||||
TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE=false
|
||||
|
||||
# Clickzetta configuration, only available when VECTOR_STORE is `clickzetta`
|
||||
CLICKZETTA_USERNAME=
|
||||
CLICKZETTA_PASSWORD=
|
||||
CLICKZETTA_INSTANCE=
|
||||
CLICKZETTA_SERVICE=api.clickzetta.com
|
||||
CLICKZETTA_WORKSPACE=quick_start
|
||||
CLICKZETTA_VCLUSTER=default_ap
|
||||
CLICKZETTA_SCHEMA=dify
|
||||
CLICKZETTA_BATCH_SIZE=100
|
||||
CLICKZETTA_ENABLE_INVERTED_INDEX=true
|
||||
CLICKZETTA_ANALYZER_TYPE=chinese
|
||||
CLICKZETTA_ANALYZER_MODE=smart
|
||||
CLICKZETTA_VECTOR_DISTANCE_FUNCTION=cosine_distance
|
||||
|
||||
# ------------------------------
|
||||
# Knowledge Configuration
|
||||
# ------------------------------
|
||||
|
|
|
|||
|
|
@ -93,6 +93,10 @@ x-shared-env: &shared-api-worker-env
|
|||
STORAGE_TYPE: ${STORAGE_TYPE:-opendal}
|
||||
OPENDAL_SCHEME: ${OPENDAL_SCHEME:-fs}
|
||||
OPENDAL_FS_ROOT: ${OPENDAL_FS_ROOT:-storage}
|
||||
CLICKZETTA_VOLUME_TYPE: ${CLICKZETTA_VOLUME_TYPE:-user}
|
||||
CLICKZETTA_VOLUME_NAME: ${CLICKZETTA_VOLUME_NAME:-}
|
||||
CLICKZETTA_VOLUME_TABLE_PREFIX: ${CLICKZETTA_VOLUME_TABLE_PREFIX:-dataset_}
|
||||
CLICKZETTA_VOLUME_DIFY_PREFIX: ${CLICKZETTA_VOLUME_DIFY_PREFIX:-dify_km}
|
||||
S3_ENDPOINT: ${S3_ENDPOINT:-}
|
||||
S3_REGION: ${S3_REGION:-us-east-1}
|
||||
S3_BUCKET_NAME: ${S3_BUCKET_NAME:-difyai}
|
||||
|
|
@ -313,6 +317,18 @@ x-shared-env: &shared-api-worker-env
|
|||
TABLESTORE_ACCESS_KEY_ID: ${TABLESTORE_ACCESS_KEY_ID:-xxx}
|
||||
TABLESTORE_ACCESS_KEY_SECRET: ${TABLESTORE_ACCESS_KEY_SECRET:-xxx}
|
||||
TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE: ${TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE:-false}
|
||||
CLICKZETTA_USERNAME: ${CLICKZETTA_USERNAME:-}
|
||||
CLICKZETTA_PASSWORD: ${CLICKZETTA_PASSWORD:-}
|
||||
CLICKZETTA_INSTANCE: ${CLICKZETTA_INSTANCE:-}
|
||||
CLICKZETTA_SERVICE: ${CLICKZETTA_SERVICE:-api.clickzetta.com}
|
||||
CLICKZETTA_WORKSPACE: ${CLICKZETTA_WORKSPACE:-quick_start}
|
||||
CLICKZETTA_VCLUSTER: ${CLICKZETTA_VCLUSTER:-default_ap}
|
||||
CLICKZETTA_SCHEMA: ${CLICKZETTA_SCHEMA:-dify}
|
||||
CLICKZETTA_BATCH_SIZE: ${CLICKZETTA_BATCH_SIZE:-100}
|
||||
CLICKZETTA_ENABLE_INVERTED_INDEX: ${CLICKZETTA_ENABLE_INVERTED_INDEX:-true}
|
||||
CLICKZETTA_ANALYZER_TYPE: ${CLICKZETTA_ANALYZER_TYPE:-chinese}
|
||||
CLICKZETTA_ANALYZER_MODE: ${CLICKZETTA_ANALYZER_MODE:-smart}
|
||||
CLICKZETTA_VECTOR_DISTANCE_FUNCTION: ${CLICKZETTA_VECTOR_DISTANCE_FUNCTION:-cosine_distance}
|
||||
UPLOAD_FILE_SIZE_LIMIT: ${UPLOAD_FILE_SIZE_LIMIT:-15}
|
||||
UPLOAD_FILE_BATCH_LIMIT: ${UPLOAD_FILE_BATCH_LIMIT:-5}
|
||||
ETL_TYPE: ${ETL_TYPE:-dify}
|
||||
|
|
|
|||
|
|
@ -1,41 +0,0 @@
|
|||
'use client'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import {
|
||||
RiAddLine,
|
||||
RiArrowRightLine,
|
||||
} from '@remixicon/react'
|
||||
import Link from 'next/link'
|
||||
|
||||
type CreateAppCardProps = {
  // Optional ref forwarded to the "create dataset" anchor.
  ref?: React.Ref<HTMLAnchorElement>
}

/**
 * Card with two entry points: a primary link to `/datasets/create`
 * (icon, title and intro text) and a footer link to `/datasets/connect`.
 *
 * All visible labels come from the `dataset.*` i18n keys.
 */
function CreateAppCard(props: CreateAppCardProps) {
  const { ref } = props
  const { t } = useTranslation()

  // Primary action: dashed "+" tile followed by the localized title.
  const createLink = (
    <Link ref={ref} className='group flex grow cursor-pointer items-start p-4' href='/datasets/create'>
      <div className='flex items-center gap-3'>
        <div className='flex h-10 w-10 items-center justify-center rounded-lg border border-dashed border-divider-regular bg-background-default-lighter
          p-2 group-hover:border-solid group-hover:border-effects-highlight group-hover:bg-background-default-dodge'
        >
          <RiAddLine className='h-4 w-4 text-text-tertiary group-hover:text-text-accent' />
        </div>
        <div className='system-md-semibold text-text-secondary group-hover:text-text-accent'>{t('dataset.createDataset')}</div>
      </div>
    </Link>
  )

  // Secondary action rendered as the card footer.
  const connectLink = (
    <Link className='group flex cursor-pointer items-center gap-1 rounded-b-xl border-t-[0.5px] border-divider-subtle p-4' href='/datasets/connect'>
      <div className='system-xs-medium text-text-tertiary group-hover:text-text-accent'>{t('dataset.connectDataset')}</div>
      <RiArrowRightLine className='h-3.5 w-3.5 text-text-tertiary group-hover:text-text-accent' />
    </Link>
  )

  return (
    <div className='bg-background-default-dimm flex min-h-[160px] flex-col rounded-xl border-[0.5px]
      border-components-panel-border transition-all duration-200 ease-in-out'
    >
      {createLink}
      <div className='system-xs-regular p-4 pt-0 text-text-tertiary'>{t('dataset.createDatasetIntro')}</div>
      {connectLink}
    </div>
  )
}

CreateAppCard.displayName = 'CreateAppCard'

export default CreateAppCard
|
||||
|
|
@ -106,8 +106,8 @@ const Uploader: FC<Props> = ({
|
|||
<div className='flex w-full items-center justify-center space-x-2'>
|
||||
<RiUploadCloud2Line className='h-6 w-6 text-text-tertiary' />
|
||||
<div className='text-text-tertiary'>
|
||||
{t('datasetCreation.stepOne.uploader.button')}
|
||||
<span className='cursor-pointer pl-1 text-text-accent' onClick={selectHandle}>{t('datasetDocuments.list.batchModal.browse')}</span>
|
||||
{t('app.dslUploader.button')}
|
||||
<span className='cursor-pointer pl-1 text-text-accent' onClick={selectHandle}>{t('app.dslUploader.browse')}</span>
|
||||
</div>
|
||||
</div>
|
||||
{dragging && <div ref={dragRef} className='absolute left-0 top-0 h-full w-full' />}
|
||||
|
|
|
|||
|
|
@ -370,20 +370,14 @@ const AppCard = ({ app, onRefresh }: AppCardProps) => {
|
|||
{app.description}
|
||||
</div>
|
||||
</div>
|
||||
<div className={cn(
|
||||
'absolute bottom-1 left-0 right-0 h-[42px] shrink-0 items-center pb-[6px] pl-[14px] pr-[6px] pt-1',
|
||||
tags.length ? 'flex' : '!hidden group-hover:!flex',
|
||||
)}>
|
||||
<div className='absolute bottom-1 left-0 right-0 flex h-[42px] shrink-0 items-center pb-[6px] pl-[14px] pr-[6px] pt-1'>
|
||||
{isCurrentWorkspaceEditor && (
|
||||
<>
|
||||
<div className={cn('flex w-0 grow items-center gap-1')} onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
e.preventDefault()
|
||||
}}>
|
||||
<div className={cn(
|
||||
'mr-[41px] w-full grow group-hover:!mr-0 group-hover:!block',
|
||||
tags.length ? '!block' : '!hidden',
|
||||
)}>
|
||||
<div className='mr-[41px] w-full grow group-hover:!mr-0'>
|
||||
<TagSelector
|
||||
position='bl'
|
||||
type='app'
|
||||
|
|
@ -395,7 +389,7 @@ const AppCard = ({ app, onRefresh }: AppCardProps) => {
|
|||
/>
|
||||
</div>
|
||||
</div>
|
||||
<div className='mx-1 !hidden h-[14px] w-[1px] shrink-0 group-hover:!flex' />
|
||||
<div className='mx-1 !hidden h-[14px] w-[1px] shrink-0 bg-divider-regular group-hover:!flex' />
|
||||
<div className='!hidden shrink-0 group-hover:!flex'>
|
||||
<CustomPopover
|
||||
htmlContent={<Operations />}
|
||||
|
|
|
|||
|
|
@ -284,9 +284,9 @@ const Chat: FC<ChatProps> = ({
|
|||
{
|
||||
!noStopResponding && isResponding && (
|
||||
<div className='mb-2 flex justify-center'>
|
||||
<Button onClick={onStopResponding}>
|
||||
<StopCircle className='mr-[5px] h-3.5 w-3.5 text-gray-500' />
|
||||
<span className='text-xs font-normal text-gray-500'>{t('appDebug.operation.stopResponding')}</span>
|
||||
<Button className='border-components-panel-border bg-components-panel-bg text-components-button-secondary-text' onClick={onStopResponding}>
|
||||
<StopCircle className='mr-[5px] h-3.5 w-3.5' />
|
||||
<span className='text-xs font-normal'>{t('appDebug.operation.stopResponding')}</span>
|
||||
</Button>
|
||||
</div>
|
||||
)
|
||||
|
|
|
|||
|
|
@ -313,7 +313,7 @@ const FileUploader = ({
|
|||
<RiUploadCloud2Line className='mr-2 size-5' />
|
||||
|
||||
<span>
|
||||
{t('datasetCreation.stepOne.uploader.button')}
|
||||
{notSupportBatchUpload ? t('datasetCreation.stepOne.uploader.buttonSingleFile') : t('datasetCreation.stepOne.uploader.button')}
|
||||
{supportTypes.length > 0 && (
|
||||
<label className="ml-1 cursor-pointer text-text-accent" onClick={selectHandle}>{t('datasetCreation.stepOne.uploader.browse')}</label>
|
||||
)}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
'use client'
|
||||
import { useEffect, useState } from 'react'
|
||||
import { useEffect, useMemo, useState } from 'react'
|
||||
import { useContext } from 'use-context-selector'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { RiListUnordered } from '@remixicon/react'
|
||||
import { RiCloseLine, RiListUnordered } from '@remixicon/react'
|
||||
import TemplateEn from './template/template.en.mdx'
|
||||
import TemplateZh from './template/template.zh.mdx'
|
||||
import TemplateJa from './template/template.ja.mdx'
|
||||
|
|
@ -30,6 +30,7 @@ const Doc = ({ appDetail }: IDocProps) => {
|
|||
const { t } = useTranslation()
|
||||
const [toc, setToc] = useState<Array<{ href: string; text: string }>>([])
|
||||
const [isTocExpanded, setIsTocExpanded] = useState(false)
|
||||
const [activeSection, setActiveSection] = useState<string>('')
|
||||
const { theme } = useTheme()
|
||||
|
||||
const variables = appDetail?.model_config?.configs?.prompt_variables || []
|
||||
|
|
@ -59,13 +60,43 @@ const Doc = ({ appDetail }: IDocProps) => {
|
|||
return null
|
||||
}).filter((item): item is { href: string; text: string } => item !== null)
|
||||
setToc(tocItems)
|
||||
if (tocItems.length > 0)
|
||||
setActiveSection(tocItems[0].href.replace('#', ''))
|
||||
}
|
||||
}
|
||||
|
||||
// Run after component has rendered
|
||||
setTimeout(extractTOC, 0)
|
||||
}, [appDetail, locale])
|
||||
|
||||
// Scroll-spy: keeps `activeSection` pointing at the last TOC entry whose
// heading has crossed the vertical midpoint of the viewport.
useEffect(() => {
  // NOTE(review): the scroll container is located by a generic utility class;
  // presumably only one `.overflow-auto` ancestor exists on this page — confirm.
  const scrollContainer = document.querySelector('.overflow-auto')
  if (!scrollContainer || toc.length === 0)
    return

  const handleScroll = () => {
    let currentSection = ''
    // TOC items are visited in order, so the last heading at or above the
    // midpoint wins.
    toc.forEach((item) => {
      const targetId = item.href.replace('#', '')
      const element = document.getElementById(targetId)
      if (element && element.getBoundingClientRect().top <= window.innerHeight / 2)
        currentSection = targetId
    })

    // Setting the same value is a no-op for React (Object.is bail-out), so the
    // handler does not need to read `activeSection`. Keeping it out of the
    // dependency list avoids re-subscribing the listener on every section change.
    if (currentSection)
      setActiveSection(currentSection)
  }

  scrollContainer.addEventListener('scroll', handleScroll)
  // Sync once immediately so the highlight is right before the first scroll.
  handleScroll()
  return () => scrollContainer.removeEventListener('scroll', handleScroll)
}, [toc])
|
||||
|
||||
const handleTocClick = (e: React.MouseEvent<HTMLAnchorElement>, item: { href: string; text: string }) => {
|
||||
e.preventDefault()
|
||||
const targetId = item.href.replace('#', '')
|
||||
|
|
@ -82,94 +113,128 @@ const Doc = ({ appDetail }: IDocProps) => {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Memoized API-doc template element, chosen by app mode and UI locale.
// Index 1 of LanguagesSupported selects the Zh templates, index 7 the Ja
// templates; every other locale falls back to the En template.
// Unknown or missing modes render nothing.
const Template = useMemo(() => {
  const useZh = locale === LanguagesSupported[1]
  const useJa = locale === LanguagesSupported[7]
  // Every template receives the same three props.
  const templateProps = { appDetail, variables, inputs }

  switch (appDetail?.mode) {
    case 'chat':
    case 'agent-chat':
      if (useZh)
        return <TemplateChatZh {...templateProps} />
      if (useJa)
        return <TemplateChatJa {...templateProps} />
      return <TemplateChatEn {...templateProps} />
    case 'advanced-chat':
      if (useZh)
        return <TemplateAdvancedChatZh {...templateProps} />
      if (useJa)
        return <TemplateAdvancedChatJa {...templateProps} />
      return <TemplateAdvancedChatEn {...templateProps} />
    case 'workflow':
      if (useZh)
        return <TemplateWorkflowZh {...templateProps} />
      if (useJa)
        return <TemplateWorkflowJa {...templateProps} />
      return <TemplateWorkflowEn {...templateProps} />
    case 'completion':
      if (useZh)
        return <TemplateZh {...templateProps} />
      if (useJa)
        return <TemplateJa {...templateProps} />
      return <TemplateEn {...templateProps} />
    default:
      return null
  }
}, [appDetail, locale, variables, inputs])
|
||||
|
||||
return (
|
||||
<div className="flex">
|
||||
<div className={`fixed right-8 top-32 z-10 transition-all ${isTocExpanded ? 'w-64' : 'w-10'}`}>
|
||||
<div className={`fixed right-20 top-32 z-10 transition-all duration-150 ease-out ${isTocExpanded ? 'w-[280px]' : 'w-11'}`}>
|
||||
{isTocExpanded
|
||||
? (
|
||||
<nav className="toc max-h-[calc(100vh-150px)] w-full overflow-y-auto rounded-lg border border-components-panel-border bg-components-panel-bg p-4 shadow-md">
|
||||
<div className="mb-4 flex items-center justify-between">
|
||||
<h3 className="text-lg font-semibold text-text-primary">{t('appApi.develop.toc')}</h3>
|
||||
<nav className="toc flex max-h-[calc(100vh-150px)] w-full flex-col overflow-hidden rounded-xl border-[0.5px] border-components-panel-border bg-background-default-hover shadow-xl">
|
||||
<div className="relative z-10 flex items-center justify-between border-b border-components-panel-border-subtle bg-background-default-hover px-4 py-2.5">
|
||||
<span className="text-xs font-medium uppercase tracking-wide text-text-tertiary">
|
||||
{t('appApi.develop.toc')}
|
||||
</span>
|
||||
<button
|
||||
onClick={() => setIsTocExpanded(false)}
|
||||
className="text-text-tertiary hover:text-text-secondary"
|
||||
className="group flex h-6 w-6 items-center justify-center rounded-md transition-colors hover:bg-state-base-hover"
|
||||
aria-label="Close"
|
||||
>
|
||||
✕
|
||||
<RiCloseLine className="h-3 w-3 text-text-quaternary transition-colors group-hover:text-text-secondary" />
|
||||
</button>
|
||||
</div>
|
||||
<ul className="space-y-2">
|
||||
{toc.map((item, index) => (
|
||||
<li key={index}>
|
||||
<a
|
||||
href={item.href}
|
||||
className="text-text-secondary transition-colors duration-200 hover:text-text-primary hover:underline"
|
||||
onClick={e => handleTocClick(e, item)}
|
||||
>
|
||||
{item.text}
|
||||
</a>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
|
||||
<div className="from-components-panel-border-subtle/20 pointer-events-none absolute left-0 right-0 top-[41px] z-10 h-2 bg-gradient-to-b to-transparent"></div>
|
||||
<div className="pointer-events-none absolute left-0 right-0 top-[43px] z-10 h-3 bg-gradient-to-b from-background-default-hover to-transparent"></div>
|
||||
|
||||
<div className="relative flex-1 overflow-y-auto px-3 py-3 pt-1">
|
||||
{toc.length === 0 ? (
|
||||
<div className="px-2 py-8 text-center text-xs text-text-quaternary">
|
||||
{t('appApi.develop.noContent')}
|
||||
</div>
|
||||
) : (
|
||||
<ul className="space-y-0.5">
|
||||
{toc.map((item, index) => {
|
||||
const isActive = activeSection === item.href.replace('#', '')
|
||||
return (
|
||||
<li key={index}>
|
||||
<a
|
||||
href={item.href}
|
||||
onClick={e => handleTocClick(e, item)}
|
||||
className={cn(
|
||||
'group relative flex items-center rounded-md px-3 py-2 text-[13px] transition-all duration-200',
|
||||
isActive
|
||||
? 'bg-state-base-hover font-medium text-text-primary'
|
||||
: 'text-text-tertiary hover:bg-state-base-hover hover:text-text-secondary',
|
||||
)}
|
||||
>
|
||||
<span
|
||||
className={cn(
|
||||
'mr-2 h-1.5 w-1.5 rounded-full transition-all duration-200',
|
||||
isActive
|
||||
? 'scale-100 bg-text-accent'
|
||||
: 'scale-75 bg-components-panel-border',
|
||||
)}
|
||||
/>
|
||||
<span className="flex-1 truncate">
|
||||
{item.text}
|
||||
</span>
|
||||
</a>
|
||||
</li>
|
||||
)
|
||||
})}
|
||||
</ul>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className="pointer-events-none absolute bottom-0 left-0 right-0 z-10 h-4 rounded-b-xl bg-gradient-to-t from-background-default-hover to-transparent"></div>
|
||||
</nav>
|
||||
)
|
||||
: (
|
||||
<button
|
||||
onClick={() => setIsTocExpanded(true)}
|
||||
className="flex h-10 w-10 items-center justify-center rounded-full border border-components-panel-border bg-components-button-secondary-bg shadow-md transition-colors duration-200 hover:bg-components-button-secondary-bg-hover"
|
||||
className="group flex h-11 w-11 items-center justify-center rounded-full border-[0.5px] border-components-panel-border bg-components-panel-bg shadow-lg transition-all duration-150 hover:bg-background-default-hover hover:shadow-xl"
|
||||
aria-label="Open table of contents"
|
||||
>
|
||||
<RiListUnordered className="h-6 w-6 text-components-button-secondary-text" />
|
||||
<RiListUnordered className="h-5 w-5 text-text-tertiary transition-colors group-hover:text-text-secondary" />
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
<article className={cn('prose-xl prose', theme === Theme.dark && 'prose-invert')} >
|
||||
{(appDetail?.mode === 'chat' || appDetail?.mode === 'agent-chat') && (
|
||||
(() => {
|
||||
switch (locale) {
|
||||
case LanguagesSupported[1]:
|
||||
return <TemplateChatZh appDetail={appDetail} variables={variables} inputs={inputs} />
|
||||
case LanguagesSupported[7]:
|
||||
return <TemplateChatJa appDetail={appDetail} variables={variables} inputs={inputs} />
|
||||
default:
|
||||
return <TemplateChatEn appDetail={appDetail} variables={variables} inputs={inputs} />
|
||||
}
|
||||
})()
|
||||
)}
|
||||
{appDetail?.mode === 'advanced-chat' && (
|
||||
(() => {
|
||||
switch (locale) {
|
||||
case LanguagesSupported[1]:
|
||||
return <TemplateAdvancedChatZh appDetail={appDetail} variables={variables} inputs={inputs} />
|
||||
case LanguagesSupported[7]:
|
||||
return <TemplateAdvancedChatJa appDetail={appDetail} variables={variables} inputs={inputs} />
|
||||
default:
|
||||
return <TemplateAdvancedChatEn appDetail={appDetail} variables={variables} inputs={inputs} />
|
||||
}
|
||||
})()
|
||||
)}
|
||||
{appDetail?.mode === 'workflow' && (
|
||||
(() => {
|
||||
switch (locale) {
|
||||
case LanguagesSupported[1]:
|
||||
return <TemplateWorkflowZh appDetail={appDetail} variables={variables} inputs={inputs} />
|
||||
case LanguagesSupported[7]:
|
||||
return <TemplateWorkflowJa appDetail={appDetail} variables={variables} inputs={inputs} />
|
||||
default:
|
||||
return <TemplateWorkflowEn appDetail={appDetail} variables={variables} inputs={inputs} />
|
||||
}
|
||||
})()
|
||||
)}
|
||||
{appDetail?.mode === 'completion' && (
|
||||
(() => {
|
||||
switch (locale) {
|
||||
case LanguagesSupported[1]:
|
||||
return <TemplateZh appDetail={appDetail} variables={variables} inputs={inputs} />
|
||||
case LanguagesSupported[7]:
|
||||
return <TemplateJa appDetail={appDetail} variables={variables} inputs={inputs} />
|
||||
default:
|
||||
return <TemplateEn appDetail={appDetail} variables={variables} inputs={inputs} />
|
||||
}
|
||||
})()
|
||||
)}
|
||||
<article className={cn('prose-xl prose', theme === Theme.dark && 'prose-invert')}>
|
||||
{Template}
|
||||
</article>
|
||||
</div>
|
||||
)
|
||||
|
|
|
|||
|
|
@ -448,7 +448,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
|
|||
url='/text-to-audio'
|
||||
method='POST'
|
||||
title='テキストから音声'
|
||||
name='#audio'
|
||||
name='#text-to-audio'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
|
|
|
|||
|
|
@ -423,7 +423,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx'
|
|||
url='/text-to-audio'
|
||||
method='POST'
|
||||
title='文字转语音'
|
||||
name='#audio'
|
||||
name='#text-to-audio'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
|
|
|
|||
|
|
@ -1136,7 +1136,7 @@ Chat applications support session persistence, allowing previous chat history to
|
|||
url='/audio-to-text'
|
||||
method='POST'
|
||||
title='Speech to Text'
|
||||
name='#audio'
|
||||
name='#audio-to-text'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
|
|
@ -1187,7 +1187,7 @@ Chat applications support session persistence, allowing previous chat history to
|
|||
url='/text-to-audio'
|
||||
method='POST'
|
||||
title='Text to Audio'
|
||||
name='#audio'
|
||||
name='#text-to-audio'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
|
|
|
|||
|
|
@ -1136,7 +1136,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
|
|||
url='/audio-to-text'
|
||||
method='POST'
|
||||
title='音声からテキストへ'
|
||||
name='#audio'
|
||||
name='#audio-to-text'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
|
|
@ -1187,7 +1187,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
|
|||
url='/text-to-audio'
|
||||
method='POST'
|
||||
title='テキストから音声へ'
|
||||
name='#audio'
|
||||
name='#text-to-audio'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
|
|
|
|||
|
|
@ -1174,7 +1174,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx'
|
|||
url='/audio-to-text'
|
||||
method='POST'
|
||||
title='语音转文字'
|
||||
name='#audio'
|
||||
name='#audio-to-text'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
|
|
@ -1222,7 +1222,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx'
|
|||
url='/text-to-audio'
|
||||
method='POST'
|
||||
title='文字转语音'
|
||||
name='#audio'
|
||||
name='#text-to-audio'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
|
|
|
|||
|
|
@ -1170,7 +1170,7 @@ Chat applications support session persistence, allowing previous chat history to
|
|||
url='/audio-to-text'
|
||||
method='POST'
|
||||
title='Speech to Text'
|
||||
name='#audio'
|
||||
name='#audio-to-text'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
|
|
@ -1221,7 +1221,7 @@ Chat applications support session persistence, allowing previous chat history to
|
|||
url='/text-to-audio'
|
||||
method='POST'
|
||||
title='Text to Audio'
|
||||
name='#audio'
|
||||
name='#text-to-audio'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
|
|
|
|||
|
|
@ -1169,7 +1169,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
|
|||
url='/audio-to-text'
|
||||
method='POST'
|
||||
title='音声からテキストへ'
|
||||
name='#audio'
|
||||
name='#audio-to-text'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
|
|
@ -1220,7 +1220,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
|
|||
url='/text-to-audio'
|
||||
method='POST'
|
||||
title='テキストから音声へ'
|
||||
name='#audio'
|
||||
name='#text-to-audio'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
|
|
|
|||
|
|
@ -1185,7 +1185,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx'
|
|||
url='/audio-to-text'
|
||||
method='POST'
|
||||
title='语音转文字'
|
||||
name='#audio'
|
||||
name='#audio-to-text'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
|
|
@ -1233,7 +1233,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx'
|
|||
url='/text-to-audio'
|
||||
method='POST'
|
||||
title='文字转语音'
|
||||
name='#audio'
|
||||
name='#text-to-audio'
|
||||
/>
|
||||
<Row>
|
||||
<Col>
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ const DebugAndPreview = () => {
|
|||
<div
|
||||
ref={containerRef}
|
||||
className={cn(
|
||||
'relative flex h-full flex-col rounded-l-2xl border border-r-0 border-components-panel-border bg-chatbot-bg shadow-xl',
|
||||
'relative flex h-full flex-col rounded-l-2xl border border-r-0 border-components-panel-border bg-components-panel-bg shadow-xl',
|
||||
)}
|
||||
style={{ width: `${panelWidth}px` }}
|
||||
>
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ const ShortcutsName = ({
|
|||
keys.map(key => (
|
||||
<div
|
||||
key={key}
|
||||
className='system-kbd flex h-4 w-4 items-center justify-center rounded-[4px] bg-components-kbd-bg-gray capitalize'
|
||||
className='system-kbd flex h-4 min-w-4 items-center justify-center rounded-[4px] bg-components-kbd-bg-gray capitalize'
|
||||
>
|
||||
{getKeyboardKeyNameBySystem(key)}
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
@use '../../themes/light';
|
||||
@use '../../themes/dark';
|
||||
@use '../../themes/markdown-light';
|
||||
@use '../../themes/markdown-dark';
|
||||
|
||||
|
|
|
|||
|
|
@ -197,7 +197,7 @@ const translation = {
|
|||
after: '',
|
||||
},
|
||||
},
|
||||
contentEnableLabel: 'Moderater Inhalt aktiviert',
|
||||
contentEnableLabel: 'Inhaltsmoderation aktiviert',
|
||||
},
|
||||
fileUpload: {
|
||||
title: 'Datei-Upload',
|
||||
|
|
|
|||
|
|
@ -166,6 +166,10 @@ const translation = {
|
|||
description: 'Gibt an, ob das web app Symbol zum Ersetzen 🤖 in der freigegebenen Anwendung verwendet werden soll',
|
||||
},
|
||||
importFromDSLUrlPlaceholder: 'DSL-Link hier einfügen',
|
||||
dslUploader: {
|
||||
button: 'Datei per Drag & Drop ablegen oder',
|
||||
browse: 'Durchsuchen',
|
||||
},
|
||||
duplicate: 'Duplikat',
|
||||
importFromDSL: 'Import von DSL',
|
||||
importDSL: 'DSL-Datei importieren',
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ const translation = {
|
|||
uploader: {
|
||||
title: 'Textdatei hochladen',
|
||||
button: 'Dateien und Ordner hierher ziehen oder klicken',
|
||||
buttonSingleFile: 'Datei hierher ziehen oder klicken',
|
||||
browse: 'Durchsuchen',
|
||||
tip: 'Unterstützt {{supportTypes}}. Maximal {{size}}MB pro Datei.',
|
||||
validation: {
|
||||
|
|
|
|||
|
|
@ -287,6 +287,18 @@ const translation = {
|
|||
zoomTo50: 'Auf 50% vergrößern',
|
||||
zoomTo100: 'Auf 100% vergrößern',
|
||||
zoomToFit: 'An Bildschirm anpassen',
|
||||
selectionAlignment: 'Ausrichtung der Auswahl',
|
||||
alignLeft: 'Links',
|
||||
alignTop: 'Nach oben',
|
||||
distributeVertical: 'Vertikaler Raum',
|
||||
alignBottom: 'Unteres',
|
||||
distributeHorizontal: 'Horizontaler Raum',
|
||||
vertical: 'Senkrecht',
|
||||
alignMiddle: 'Mitte',
|
||||
alignCenter: 'Mitte',
|
||||
alignRight: 'Rechts',
|
||||
alignNodes: 'Knoten ausrichten',
|
||||
horizontal: 'Horizontal',
|
||||
},
|
||||
panel: {
|
||||
userInputField: 'Benutzereingabefeld',
|
||||
|
|
|
|||
|
|
@ -163,7 +163,7 @@ const translation = {
|
|||
moderation: {
|
||||
title: 'Content moderation',
|
||||
description: 'Secure model output by using moderation API or maintaining a sensitive word list.',
|
||||
contentEnableLabel: 'Enabled moderate content',
|
||||
contentEnableLabel: 'Content moderation enabled',
|
||||
allEnabled: 'INPUT & OUTPUT',
|
||||
inputEnabled: 'INPUT',
|
||||
outputEnabled: 'OUTPUT',
|
||||
|
|
|
|||
|
|
@ -23,6 +23,10 @@ const translation = {
|
|||
importFromDSLFile: 'From DSL file',
|
||||
importFromDSLUrl: 'From URL',
|
||||
importFromDSLUrlPlaceholder: 'Paste DSL link here',
|
||||
dslUploader: {
|
||||
button: 'Drag and drop file, or',
|
||||
browse: 'Browse',
|
||||
},
|
||||
deleteAppConfirmTitle: 'Delete this app?',
|
||||
deleteAppConfirmContent:
|
||||
'Deleting the app is irreversible. Users will no longer be able to access your app, and all prompt configurations and logs will be permanently deleted.',
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ const translation = {
|
|||
uploader: {
|
||||
title: 'Upload file',
|
||||
button: 'Drag and drop file or folder, or',
|
||||
buttonSingleFile: 'Drag and drop file, or',
|
||||
browse: 'Browse',
|
||||
tip: 'Supports {{supportTypes}}. Max {{batchCount}} in a batch and {{size}} MB each.',
|
||||
validation: {
|
||||
|
|
|
|||
|
|
@ -197,7 +197,7 @@ const translation = {
|
|||
after: '',
|
||||
},
|
||||
},
|
||||
contentEnableLabel: 'Contenido moderado habilitado',
|
||||
contentEnableLabel: 'Moderación de contenido habilitada',
|
||||
},
|
||||
fileUpload: {
|
||||
title: 'Subida de archivos',
|
||||
|
|
|
|||
|
|
@ -170,6 +170,10 @@ const translation = {
|
|||
},
|
||||
importFromDSLUrl: 'URL de origen',
|
||||
importFromDSLUrlPlaceholder: 'Pegar enlace DSL aquí',
|
||||
dslUploader: {
|
||||
button: 'Arrastrar y soltar archivo, o',
|
||||
browse: 'Examinar',
|
||||
},
|
||||
importFromDSL: 'Importar desde DSL',
|
||||
importFromDSLFile: 'Desde el archivo DSL',
|
||||
mermaid: {
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ const translation = {
|
|||
uploader: {
|
||||
title: 'Cargar archivo',
|
||||
button: 'Arrastre y suelte archivos o carpetas, o',
|
||||
buttonSingleFile: 'Arrastre y suelte archivo, o',
|
||||
browse: 'Buscar',
|
||||
tip: 'Soporta {{supportTypes}}. Máximo {{size}}MB cada uno.',
|
||||
validation: {
|
||||
|
|
|
|||
|
|
@ -287,6 +287,18 @@ const translation = {
|
|||
zoomTo50: 'Zoom al 50%',
|
||||
zoomTo100: 'Zoom al 100%',
|
||||
zoomToFit: 'Ajustar al tamaño',
|
||||
alignTop: 'Arriba',
|
||||
alignBottom: 'Fondo',
|
||||
alignNodes: 'Alinear nodos',
|
||||
alignCenter: 'Centro',
|
||||
selectionAlignment: 'Alineación de selección',
|
||||
horizontal: 'Horizontal',
|
||||
distributeHorizontal: 'Espaciar horizontalmente',
|
||||
vertical: 'Vertical',
|
||||
distributeVertical: 'Espaciar verticalmente',
|
||||
alignMiddle: 'medio',
|
||||
alignLeft: 'izquierdo',
|
||||
alignRight: 'derecho',
|
||||
},
|
||||
panel: {
|
||||
userInputField: 'Campo de entrada del usuario',
|
||||
|
|
|
|||
|
|
@ -197,7 +197,7 @@ const translation = {
|
|||
after: '',
|
||||
},
|
||||
},
|
||||
contentEnableLabel: 'محتوای متوسط فعال شده است',
|
||||
contentEnableLabel: 'مدیریت محتوا فعال شده است',
|
||||
},
|
||||
generate: {
|
||||
title: 'تولید کننده دستورالعمل',
|
||||
|
|
|
|||
|
|
@ -19,6 +19,10 @@ const translation = {
|
|||
importFromDSLFile: 'از فایل DSL',
|
||||
importFromDSLUrl: 'از URL',
|
||||
importFromDSLUrlPlaceholder: 'لینک DSL را اینجا بچسبانید',
|
||||
dslUploader: {
|
||||
button: 'فایل را بکشید و رها کنید، یا',
|
||||
browse: 'مرور',
|
||||
},
|
||||
deleteAppConfirmTitle: 'آیا این برنامه حذف شود؟',
|
||||
deleteAppConfirmContent:
|
||||
'حذف برنامه غیرقابل برگشت است. کاربران دیگر قادر به دسترسی به برنامه شما نخواهند بود و تمام تنظیمات و گزارشات درخواستها به صورت دائم حذف خواهند شد.',
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ const translation = {
|
|||
uploader: {
|
||||
title: 'بارگذاری فایل',
|
||||
button: 'فایل ها یا پوشه ها را بکشید و رها کنید یا',
|
||||
buttonSingleFile: 'فایل را بکشید و رها کنید یا',
|
||||
browse: 'مرور',
|
||||
tip: 'پشتیبانی از {{supportTypes}}. حداکثر {{size}}MB هر کدام.',
|
||||
validation: {
|
||||
|
|
|
|||
|
|
@ -287,6 +287,18 @@ const translation = {
|
|||
zoomTo50: 'بزرگنمایی به 50%',
|
||||
zoomTo100: 'بزرگنمایی به 100%',
|
||||
zoomToFit: 'تناسب با اندازه',
|
||||
horizontal: 'افقی',
|
||||
alignBottom: 'پایین',
|
||||
alignRight: 'راست',
|
||||
vertical: 'عمودی',
|
||||
alignCenter: 'مرکز',
|
||||
alignLeft: 'چپ',
|
||||
distributeVertical: 'فضا عمودی',
|
||||
distributeHorizontal: 'فضا به صورت افقی',
|
||||
alignTop: 'بالا',
|
||||
alignNodes: 'تراز کردن گره ها',
|
||||
selectionAlignment: 'تراز انتخاب',
|
||||
alignMiddle: 'میانه',
|
||||
},
|
||||
panel: {
|
||||
userInputField: 'فیلد ورودی کاربر',
|
||||
|
|
|
|||
|
|
@ -197,7 +197,7 @@ const translation = {
|
|||
after: 'Sorry, but you didn\'t provide a text to translate. Could you please provide the text?',
|
||||
},
|
||||
},
|
||||
contentEnableLabel: 'Activation du contenu modéré',
|
||||
contentEnableLabel: 'Modération de contenu activée',
|
||||
},
|
||||
fileUpload: {
|
||||
title: 'Téléchargement de fichier',
|
||||
|
|
@ -242,7 +242,7 @@ const translation = {
|
|||
'Veuillez attendre que la réponse à la tâche en lot soit terminée.',
|
||||
notSelectModel: 'Veuillez choisir un modèle',
|
||||
waitForImgUpload: 'Veuillez attendre que l\'image soit téléchargée',
|
||||
waitForFileUpload: 'Veuillez patienter jusqu’à ce que le(s) fichier(s) soit/les fichiers à télécharger',
|
||||
waitForFileUpload: 'Veuillez patienter pendant le téléchargement du/des fichier(s)',
|
||||
},
|
||||
chatSubTitle: 'Instructions',
|
||||
completionSubTitle: 'Indicatif de Prompt',
|
||||
|
|
|
|||
|
|
@ -169,7 +169,11 @@ const translation = {
|
|||
descriptionInExplore: 'Utilisation de l’icône web app pour remplacer 🤖 dans Explore',
|
||||
},
|
||||
importFromDSLUrlPlaceholder: 'Collez le lien DSL ici',
|
||||
importFromDSL: 'Importation à partir d’une DSL',
|
||||
dslUploader: {
|
||||
button: 'Glisser-déposer un fichier, ou',
|
||||
browse: 'Parcourir',
|
||||
},
|
||||
importFromDSL: 'Importation à partir d\'une DSL',
|
||||
importFromDSLUrl: 'À partir de l’URL',
|
||||
importFromDSLFile: 'À partir d’un fichier DSL',
|
||||
mermaid: {
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ const translation = {
|
|||
uploader: {
|
||||
title: 'Télécharger le fichier texte',
|
||||
button: 'Faites glisser et déposez des fichiers ou des dossiers, ou',
|
||||
buttonSingleFile: 'Faites glisser et déposez un fichier, ou',
|
||||
browse: 'Parcourir',
|
||||
tip: 'Prend en charge {{supportTypes}}. Max {{size}}MB chacun.',
|
||||
validation: {
|
||||
|
|
|
|||
|
|
@ -287,6 +287,18 @@ const translation = {
|
|||
zoomTo50: 'Zoomer à 50%',
|
||||
zoomTo100: 'Zoomer à 100%',
|
||||
zoomToFit: 'Zoomer pour ajuster',
|
||||
alignBottom: 'Fond',
|
||||
alignLeft: 'Gauche',
|
||||
alignCenter: 'Centre',
|
||||
alignTop: 'Retour au début',
|
||||
alignNodes: 'Aligner les nœuds',
|
||||
distributeHorizontal: 'Espace horizontal',
|
||||
alignMiddle: 'Milieu',
|
||||
horizontal: 'Horizontal',
|
||||
selectionAlignment: 'Alignement de la sélection',
|
||||
alignRight: 'Droite',
|
||||
vertical: 'Vertical',
|
||||
distributeVertical: 'Espace vertical',
|
||||
},
|
||||
panel: {
|
||||
userInputField: 'Champ de saisie de l\'utilisateur',
|
||||
|
|
|
|||
|
|
@ -213,7 +213,7 @@ const translation = {
|
|||
after: 'में कॉन्फ़िगर किए गए ओपनएआई एपीआई कुंजी की आवश्यकता होती है।',
|
||||
},
|
||||
},
|
||||
contentEnableLabel: 'मध्य स्तर की सामग्री सक्षम की गई',
|
||||
contentEnableLabel: 'सामग्री मॉडरेशन सक्षम है',
|
||||
},
|
||||
fileUpload: {
|
||||
numberLimit: 'मैक्स अपलोड करता है',
|
||||
|
|
|
|||
|
|
@ -172,6 +172,10 @@ const translation = {
|
|||
importFromDSLUrl: 'यूआरएल से',
|
||||
importFromDSL: 'DSL से आयात करें',
|
||||
importFromDSLUrlPlaceholder: 'डीएसएल लिंक यहां पेस्ट करें',
|
||||
dslUploader: {
|
||||
button: 'फ़ाइल खींचकर छोड़ें, या',
|
||||
browse: 'ब्राउज़ करें',
|
||||
},
|
||||
mermaid: {
|
||||
handDrawn: 'हाथ खींचा',
|
||||
classic: 'क्लासिक',
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ const translation = {
|
|||
uploader: {
|
||||
title: 'फ़ाइल अपलोड करें',
|
||||
button: 'फ़ाइलों या फ़ोल्डरों को खींचें और छोड़ें, या',
|
||||
buttonSingleFile: 'फ़ाइल को खींचें और छोड़ें, या',
|
||||
browse: 'ब्राउज़ करें',
|
||||
tip: 'समर्थित {{supportTypes}}। प्रत्येक अधिकतम {{size}}MB।',
|
||||
validation: {
|
||||
|
|
|
|||
|
|
@ -298,6 +298,18 @@ const translation = {
|
|||
zoomTo50: '50% पर ज़ूम करें',
|
||||
zoomTo100: '100% पर ज़ूम करें',
|
||||
zoomToFit: 'फिट करने के लिए ज़ूम करें',
|
||||
alignRight: 'दाईं ओर',
|
||||
alignLeft: 'बाईं ओर',
|
||||
alignTop: 'शीर्ष',
|
||||
horizontal: 'क्षैतिज',
|
||||
alignNodes: 'नोड्स को संरेखित करें',
|
||||
selectionAlignment: 'चयन संरेखण',
|
||||
alignCenter: 'केंद्र',
|
||||
vertical: 'ऊर्ध्वाधर',
|
||||
distributeHorizontal: 'क्षैतिज स्पेस',
|
||||
alignBottom: 'नीचे',
|
||||
distributeVertical: 'ऊर्ध्वाधर स्पेस',
|
||||
alignMiddle: 'मध्य',
|
||||
},
|
||||
panel: {
|
||||
userInputField: 'उपयोगकर्ता इनपुट फ़ील्ड',
|
||||
|
|
|
|||
|
|
@ -215,7 +215,7 @@ const translation = {
|
|||
after: '',
|
||||
},
|
||||
},
|
||||
contentEnableLabel: 'Abilitato il contenuto moderato',
|
||||
contentEnableLabel: 'Moderazione dei contenuti abilitata',
|
||||
},
|
||||
fileUpload: {
|
||||
title: 'Caricamento File',
|
||||
|
|
|
|||
|
|
@ -178,6 +178,10 @@ const translation = {
|
|||
importFromDSLFile: 'Da file DSL',
|
||||
importFromDSL: 'Importazione da DSL',
|
||||
importFromDSLUrlPlaceholder: 'Incolla qui il link DSL',
|
||||
dslUploader: {
|
||||
button: 'Trascina e rilascia il file, o',
|
||||
browse: 'Sfoglia',
|
||||
},
|
||||
mermaid: {
|
||||
handDrawn: 'Disegnato a mano',
|
||||
classic: 'Classico',
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ const translation = {
|
|||
uploader: {
|
||||
title: 'Carica file',
|
||||
button: 'Trascina e rilascia file o cartelle, oppure',
|
||||
buttonSingleFile: 'Trascina e rilascia un file, oppure',
|
||||
browse: 'Sfoglia',
|
||||
tip: 'Supporta {{supportTypes}}. Max {{size}}MB ciascuno.',
|
||||
validation: {
|
||||
|
|
|
|||
|
|
@ -301,6 +301,18 @@ const translation = {
|
|||
zoomTo50: 'Zoom al 50%',
|
||||
zoomTo100: 'Zoom al 100%',
|
||||
zoomToFit: 'Zoom per Adattare',
|
||||
alignRight: 'A destra',
|
||||
selectionAlignment: 'Allineamento della selezione',
|
||||
alignBottom: 'In basso',
|
||||
alignTop: 'In alto',
|
||||
vertical: 'Verticale',
|
||||
alignCenter: 'Centro',
|
||||
alignLeft: 'A sinistra',
|
||||
alignMiddle: 'Mezzo',
|
||||
horizontal: 'Orizzontale',
|
||||
alignNodes: 'Allinea nodi',
|
||||
distributeHorizontal: 'Spazia orizzontalmente',
|
||||
distributeVertical: 'Spazia verticalmente',
|
||||
},
|
||||
panel: {
|
||||
userInputField: 'Campo di Input Utente',
|
||||
|
|
|
|||
|
|
@ -163,7 +163,7 @@ const translation = {
|
|||
moderation: {
|
||||
title: 'コンテンツのモデレーション',
|
||||
description: 'モデレーション API を使用するか、機密語リストを維持することで、モデルの出力を安全にします。',
|
||||
contentEnableLabel: 'モデレート・コンテンツを有効にする',
|
||||
contentEnableLabel: 'コンテンツモデレーションが有効',
|
||||
allEnabled: '入力/出力コンテンツが有効になっています',
|
||||
inputEnabled: '入力コンテンツが有効になっています',
|
||||
outputEnabled: '出力コンテンツが有効になっています',
|
||||
|
|
|
|||
|
|
@ -23,6 +23,10 @@ const translation = {
|
|||
importFromDSLFile: 'DSL ファイルから',
|
||||
importFromDSLUrl: 'URL から',
|
||||
importFromDSLUrlPlaceholder: 'DSL リンクをここに貼り付けます',
|
||||
dslUploader: {
|
||||
button: 'ファイルをドラッグ&ドロップするか、',
|
||||
browse: '参照',
|
||||
},
|
||||
deleteAppConfirmTitle: 'このアプリを削除しますか?',
|
||||
deleteAppConfirmContent:
|
||||
'アプリを削除すると、元に戻すことはできません。他のユーザーはもはやこのアプリにアクセスできず、すべてのプロンプトの設定とログが永久に削除されます。',
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ const translation = {
|
|||
uploader: {
|
||||
title: 'テキストファイルをアップロード',
|
||||
button: 'ファイルまたはフォルダをドラッグアンドドロップする',
|
||||
buttonSingleFile: 'ファイルをドラッグアンドドロップする',
|
||||
browse: '参照',
|
||||
tip: '{{supportTypes}}をサポートしています。1 つあたりの最大サイズは{{size}}MB です。',
|
||||
validation: {
|
||||
|
|
|
|||
|
|
@ -287,6 +287,18 @@ const translation = {
|
|||
zoomTo50: '50% サイズ',
|
||||
zoomTo100: '等倍表示',
|
||||
zoomToFit: '画面に合わせる',
|
||||
horizontal: '横',
|
||||
alignBottom: '下',
|
||||
alignNodes: 'ノードを整列させる',
|
||||
vertical: '垂直',
|
||||
alignLeft: '左',
|
||||
alignTop: '上',
|
||||
alignRight: '右',
|
||||
alignMiddle: '中間',
|
||||
distributeVertical: '垂直方向に等間隔',
|
||||
alignCenter: 'センター',
|
||||
selectionAlignment: '選択の整列',
|
||||
distributeHorizontal: '水平方向に等間隔',
|
||||
},
|
||||
variableReference: {
|
||||
noAvailableVars: '利用可能な変数がありません',
|
||||
|
|
|
|||
|
|
@ -197,7 +197,7 @@ const translation = {
|
|||
after: '에 OpenAI API 키가 설정되어 있어야 합니다.',
|
||||
},
|
||||
},
|
||||
contentEnableLabel: '중간 콘텐츠 사용',
|
||||
contentEnableLabel: '콘텐츠 모더레이션이 활성화됨',
|
||||
},
|
||||
fileUpload: {
|
||||
title: '파일 업로드',
|
||||
|
|
|
|||
|
|
@ -189,6 +189,10 @@ const translation = {
|
|||
importFromDSLFile: 'DSL 파일에서',
|
||||
importFromDSLUrl: 'URL 에서',
|
||||
importFromDSLUrlPlaceholder: '여기에 DSL 링크 붙여 넣기',
|
||||
dslUploader: {
|
||||
button: '파일을 드래그 앤 드롭하거나',
|
||||
browse: '찾아보기',
|
||||
},
|
||||
mermaid: {
|
||||
handDrawn: '손으로 그린',
|
||||
classic: '고전',
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ const translation = {
|
|||
uploader: {
|
||||
title: '텍스트 파일 업로드',
|
||||
button: '파일이나 폴더를 끌어서 놓기',
|
||||
buttonSingleFile: '파일을 끌어서 놓기',
|
||||
browse: '찾아보기',
|
||||
tip: '{{supportTypes}}을 (를) 지원합니다. 파일당 최대 크기는 {{size}}MB 입니다.',
|
||||
validation: {
|
||||
|
|
|
|||
|
|
@ -308,6 +308,18 @@ const translation = {
|
|||
zoomTo50: '50% 로 확대',
|
||||
zoomTo100: '100% 로 확대',
|
||||
zoomToFit: '화면에 맞게 확대',
|
||||
alignCenter: '가운데',
|
||||
alignRight: '오른쪽',
|
||||
alignLeft: '왼쪽',
|
||||
vertical: '세로',
|
||||
alignTop: '맨 위로',
|
||||
alignMiddle: '중간',
|
||||
alignNodes: '노드 정렬',
|
||||
distributeVertical: '수직 공간',
|
||||
horizontal: '가로',
|
||||
selectionAlignment: '선택 정렬',
|
||||
alignBottom: '맨 아래로',
|
||||
distributeHorizontal: '수평 공간',
|
||||
},
|
||||
panel: {
|
||||
userInputField: '사용자 입력 필드',
|
||||
|
|
|
|||
|
|
@ -173,6 +173,10 @@ const translation = {
|
|||
importFromDSLUrl: 'Z adresu URL',
|
||||
importFromDSLFile: 'Z pliku DSL',
|
||||
importFromDSLUrlPlaceholder: 'Wklej tutaj link DSL',
|
||||
dslUploader: {
|
||||
button: 'Przeciągnij i upuść plik, lub',
|
||||
browse: 'Przeglądaj',
|
||||
},
|
||||
mermaid: {
|
||||
handDrawn: 'Ręcznie rysowane',
|
||||
classic: 'Klasyczny',
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ const translation = {
|
|||
uploader: {
|
||||
title: 'Prześlij plik tekstowy',
|
||||
button: 'Przeciągnij i upuść pliki lub foldery lub',
|
||||
buttonSingleFile: 'Przeciągnij i upuść plik lub',
|
||||
browse: 'Przeglądaj',
|
||||
tip: 'Obsługuje {{supportTypes}}. Maksymalnie {{size}}MB każdy.',
|
||||
validation: {
|
||||
|
|
|
|||
|
|
@ -287,6 +287,18 @@ const translation = {
|
|||
zoomTo50: 'Powiększ do 50%',
|
||||
zoomTo100: 'Powiększ do 100%',
|
||||
zoomToFit: 'Dopasuj do ekranu',
|
||||
alignMiddle: 'Środek',
|
||||
alignTop: 'Do góry',
|
||||
distributeHorizontal: 'Odstęp w poziomie',
|
||||
alignCenter: 'Centrum',
|
||||
alignRight: 'Prawy',
|
||||
alignNodes: 'Wyrównywanie węzłów',
|
||||
selectionAlignment: 'Wyrównanie zaznaczenia',
|
||||
horizontal: 'Poziomy',
|
||||
distributeVertical: 'Przestrzeń w pionie',
|
||||
alignBottom: 'Do dołu',
|
||||
alignLeft: 'Lewy',
|
||||
vertical: 'Pionowy',
|
||||
},
|
||||
panel: {
|
||||
userInputField: 'Pole wprowadzania użytkownika',
|
||||
|
|
|
|||
|
|
@ -197,7 +197,7 @@ const translation = {
|
|||
after: '',
|
||||
},
|
||||
},
|
||||
contentEnableLabel: 'Conteúdo moderado habilitado',
|
||||
contentEnableLabel: 'Moderação de conteúdo habilitada',
|
||||
},
|
||||
fileUpload: {
|
||||
title: 'Upload de Arquivo',
|
||||
|
|
|
|||
|
|
@ -169,6 +169,10 @@ const translation = {
|
|||
title: 'Use o ícone do web app para substituir 🤖',
|
||||
},
|
||||
importFromDSLUrlPlaceholder: 'Cole o link DSL aqui',
|
||||
dslUploader: {
|
||||
button: 'Arraste e solte o arquivo, ou',
|
||||
browse: 'Navegar',
|
||||
},
|
||||
importFromDSLUrl: 'Do URL',
|
||||
importFromDSLFile: 'Do arquivo DSL',
|
||||
importFromDSL: 'Importar de DSL',
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ const translation = {
|
|||
uploader: {
|
||||
title: 'Enviar arquivo de texto',
|
||||
button: 'Arraste e solte arquivos ou pastas, ou',
|
||||
buttonSingleFile: 'Arraste e solte um arquivo, ou',
|
||||
browse: 'Navegar',
|
||||
tip: 'Suporta {{supportTypes}}. Máximo de {{size}}MB cada.',
|
||||
validation: {
|
||||
|
|
|
|||
|
|
@ -287,6 +287,18 @@ const translation = {
|
|||
zoomTo50: 'Aproximar para 50%',
|
||||
zoomTo100: 'Aproximar para 100%',
|
||||
zoomToFit: 'Aproximar para ajustar',
|
||||
vertical: 'Vertical',
|
||||
alignNodes: 'Alinhar nós',
|
||||
selectionAlignment: 'Alinhamento de seleção',
|
||||
alignLeft: 'Esquerda',
|
||||
alignBottom: 'Fundo',
|
||||
distributeHorizontal: 'Espaço horizontalmente',
|
||||
alignMiddle: 'Meio',
|
||||
alignRight: 'Direita',
|
||||
horizontal: 'Horizontal',
|
||||
distributeVertical: 'Espaço Verticalmente',
|
||||
alignCenter: 'Centro',
|
||||
alignTop: 'Topo',
|
||||
},
|
||||
panel: {
|
||||
userInputField: 'Campo de entrada do usuário',
|
||||
|
|
|
|||
|
|
@ -197,7 +197,7 @@ const translation = {
|
|||
after: '',
|
||||
},
|
||||
},
|
||||
contentEnableLabel: 'Conținut moderat activat',
|
||||
contentEnableLabel: 'Moderarea conținutului activată',
|
||||
},
|
||||
fileUpload: {
|
||||
title: 'Încărcare fișier',
|
||||
|
|
|
|||
|
|
@ -171,6 +171,10 @@ const translation = {
|
|||
importFromDSL: 'Import din DSL',
|
||||
importFromDSLUrl: 'De la URL',
|
||||
importFromDSLUrlPlaceholder: 'Lipiți linkul DSL aici',
|
||||
dslUploader: {
|
||||
button: 'Trageți și plasați fișierul, sau',
|
||||
browse: 'Răsfoiți',
|
||||
},
|
||||
importFromDSLFile: 'Din fișierul DSL',
|
||||
mermaid: {
|
||||
handDrawn: 'Desenat de mână',
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ const translation = {
|
|||
uploader: {
|
||||
title: 'Încărcați fișier text',
|
||||
button: 'Trageți și plasați fișiere sau foldere sau',
|
||||
buttonSingleFile: 'Trageți și plasați un fișier sau',
|
||||
browse: 'Răsfoire',
|
||||
tip: 'Acceptă {{supportTypes}}. Maxim {{size}}MB fiecare.',
|
||||
validation: {
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue