diff --git a/.devcontainer/post_create_command.sh b/.devcontainer/post_create_command.sh index d3a5dc5595..a26fd076ed 100755 --- a/.devcontainer/post_create_command.sh +++ b/.devcontainer/post_create_command.sh @@ -1,17 +1,15 @@ #!/bin/bash WORKSPACE_ROOT=$(pwd) -npm add -g pnpm@10.15.0 corepack enable cd web && pnpm install pipx install uv echo "alias start-api=\"cd $WORKSPACE_ROOT/api && uv run python -m flask run --host 0.0.0.0 --port=5001 --debug\"" >> ~/.bashrc -echo "alias start-worker=\"cd $WORKSPACE_ROOT/api && uv run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion,plugin,workflow_storage\"" >> ~/.bashrc +echo "alias start-worker=\"cd $WORKSPACE_ROOT/api && uv run python -m celery -A app.celery worker -P threads -c 1 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor\"" >> ~/.bashrc echo "alias start-web=\"cd $WORKSPACE_ROOT/web && pnpm dev\"" >> ~/.bashrc echo "alias start-web-prod=\"cd $WORKSPACE_ROOT/web && pnpm build && pnpm start\"" >> ~/.bashrc echo "alias start-containers=\"cd $WORKSPACE_ROOT/docker && docker-compose -f docker-compose.middleware.yaml -p dify --env-file middleware.env up -d\"" >> ~/.bashrc echo "alias stop-containers=\"cd $WORKSPACE_ROOT/docker && docker-compose -f docker-compose.middleware.yaml -p dify --env-file middleware.env down\"" >> ~/.bashrc source /home/vscode/.bashrc - diff --git a/.editorconfig b/.editorconfig index 374da0b5d2..be14939ddb 100644 --- a/.editorconfig +++ b/.editorconfig @@ -29,7 +29,7 @@ trim_trailing_whitespace = false # Matches multiple files with brace expansion notation # Set default charset -[*.{js,tsx}] +[*.{js,jsx,ts,tsx,mjs}] indent_style = space indent_size = 2 diff --git a/.github/workflows/api-tests.yml b/.github/workflows/api-tests.yml index 37d351627b..557d747a8c 100644 --- a/.github/workflows/api-tests.yml +++ b/.github/workflows/api-tests.yml @@ -62,7 +62,7 @@ jobs: compose-file: | docker/docker-compose.middleware.yaml services: | - db + db_postgres redis sandbox ssrf_proxy diff --git a/.github/workflows/autofix.yml b/.github/workflows/autofix.yml index 2ce8a09a7d..81392a9734 100644 --- a/.github/workflows/autofix.yml +++ b/.github/workflows/autofix.yml @@ -28,6 +28,11 @@ jobs: # Format code uv run ruff format .. + - name: count migration progress + run: | + cd api + ./cnt_base.sh + - name: ast-grep run: | uvx --from ast-grep-cli sg --pattern 'db.session.query($WHATEVER).filter($HERE)' --rewrite 'db.session.query($WHATEVER).where($HERE)' -l py --update-all diff --git a/.github/workflows/db-migration-test.yml b/.github/workflows/db-migration-test.yml index b9961a4714..101d973466 100644 --- a/.github/workflows/db-migration-test.yml +++ b/.github/workflows/db-migration-test.yml @@ -8,7 +8,7 @@ concurrency: cancel-in-progress: true jobs: - db-migration-test: + db-migration-test-postgres: runs-on: ubuntu-latest steps: @@ -45,7 +45,7 @@ jobs: compose-file: | docker/docker-compose.middleware.yaml services: | - db + db_postgres redis - name: Prepare configs @@ -57,3 +57,60 @@ jobs: env: DEBUG: true run: uv run --directory api flask upgrade-db + + db-migration-test-mysql: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + persist-credentials: false + + - name: Setup UV and Python + uses: astral-sh/setup-uv@v6 + with: + enable-cache: true + python-version: "3.12" + cache-dependency-glob: api/uv.lock + + - name: Install dependencies + run: uv sync --project api + - name: Ensure Offline migration are supported + run: | + # upgrade + uv run --directory api flask db upgrade 'base:head' --sql + # downgrade + uv run --directory api flask db downgrade 'head:base' --sql + + - name: Prepare middleware env for MySQL + run: | + cd docker + cp middleware.env.example middleware.env + sed -i 's/DB_TYPE=postgresql/DB_TYPE=mysql/' middleware.env + sed -i 's/DB_HOST=db_postgres/DB_HOST=db_mysql/' middleware.env + sed -i 's/DB_PORT=5432/DB_PORT=3306/' middleware.env + sed -i 's/DB_USERNAME=postgres/DB_USERNAME=mysql/' middleware.env + + - name: Set up Middlewares + uses: hoverkraft-tech/compose-action@v2.0.2 + with: + compose-file: | + docker/docker-compose.middleware.yaml + services: | + db_mysql + redis + + - name: Prepare configs for MySQL + run: | + cd api + cp .env.example .env + sed -i 's/DB_TYPE=postgresql/DB_TYPE=mysql/' .env + sed -i 's/DB_PORT=5432/DB_PORT=3306/' .env + sed -i 's/DB_USERNAME=postgres/DB_USERNAME=root/' .env + + - name: Run DB Migration + env: + DEBUG: true + run: uv run --directory api flask upgrade-db diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 06584c1b78..e652657705 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -103,6 +103,11 @@ jobs: run: | pnpm run lint + - name: Web type check + if: steps.changed-files.outputs.any_changed == 'true' + working-directory: ./web + run: pnpm run type-check + docker-compose-template: name: Docker Compose Template runs-on: ubuntu-latest diff --git a/.github/workflows/vdb-tests.yml b/.github/workflows/vdb-tests.yml index f54f5d6c64..291171e5c7 100644 --- a/.github/workflows/vdb-tests.yml +++ b/.github/workflows/vdb-tests.yml @@ -51,13 +51,13 @@ jobs: - name: Expose Service Ports run: sh .github/workflows/expose_service_ports.sh - - name: Set up Vector Store (TiDB) - uses: hoverkraft-tech/compose-action@v2.0.2 - with: - compose-file: docker/tidb/docker-compose.yaml - services: | - tidb - tiflash +# - name: Set up Vector Store (TiDB) +# uses: hoverkraft-tech/compose-action@v2.0.2 +# with: +# compose-file: docker/tidb/docker-compose.yaml +# services: | +# tidb +# tiflash - name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase, OceanBase) uses: hoverkraft-tech/compose-action@v2.0.2 @@ -83,8 +83,8 @@ jobs: ls -lah . cp api/tests/integration_tests/.env.example api/tests/integration_tests/.env - - name: Check VDB Ready (TiDB) - run: uv run --project api python api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py +# - name: Check VDB Ready (TiDB) +# run: uv run --project api python api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py - name: Test Vector Stores run: uv run --project api bash dev/pytest/pytest_vdb.sh diff --git a/.gitignore b/.gitignore index 32ec6914cd..79ba44b207 100644 --- a/.gitignore +++ b/.gitignore @@ -100,6 +100,7 @@ __pypackages__/ # Celery stuff celerybeat-schedule +celerybeat-schedule.db celerybeat.pid # SageMath parsed files @@ -185,6 +186,8 @@ docker/volumes/couchbase/* docker/volumes/oceanbase/* docker/volumes/plugin_daemon/* docker/volumes/matrixone/* +docker/volumes/mysql/* +docker/volumes/seekdb/* !docker/volumes/oceanbase/init.d docker/nginx/conf.d/default.conf diff --git a/.vscode/launch.json.template b/.vscode/launch.json.template index f5a7f0893b..cb934d01b5 100644 --- a/.vscode/launch.json.template +++ b/.vscode/launch.json.template @@ -8,8 +8,7 @@ "module": "flask", "env": { "FLASK_APP": "app.py", - "FLASK_ENV": "development", - "GEVENT_SUPPORT": "True" + "FLASK_ENV": "development" }, "args": [ "run", @@ -28,9 +27,7 @@ "type": "debugpy", "request": "launch", "module": "celery", - "env": { - "GEVENT_SUPPORT": "True" - }, + "env": {}, "args": [ "-A", "app.celery", @@ -40,7 +37,7 @@ "-c", "1", "-Q", - "dataset,generation,mail,ops_trace", + "dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor", "--loglevel", "INFO" ], diff --git a/Makefile b/Makefile index 19c398ec82..07afd8187e 100644 --- a/Makefile +++ b/Makefile @@ -70,6 +70,11 @@ type-check: @uv run --directory api --dev basedpyright @echo "✅ Type check complete" +test: + @echo "🧪 Running backend unit tests..." + @uv run --project api --dev dev/pytest/pytest_unit_tests.sh + @echo "✅ Tests complete" + # Build Docker images build-web: @echo "Building web Docker image: $(WEB_IMAGE):$(VERSION)..." @@ -119,6 +124,7 @@ help: @echo " make check - Check code with ruff" @echo " make lint - Format and fix code with ruff" @echo " make type-check - Run type checking with basedpyright" + @echo " make test - Run backend unit tests" @echo "" @echo "Docker Build Targets:" @echo " make build-web - Build web Docker image" @@ -128,4 +134,4 @@ help: @echo " make build-push-all - Build and push all Docker images" # Phony targets -.PHONY: build-web build-api push-web push-api build-all push-all build-push-all dev-setup prepare-docker prepare-web prepare-api dev-clean help format check lint type-check +.PHONY: build-web build-api push-web push-api build-all push-all build-push-all dev-setup prepare-docker prepare-web prepare-api dev-clean help format check lint type-check test diff --git a/README.md b/README.md index 110d74b63d..e5cc05fbc0 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,7 @@ All of Dify's offerings come with corresponding APIs, so you could effortlessly Use our [documentation](https://docs.dify.ai) for further references and more in-depth instructions. - **Dify for enterprise / organizations
** - We provide additional enterprise-centric features. [Log your questions for us through this chatbot](https://udify.app/chat/22L1zSxg6yW1cWQg) or [send us an email](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) to discuss enterprise needs.
+ We provide additional enterprise-centric features. [Send us an email](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) to discuss your enterprise needs.
> For startups and small businesses using AWS, check out [Dify Premium on AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) and deploy it to your own AWS VPC with one click. It's an affordable AMI offering with the option to create apps with custom logo and branding. diff --git a/api/.env.example b/api/.env.example index b659a1b3bf..ba512a668d 100644 --- a/api/.env.example +++ b/api/.env.example @@ -28,7 +28,7 @@ FILES_URL=http://localhost:5001 INTERNAL_FILES_URL=http://127.0.0.1:5001 # TRIGGER URL -TRIGGER_URL="http://localhost:5001" +TRIGGER_URL=http://localhost:5001 # The time in seconds after the signature is rejected FILES_ACCESS_TIMEOUT=300 @@ -72,12 +72,15 @@ REDIS_CLUSTERS_PASSWORD= # celery configuration CELERY_BROKER_URL=redis://:difyai123456@localhost:${REDIS_PORT}/1 CELERY_BACKEND=redis -# PostgreSQL database configuration + +# Database configuration +DB_TYPE=postgresql DB_USERNAME=postgres DB_PASSWORD=difyai123456 DB_HOST=localhost DB_PORT=5432 DB_DATABASE=dify + SQLALCHEMY_POOL_PRE_PING=true SQLALCHEMY_POOL_TIMEOUT=30 @@ -159,9 +162,11 @@ SUPABASE_URL=your-server-url # CORS configuration WEB_API_CORS_ALLOW_ORIGINS=http://localhost:3000,* CONSOLE_CORS_ALLOW_ORIGINS=http://localhost:3000,* +# When the frontend and backend run on different subdomains, set COOKIE_DOMAIN to the site’s top-level domain (e.g., `example.com`). Leading dots are optional. +COOKIE_DOMAIN= # Vector database configuration -# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`. +# Supported values are `weaviate`, `oceanbase`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`. VECTOR_STORE=weaviate # Prefix used to create collection name in vector database VECTOR_INDEX_NAME_PREFIX=Vector_index @@ -172,6 +177,17 @@ WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih WEAVIATE_GRPC_ENABLED=false WEAVIATE_BATCH_SIZE=100 +# OceanBase Vector configuration +OCEANBASE_VECTOR_HOST=127.0.0.1 +OCEANBASE_VECTOR_PORT=2881 +OCEANBASE_VECTOR_USER=root@test +OCEANBASE_VECTOR_PASSWORD=difyai123456 +OCEANBASE_VECTOR_DATABASE=test +OCEANBASE_MEMORY_LIMIT=6G +OCEANBASE_ENABLE_HYBRID_SEARCH=false +OCEANBASE_FULLTEXT_PARSER=ik +SEEKDB_MEMORY_LIMIT=2G + # Qdrant configuration, use `http://localhost:6333` for local mode or `https://your-qdrant-cluster-url.qdrant.io` for remote mode QDRANT_URL=http://localhost:6333 QDRANT_API_KEY=difyai123456 @@ -337,15 +353,6 @@ LINDORM_PASSWORD=admin LINDORM_USING_UGC=True LINDORM_QUERY_TIMEOUT=1 -# OceanBase Vector configuration -OCEANBASE_VECTOR_HOST=127.0.0.1 -OCEANBASE_VECTOR_PORT=2881 -OCEANBASE_VECTOR_USER=root@test -OCEANBASE_VECTOR_PASSWORD=difyai123456 -OCEANBASE_VECTOR_DATABASE=test -OCEANBASE_MEMORY_LIMIT=6G -OCEANBASE_ENABLE_HYBRID_SEARCH=false - # AlibabaCloud MySQL Vector configuration ALIBABACLOUD_MYSQL_HOST=127.0.0.1 ALIBABACLOUD_MYSQL_PORT=3306 @@ -371,6 +378,12 @@ UPLOAD_IMAGE_FILE_SIZE_LIMIT=10 UPLOAD_VIDEO_FILE_SIZE_LIMIT=100 UPLOAD_AUDIO_FILE_SIZE_LIMIT=50 +# Comma-separated list of file extensions blocked from upload for security reasons. +# Extensions should be lowercase without dots (e.g., exe,bat,sh,dll). +# Empty by default to allow all file types. +# Recommended: exe,bat,cmd,com,scr,vbs,ps1,msi,dll +UPLOAD_FILE_EXTENSION_BLACKLIST= + # Model configuration MULTIMODAL_SEND_FORMAT=base64 PROMPT_GENERATION_MAX_TOKENS=512 @@ -518,7 +531,7 @@ API_WORKFLOW_NODE_EXECUTION_REPOSITORY=repositories.sqlalchemy_api_workflow_node API_WORKFLOW_RUN_REPOSITORY=repositories.sqlalchemy_api_workflow_run_repository.DifyAPISQLAlchemyWorkflowRunRepository # Workflow log cleanup configuration # Enable automatic cleanup of workflow run logs to manage database size -WORKFLOW_LOG_CLEANUP_ENABLED=true +WORKFLOW_LOG_CLEANUP_ENABLED=false # Number of days to retain workflow run logs (default: 30 days) WORKFLOW_LOG_RETENTION_DAYS=30 # Batch size for workflow log cleanup operations (default: 100) @@ -617,3 +630,9 @@ SWAGGER_UI_PATH=/swagger-ui.html # Whether to encrypt dataset IDs when exporting DSL files (default: true) # Set to false to export dataset IDs as plain text for easier cross-environment import DSL_EXPORT_ENCRYPT_DATASET_ID=true + +# Tenant isolated task queue configuration +TENANT_ISOLATED_TASK_CONCURRENCY=1 + +# Maximum number of segments for dataset segments API (0 for unlimited) +DATASET_MAX_SEGMENTS_PER_REQUEST=0 diff --git a/api/.vscode/launch.json.example b/api/.vscode/launch.json.example index a52eca63d9..092c66e798 100644 --- a/api/.vscode/launch.json.example +++ b/api/.vscode/launch.json.example @@ -54,7 +54,7 @@ "--loglevel", "DEBUG", "-Q", - "dataset,generation,mail,ops_trace,app_deletion,workflow" + "dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor" ] } ] diff --git a/api/AGENTS.md b/api/AGENTS.md index 6fa97cdea4..17398ec4b8 100644 --- a/api/AGENTS.md +++ b/api/AGENTS.md @@ -2,57 +2,61 @@ Start with the section that best matches your need. Each entry lists the problems it solves plus key files/concepts so you know what to expect before opening it. ---- +______________________________________________________________________ ## Platform Foundations -- **[Infrastructure Overview](agent_skills/infra.md)** - When to read this: - - You need to understand where a feature belongs in the architecture. - - You’re wiring storage, Redis, vector stores, or OTEL. - - You’re about to add CLI commands or async jobs. - What it covers: configuration stack (`configs/app_config.py`, remote settings), storage entry points (`extensions/ext_storage.py`, `core/file/file_manager.py`), Redis conventions (`extensions/ext_redis.py`), plugin runtime topology, vector-store factory (`core/rag/datasource/vdb/*`), observability hooks, SSRF proxy usage, and core CLI commands. +- **[Infrastructure Overview](agent_skills/infra.md)**\ + When to read this: -- **[Coding Style](agent_skills/coding_style.md)** - When to read this: - - You’re writing or reviewing backend code and need the authoritative checklist. - - You’re unsure about Pydantic validators, SQLAlchemy session usage, or logging patterns. - - You want the exact lint/type/test commands used in PRs. - Includes: Ruff & BasedPyright commands, no-annotation policy, session examples (`with Session(db.engine, ...)`), `@field_validator` usage, logging expectations, and the rule set for file size, helpers, and package management. + - You need to understand where a feature belongs in the architecture. + - You’re wiring storage, Redis, vector stores, or OTEL. + - You’re about to add CLI commands or async jobs.\ + What it covers: configuration stack (`configs/app_config.py`, remote settings), storage entry points (`extensions/ext_storage.py`, `core/file/file_manager.py`), Redis conventions (`extensions/ext_redis.py`), plugin runtime topology, vector-store factory (`core/rag/datasource/vdb/*`), observability hooks, SSRF proxy usage, and core CLI commands. ---- +- **[Coding Style](agent_skills/coding_style.md)**\ + When to read this: + + - You’re writing or reviewing backend code and need the authoritative checklist. + - You’re unsure about Pydantic validators, SQLAlchemy session usage, or logging patterns. + - You want the exact lint/type/test commands used in PRs.\ + Includes: Ruff & BasedPyright commands, no-annotation policy, session examples (`with Session(db.engine, ...)`), `@field_validator` usage, logging expectations, and the rule set for file size, helpers, and package management. + +______________________________________________________________________ ## Plugin & Extension Development -- **[Plugin Systems](agent_skills/plugin.md)** - When to read this: - - You’re building or debugging a marketplace plugin. - - You need to know how manifests, providers, daemons, and migrations fit together. - What it covers: plugin manifests (`core/plugin/entities/plugin.py`), installation/upgrade flows (`services/plugin/plugin_service.py`, CLI commands), runtime adapters (`core/plugin/impl/*` for tool/model/datasource/trigger/endpoint/agent), daemon coordination (`core/plugin/entities/plugin_daemon.py`), and how provider registries surface capabilities to the rest of the platform. +- **[Plugin Systems](agent_skills/plugin.md)**\ + When to read this: -- **[Plugin OAuth](agent_skills/plugin_oauth.md)** - When to read this: - - You must integrate OAuth for a plugin or datasource. - - You’re handling credential encryption or refresh flows. - Topics: credential storage, encryption helpers (`core/helper/provider_encryption.py`), OAuth client bootstrap (`services/plugin/oauth_service.py`, `services/plugin/plugin_parameter_service.py`), and how console/API layers expose the flows. + - You’re building or debugging a marketplace plugin. + - You need to know how manifests, providers, daemons, and migrations fit together.\ + What it covers: plugin manifests (`core/plugin/entities/plugin.py`), installation/upgrade flows (`services/plugin/plugin_service.py`, CLI commands), runtime adapters (`core/plugin/impl/*` for tool/model/datasource/trigger/endpoint/agent), daemon coordination (`core/plugin/entities/plugin_daemon.py`), and how provider registries surface capabilities to the rest of the platform. ---- +- **[Plugin OAuth](agent_skills/plugin_oauth.md)**\ + When to read this: + + - You must integrate OAuth for a plugin or datasource. + - You’re handling credential encryption or refresh flows.\ + Topics: credential storage, encryption helpers (`core/helper/provider_encryption.py`), OAuth client bootstrap (`services/plugin/oauth_service.py`, `services/plugin/plugin_parameter_service.py`), and how console/API layers expose the flows. + +______________________________________________________________________ ## Workflow Entry & Execution -- **[Trigger Concepts](agent_skills/trigger.md)** - When to read this: - - You’re debugging why a workflow didn’t start. - - You’re adding a new trigger type or hook. - - You need to trace async execution, draft debugging, or webhook/schedule pipelines. - Details: Start-node taxonomy, webhook & schedule internals (`core/workflow/nodes/trigger_*`, `services/trigger/*`), async orchestration (`services/async_workflow_service.py`, Celery queues), debug event bus, and storage/logging interactions. +- **[Trigger Concepts](agent_skills/trigger.md)**\ + When to read this: + - You’re debugging why a workflow didn’t start. + - You’re adding a new trigger type or hook. + - You need to trace async execution, draft debugging, or webhook/schedule pipelines.\ + Details: Start-node taxonomy, webhook & schedule internals (`core/workflow/nodes/trigger_*`, `services/trigger/*`), async orchestration (`services/async_workflow_service.py`, Celery queues), debug event bus, and storage/logging interactions. ---- +______________________________________________________________________ ## Additional Notes for Agents -- All skill docs assume you follow the coding style guide—run Ruff/BasedPyright/tests listed there before submitting changes. -- When you cannot find an answer in these briefs, search the codebase using the paths referenced (e.g., `core/plugin/impl/tool.py`, `services/dataset_service.py`). -- If you run into cross-cutting concerns (tenancy, configuration, storage), check the infrastructure guide first; it links to most supporting modules. -- Keep multi-tenancy and configuration central: everything flows through `configs.dify_config` and `tenant_id`. +- All skill docs assume you follow the coding style guide—run Ruff/BasedPyright/tests listed there before submitting changes. +- When you cannot find an answer in these briefs, search the codebase using the paths referenced (e.g., `core/plugin/impl/tool.py`, `services/dataset_service.py`). +- If you run into cross-cutting concerns (tenancy, configuration, storage), check the infrastructure guide first; it links to most supporting modules. +- Keep multi-tenancy and configuration central: everything flows through `configs.dify_config` and `tenant_id`. - When touching plugins or triggers, consult both the system overview and the specialised doc to ensure you adjust lifecycle, storage, and observability consistently. diff --git a/api/Dockerfile b/api/Dockerfile index 79a4892768..ed61923a40 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -15,7 +15,11 @@ FROM base AS packages # RUN sed -i 's@deb.debian.org@mirrors.aliyun.com@g' /etc/apt/sources.list.d/debian.sources RUN apt-get update \ - && apt-get install -y --no-install-recommends gcc g++ libc-dev libffi-dev libgmp-dev libmpfr-dev libmpc-dev + && apt-get install -y --no-install-recommends \ + # basic environment + g++ \ + # for building gmpy2 + libmpfr-dev libmpc-dev # Install Python dependencies COPY pyproject.toml uv.lock ./ @@ -49,7 +53,9 @@ RUN \ # Install dependencies && apt-get install -y --no-install-recommends \ # basic environment - curl nodejs libgmp-dev libmpfr-dev libmpc-dev \ + curl nodejs \ + # for gmpy2 \ + libgmp-dev libmpfr-dev libmpc-dev \ # For Security expat libldap-2.5-0 perl libsqlite3-0 zlib1g \ # install fonts to support the use of tools like pypdfium2 diff --git a/api/README.md b/api/README.md index ea6f547a0a..2dab2ec6e6 100644 --- a/api/README.md +++ b/api/README.md @@ -15,8 +15,8 @@ ```bash cd ../docker cp middleware.env.example middleware.env - # change the profile to other vector database if you are not using weaviate - docker compose -f docker-compose.middleware.yaml --profile weaviate -p dify up -d + # change the profile to mysql if you are not using postgres,change the profile to other vector database if you are not using weaviate + docker compose -f docker-compose.middleware.yaml --profile postgresql --profile weaviate -p dify up -d cd ../api ``` @@ -26,6 +26,10 @@ cp .env.example .env ``` +> [!IMPORTANT] +> +> When the frontend and backend run on different subdomains, set COOKIE_DOMAIN to the site’s top-level domain (e.g., `example.com`). The frontend and backend must be under the same top-level domain in order to share authentication cookies. + 1. Generate a `SECRET_KEY` in the `.env` file. bash for Linux @@ -80,7 +84,7 @@ 1. If you need to handle and debug the async tasks (e.g. dataset importing and documents indexing), please start the worker service. ```bash -uv run celery -A app.celery worker -P gevent -c 2 --loglevel INFO -Q dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation +uv run celery -A app.celery worker -P threads -c 2 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor ``` Additionally, if you want to debug the celery scheduled tasks, you can run the following command in another terminal to start the beat service: diff --git a/api/agent_skills/coding_style.md b/api/agent_skills/coding_style.md index 26be5990aa..a2b66f0bd5 100644 --- a/api/agent_skills/coding_style.md +++ b/api/agent_skills/coding_style.md @@ -19,7 +19,6 @@ - Prefer simple functions over classes for lightweight helpers. - Keep files below 800 lines; split when necessary. - Keep code readable—no clever hacks. -- Never use type annotations. - Never use `print`; log with `logger = logging.getLogger(__name__)`. ## Guiding Principles @@ -31,6 +30,7 @@ ## SQLAlchemy Patterns - Models inherit from `models.base.Base`; never create ad-hoc metadata or engines. + - Open sessions with context managers: ```python @@ -45,7 +45,9 @@ ``` - Use SQLAlchemy expressions; avoid raw SQL unless necessary. + - Introduce repository abstractions only for very large tables (e.g., workflow executions) to support alternative storage strategies. + - Always scope queries by `tenant_id` and protect write paths with safeguards (`FOR UPDATE`, row counts, etc.). ## Storage & External IO @@ -57,7 +59,9 @@ ## Pydantic Usage - Define DTOs with Pydantic v2 models and forbid extras by default. + - Use `@field_validator` / `@model_validator` for domain rules. + - Example: ```python diff --git a/api/agent_skills/trigger.md b/api/agent_skills/trigger.md index 1ff3f1f9e1..f4b076332c 100644 --- a/api/agent_skills/trigger.md +++ b/api/agent_skills/trigger.md @@ -14,8 +14,8 @@ Trigger is a collection of nodes that we called `Start` nodes, also, the concept Before `Trigger` concept is introduced, it's what we called `Start` node, but now, to avoid confusion, it was renamed to `UserInput` node, has a strong relation with `ServiceAPI` in `controllers/service_api/app` 1. `UserInput` node introduces a list of arguments that need to be provided by the user, finally it will be converted into variables in the workflow variable pool. -2. `ServiceAPI` accept those arguments, and pass through them into `UserInput` node. -3. For its detailed implementation, please refer to `core/workflow/nodes/start` +1. `ServiceAPI` accept those arguments, and pass through them into `UserInput` node. +1. For its detailed implementation, please refer to `core/workflow/nodes/start` ### Trigger Webhook @@ -34,7 +34,7 @@ To Achieve this, a `WorkflowSchedulePlan` model was introduced in `models/trigge `Trigger Plugin` node allows user define there own distributed trigger plugin, whenever a request was received, Dify forwards it to the plugin and wait for parsed variables from it. 1. Requests were saved in storage by `services/trigger/trigger_request_service.py`, referenced by `services/trigger/trigger_service.py`.`TriggerService`.`process_endpoint` -2. Plugins accept those requests and parse variables from it, see `core/plugin/impl/trigger.py` for details. +1. Plugins accept those requests and parse variables from it, see `core/plugin/impl/trigger.py` for details. A `subscription` concept was out here by Dify, it means an endpoint address from Dify was bound to thirdparty webhook service like `Github` `Slack` `Linear` `GoogleDrive` `Gmail` etc. Once a subscription was created, Dify continually receives requests from the platforms and handle them one by one. diff --git a/api/app.py b/api/app.py index e0a903b10d..99f70f32d5 100644 --- a/api/app.py +++ b/api/app.py @@ -1,7 +1,7 @@ import sys -def is_db_command(): +def is_db_command() -> bool: if len(sys.argv) > 1 and sys.argv[0].endswith("flask") and sys.argv[1] == "db": return True return False @@ -13,23 +13,12 @@ if is_db_command(): app = create_migrations_app() else: - # It seems that JetBrains Python debugger does not work well with gevent, - # so we need to disable gevent in debug mode. - # If you are using debugpy and set GEVENT_SUPPORT=True, you can debug with gevent. - # if (flask_debug := os.environ.get("FLASK_DEBUG", "0")) and flask_debug.lower() in {"false", "0", "no"}: - # from gevent import monkey + # Gunicorn and Celery handle monkey patching automatically in production by + # specifying the `gevent` worker class. Manual monkey patching is not required here. # - # # gevent - # monkey.patch_all() + # See `api/docker/entrypoint.sh` (lines 33 and 47) for details. # - # from grpc.experimental import gevent as grpc_gevent # type: ignore - # - # # grpc gevent - # grpc_gevent.init_gevent() - - # import psycogreen.gevent # type: ignore - # - # psycogreen.gevent.patch_psycopg() + # For third-party library patching, refer to `gunicorn.conf.py` and `celery_entrypoint.py`. from app_factory import create_app diff --git a/api/app_factory.py b/api/app_factory.py index 17c376de77..933cf294d1 100644 --- a/api/app_factory.py +++ b/api/app_factory.py @@ -18,6 +18,7 @@ def create_flask_app_with_configs() -> DifyApp: """ dify_app = DifyApp(__name__) dify_app.config.from_mapping(dify_config.model_dump()) + dify_app.config["RESTX_INCLUDE_ALL_MODELS"] = True # add before request hook @dify_app.before_request diff --git a/api/cnt_base.sh b/api/cnt_base.sh new file mode 100755 index 0000000000..9e407f3584 --- /dev/null +++ b/api/cnt_base.sh @@ -0,0 +1,7 @@ +#!/bin/bash +set -euxo pipefail + +for pattern in "Base" "TypeBase"; do + printf "%s " "$pattern" + grep "($pattern):" -r --include='*.py' --exclude-dir=".venv" --exclude-dir="tests" . | wc -l +done diff --git a/api/commands.py b/api/commands.py index 2246c90ab5..e15c996a34 100644 --- a/api/commands.py +++ b/api/commands.py @@ -321,6 +321,8 @@ def migrate_knowledge_vector_database(): ) datasets = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False) + if not datasets.items: + break except SQLAlchemyError: raise @@ -1469,7 +1471,10 @@ def setup_datasource_oauth_client(provider, client_params): @click.command("transform-datasource-credentials", help="Transform datasource credentials.") -def transform_datasource_credentials(): +@click.option( + "--environment", prompt=True, help="the environment to transform datasource credentials", default="online" +) +def transform_datasource_credentials(environment: str): """ Transform datasource credentials """ @@ -1480,9 +1485,14 @@ def transform_datasource_credentials(): notion_plugin_id = "langgenius/notion_datasource" firecrawl_plugin_id = "langgenius/firecrawl_datasource" jina_plugin_id = "langgenius/jina_datasource" - notion_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(notion_plugin_id) # pyright: ignore[reportPrivateUsage] - firecrawl_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(firecrawl_plugin_id) # pyright: ignore[reportPrivateUsage] - jina_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(jina_plugin_id) # pyright: ignore[reportPrivateUsage] + if environment == "online": + notion_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(notion_plugin_id) # pyright: ignore[reportPrivateUsage] + firecrawl_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(firecrawl_plugin_id) # pyright: ignore[reportPrivateUsage] + jina_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(jina_plugin_id) # pyright: ignore[reportPrivateUsage] + else: + notion_plugin_unique_identifier = None + firecrawl_plugin_unique_identifier = None + jina_plugin_unique_identifier = None oauth_credential_type = CredentialType.OAUTH2 api_key_credential_type = CredentialType.API_KEY @@ -1648,7 +1658,7 @@ def transform_datasource_credentials(): "integration_secret": api_key, } datasource_provider = DatasourceProvider( - provider="jina", + provider="jinareader", tenant_id=tenant_id, plugin_id=jina_plugin_id, auth_type=api_key_credential_type.value, diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index f7e0e7e865..7cce3847b4 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -77,10 +77,6 @@ class AppExecutionConfig(BaseSettings): description="Maximum number of concurrent active requests per app (0 for unlimited)", default=0, ) - APP_DAILY_RATE_LIMIT: NonNegativeInt = Field( - description="Maximum number of requests per app per day", - default=5000, - ) class CodeExecutionSandboxConfig(BaseSettings): @@ -360,12 +356,42 @@ class FileUploadConfig(BaseSettings): default=10, ) + inner_UPLOAD_FILE_EXTENSION_BLACKLIST: str = Field( + description=( + "Comma-separated list of file extensions that are blocked from upload. " + "Extensions should be lowercase without dots (e.g., 'exe,bat,sh,dll'). " + "Empty by default to allow all file types." + ), + validation_alias=AliasChoices("UPLOAD_FILE_EXTENSION_BLACKLIST"), + default="", + ) + + @computed_field # type: ignore[misc] + @property + def UPLOAD_FILE_EXTENSION_BLACKLIST(self) -> set[str]: + """ + Parse and return the blacklist as a set of lowercase extensions. + Returns an empty set if no blacklist is configured. + """ + if not self.inner_UPLOAD_FILE_EXTENSION_BLACKLIST: + return set() + return { + ext.strip().lower().strip(".") + for ext in self.inner_UPLOAD_FILE_EXTENSION_BLACKLIST.split(",") + if ext.strip() + } + class HttpConfig(BaseSettings): """ HTTP-related configurations for the application """ + COOKIE_DOMAIN: str = Field( + description="Explicit cookie domain for console/service cookies when sharing across subdomains", + default="", + ) + API_COMPRESSION_ENABLED: bool = Field( description="Enable or disable gzip compression for HTTP responses", default=False, @@ -944,6 +970,11 @@ class DataSetConfig(BaseSettings): default=True, ) + DATASET_MAX_SEGMENTS_PER_REQUEST: NonNegativeInt = Field( + description="Maximum number of segments for dataset segments API (0 for unlimited)", + default=0, + ) + class WorkspaceConfig(BaseSettings): """ @@ -1051,7 +1082,7 @@ class CeleryScheduleTasksConfig(BaseSettings): ) TRIGGER_PROVIDER_CREDENTIAL_THRESHOLD_SECONDS: int = Field( description="Proactive credential refresh threshold in seconds", - default=180, + default=60 * 60, ) TRIGGER_PROVIDER_SUBSCRIPTION_THRESHOLD_SECONDS: int = Field( description="Proactive subscription refresh threshold in seconds", @@ -1155,7 +1186,7 @@ class AccountConfig(BaseSettings): class WorkflowLogConfig(BaseSettings): - WORKFLOW_LOG_CLEANUP_ENABLED: bool = Field(default=True, description="Enable workflow run log cleanup") + WORKFLOW_LOG_CLEANUP_ENABLED: bool = Field(default=False, description="Enable workflow run log cleanup") WORKFLOW_LOG_RETENTION_DAYS: int = Field(default=30, description="Retention days for workflow run logs") WORKFLOW_LOG_CLEANUP_BATCH_SIZE: int = Field( default=100, description="Batch size for workflow run log cleanup operations" @@ -1174,6 +1205,13 @@ class SwaggerUIConfig(BaseSettings): ) +class TenantIsolatedTaskQueueConfig(BaseSettings): + TENANT_ISOLATED_TASK_CONCURRENCY: int = Field( + description="Number of tasks allowed to be delivered concurrently from isolated queue per tenant", + default=1, + ) + + class FeatureConfig( # place the configs in alphabet order AppExecutionConfig, @@ -1200,6 +1238,7 @@ class FeatureConfig( RagEtlConfig, RepositoryConfig, SecurityConfig, + TenantIsolatedTaskQueueConfig, ToolConfig, UpdateConfig, WorkflowConfig, diff --git a/api/configs/middleware/__init__.py b/api/configs/middleware/__init__.py index 816d0e442f..a5e35c99ca 100644 --- a/api/configs/middleware/__init__.py +++ b/api/configs/middleware/__init__.py @@ -105,6 +105,12 @@ class KeywordStoreConfig(BaseSettings): class DatabaseConfig(BaseSettings): + # Database type selector + DB_TYPE: Literal["postgresql", "mysql", "oceanbase"] = Field( + description="Database type to use. OceanBase is MySQL-compatible.", + default="postgresql", + ) + DB_HOST: str = Field( description="Hostname or IP address of the database server.", default="localhost", @@ -140,10 +146,10 @@ class DatabaseConfig(BaseSettings): default="", ) - SQLALCHEMY_DATABASE_URI_SCHEME: str = Field( - description="Database URI scheme for SQLAlchemy connection.", - default="postgresql", - ) + @computed_field # type: ignore[prop-decorator] + @property + def SQLALCHEMY_DATABASE_URI_SCHEME(self) -> str: + return "postgresql" if self.DB_TYPE == "postgresql" else "mysql+pymysql" @computed_field # type: ignore[prop-decorator] @property @@ -204,15 +210,15 @@ class DatabaseConfig(BaseSettings): # Parse DB_EXTRAS for 'options' db_extras_dict = dict(parse_qsl(self.DB_EXTRAS)) options = db_extras_dict.get("options", "") - # Always include timezone - timezone_opt = "-c timezone=UTC" - if options: - # Merge user options and timezone - merged_options = f"{options} {timezone_opt}" - else: - merged_options = timezone_opt - - connect_args = {"options": merged_options} + connect_args = {} + # Use the dynamic SQLALCHEMY_DATABASE_URI_SCHEME property + if self.SQLALCHEMY_DATABASE_URI_SCHEME.startswith("postgresql"): + timezone_opt = "-c timezone=UTC" + if options: + merged_options = f"{options} {timezone_opt}" + else: + merged_options = timezone_opt + connect_args = {"options": merged_options} return { "pool_size": self.SQLALCHEMY_POOL_SIZE, diff --git a/api/configs/middleware/vdb/weaviate_config.py b/api/configs/middleware/vdb/weaviate_config.py index 6a79412ab8..aa81c870f6 100644 --- a/api/configs/middleware/vdb/weaviate_config.py +++ b/api/configs/middleware/vdb/weaviate_config.py @@ -22,6 +22,11 @@ class WeaviateConfig(BaseSettings): default=True, ) + WEAVIATE_GRPC_ENDPOINT: str | None = Field( + description="URL of the Weaviate gRPC server (e.g., 'grpc://localhost:50051' or 'grpcs://weaviate.example.com:443')", + default=None, + ) + WEAVIATE_BATCH_SIZE: PositiveInt = Field( description="Number of objects to be processed in a single batch operation (default is 100)", default=100, diff --git a/api/constants/pipeline_templates.json b/api/constants/pipeline_templates.json new file mode 100644 index 0000000000..32b42769e3 --- /dev/null +++ b/api/constants/pipeline_templates.json @@ -0,0 +1,7343 @@ +{ + "pipeline_templates": { + "en-US": { + "pipeline_templates": [ + { + "id": "9f5ea5a7-7796-49f3-9e9a-ae2d8e84cfa3", + "name": "General Mode-ECO", + "description": "In this template, the document content is divided into smaller paragraphs, known as general chunks, which are directly used for matching user queries and retrieval in Economical indexing mode.", + "icon": { + "icon_type": "image", + "icon": "52064ff0-26b6-47d0-902f-e331f94d959b", + "icon_background": null, + "icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAT1klEQVR4Ae1dzXPcRBbvlsZ2xo6dcbwXinyMC+IDW5WY08IJh2NyIFRxJLvhHyDxaWv3kuS0e4v5CwjLHqmCHMgxMbVbBZxIOEAVCWXnq7hsMiaJPf4aad9Pmh5rNBqPPmdamtdVdkutVuv1r396\/fX0RgpNwspvterurqjatqiatlWxhKgYUhyHeLaQFYrwh5OqE3v+SSkqtrruSS\/yoRRijbBa89bRSZN7aVLYq7hu2eKBgfzSWLXpeqkkVmdfmXau4fogA8nc37CyUqs0TLEghfUOEatKhJoXspNU\/ZVqOJ8mbXGHCLlq2\/ZdKY07ZkMsz85Ot5E6a2T6QsB7j2oL9Aa+QxVdoArhryMYhiEMUnmmaQpJKg1\/SEMgcJxzHJumm4ZjFVR+dT4MMWEp8OcNOLdI3algWQ3KQ52GbTl5LcuNGw2L8lEfExBASiHt5YZhfDZ3ZPpOQJZUkzIjIDSdZVgXbCnfI4kXlNQgS6lkOkQD2UZGRlqEU3k47g8CjUZDgIy7uzsUN8TOzm7bg4kcq0Tpq68f+8P1tgspnqROQId4JXGRXrlLalwG0o2NjRLZRh3y4ZyDngiAhNvbWw4ZlZYEEUlLXH\/t6PTVtKVOlQn3H\/7vnLSNazSuqELQkZGSOHCg7MRpC87lZY\/A1tZ2i4x4GoiYtkZMhYCk9aoN0\/6UZFyAoEw8oFCcAK24vr7uHTd+ZY7IxTRm0okJuPKodtGy7SvobtG1lstl0npjxUGfa9JCABqxXq8rItJs2VpMOj6MTUBnrGeKyzQXuwQJR0dHxMTERGu22pKaDwqFAMaFICHIiEDtv3Ti2Mxi3ErGIiC6XMuwv6Sx3jxrvbjQ5\/u+zc0th4hY+sHSjTEq34\/TJUcmYJN8tzHRwDrd1NRka70u35Cy9FERgDZ8\/vyF0yUTkVaNEXk6KgkjEdBLPqzhTU4eZPJFbbWC5QcJX7x46awjxiFhaAL6yQfNx+t5BWNTzOqgG4YmxGJ2VBKGIiCTL2bLDNFtcUnYubEaAFpzwlFFt8uaLwAgTnJ6Q3ADHKEluaq1bX9JiqvSC5qeBPz1YQ07G\/OYcGDMx91uL0iH9zq4oeYF4MyuaV3uhca+XTBtrV0QwvgUBR86NMUTjl5o8nUHAUxMfv\/9uWOBQ13z4onjM0vdoOlKQGfcZ9o\/YIdjfHycdze6IcjpgQhgnXBjYwPX1mjb7s1uyzNdu2Da270G8sGKhbfWAjHmxH0QAGewO0ah0thx7AQCcwcS0O16xTmM+7C3y4ERiIOAZ2t24f7D2rmgMgIJSCZVzuAR5FNWyUE3cxojsB8CmDsoBUbfp1wLmhV3EPDXR7XLapsN3S8HRiAJAuiKYZ5Hw7nqrmE5hive8joISJ9QXUAGqE8OjEAaCMAoGYE04kW\/FmwjIMZ+0H5gLP44MAJpIODhU4W04AVvmW0EVGO\/0VE2KPWCxMfJEfBoQXyk1gotAq48rs3z2K+FCx+kjAC0ICYlFBbwma4qvkVA+jzvAhK561XQcJw2Aq1JrWUtqLJbBJSGfAeJ3P0qaDhOGwF8lotAmtDhGo4dAmJmQiZd80hgDQgUOGSBABwSqG5YzYYdAjbMxgIeyOTLAnYuUyEA8oGECPAPhNghoG1LR\/sZhnsRFzgwAlkgAHtBJ9juONAhIDHzFBLhp4UDI5AlAoqAjmc0elCTgKKKhwZ5nkI6B0YgLQSUkqPe2FF6zS7YnYAodqb1MC6HEfAj0JyEILmKfyWajVTJixxbvQCNnISNDUvcvl0X9+7tiKfPGuLp04Yj+fi4IY68WhKnTo2KkyfHxMyMfmN6EBAWVrCahldciVVpadu3MQOenJzMSRMMp5gg2uefvxC\/3HPdYvRC4a23DoizZya0IyLM9fEJJ\/mOPF2SdqOCoaBHNfaqV9+v443\/\/vtN8csvO+Lxk93WG3\/kSEnMHDbpjR8TADvrMEg5bt3eEDdvbpCZe7Bn06C6f\/fdprh7d8sh4bvvjgdlGUgalmKcb4jtRlX++uDpJWLitbGxMTLB0kdIhQwA\/PzfL3oCj+4Gb3tWRBykHF\/fXBdff72uIIkVA5uzZ\/UwscO3IvhmBB8sleCNHlvE8M+sW\/jii5cCb36YgO7pX58\/d7Rj2kAPUg7UP4h8cydonEdjvVOesd7jx7viEf3dvPmScGjXlCBxuSyFDprQ09tWSrBUBfU8iWHaO\/M8ACws+bzC4L563RIffJDOeHaQcuClQrfrDePjUpwhbfbu6c7eCkMS\/L1Nw5FbNEm5SVpzg7BQAXXBcGXQkxP1mYchjePOMgwE1ImAGLsEvfUKyF4xwEeXmTQMWg4QxjvmA\/kuXZwOJJ+\/ru+eLotLlypivNxqYnoxbZrEPPdnHeg59bzyOCTQaRsOwCcN6I69b3+c8gYpB7QfXgBvgOaDhgsbkPeMb9z3Cy3dJMUl7PO75VPKjjzrTu+9Ht1y9zkdoAP8pAFv+3fftjdglDIHLcfdH9s1+MyMEUrz+esITTh3on2L9fatuj9bX8\/xuy8ItCR4SDsC3kmh61Rohl0vU\/m98aDl+PFu+1rfmTMHveJFOj5J4z5vuBdyHdF7T1bH1AO7v8Gmyyy4Riv7aYUnT+KXNWg5MKP1BuxwxA2YKXvD02d7ExNver+OPTYHVYN+xYkWovWZhGAZIa2QpCsftBz+cdrRo\/EJ6J\/1JsElrbZR5WjXBSvBOB4OBLQjoP9tTdIMRyPMGP3PGbQc\/ucn0Vp+bY4FaV2CdgR8NcFYxw\/q9OH41Ru0HDM+2ZOsaz7xDWuOHmmfFftx6+d5axKi1mb6+fCgZ83NpQfOqVPxDRQGLceJuXa\/PD\/6lmWCsOuW5l\/PPHmyvexu92WV7uFaxaCtOK0mIW+\/VW5bvY8LAtbNsCUVNwxaDv9WGxaQb91q35YLUzdsZ\/q7b2zHDTK0EXCQggQ9G+OT839Ovo+bZN0Mcg1aDjzfv4AMTeYfzwVhqNKwlOPfS4a1kH98qfIPIo4\/SMpQWqxbJbHagOlREu2nqjZoOc6fn2rrDbC7s7RUC6UJofmWPlnr2EsGNjoF8+PFv16BQMqRoC7CvfEGjVNosgaz8yjhNFmJnDsXf9fA\/6xBygET+9KIFD\/9tLcrskvLpD\/9vC2+IwNdZWgwNeXqEXS1MNy9cWNd\/Oe\/dfrRaRpgecJ77x0Uf3xjsN2vEqded7dJ5f2HzxwpDx+eVte0ir+lveEg+za\/kLAU+fDDKTGf0fhmkHKg601iHQSsdDJIhTzPntUQCe0J6EhJ\/0CAH2mf+Blt1alxEMYy2KI6QTPnt\/50QEBjZB0GJUeQfV+Yuu5nPxjm\/qzy5I6AWQGRp3LRxUIb+s20utUBVtPnz09qNelQsjIBFRI5jEFEmGvBYubxE7Lv23DHeugR8JEWeoTTC7Sc1YceIS58TMC4yPF9qSCgCJj9oCkVcbmQoiLABCxqy+akXkzAnDRUUcVkAha1ZXNSLyZgThqqqGIyAYvasjmpFxMwJw1VVDGZgEVt2ZzUiwmYk4Yqqpjxv\/UrKiL71At+WnTwTKqLHPtAFfpSbqxhQtcog4zYe9XBM6kucqQBsdqKywUB8cYHeUhV5lhZekiFZXFUz6RoIJjUwwYviWW3t6F1kcMrU5Lj3BCQPZMKxwSrqAapWo8B2TOpcJx0BpEvzx5SvZpT2y44iRk6XJIl8ZCKsdY\/\/lnr+KCnm2dSL6BBlsvojv\/+t8ORDUN1kcNbv7SOVRes5TIMLH6D3vqwlU\/qIRXk18EzqS5yhMU9Tj4tCQjgk4a4HlKhdfwm74PwTKqLHEnbodf92hGQPZO6TVZkD6leUmpHQPZM6jbP0HhI9bJRh2P2TOq2QpE9pHp5pp0GVN\/8eoWMe4xxVNSgi2dSXeSIil\/U\/NoRMGoFOH++EdCOgGl6borjIdX\/\/DhaVFHCr82xHhg26CJHWHnj5tOOgOyZ1G3KofGQGpe5Wd3HnkldZIvsIdXLHe00IHsmdZunyB5StSYgxkmD9JCK5+vgmVQXObxkyeJYOw2ISrJnUrep2UNqFpQPWSZ7JhWOdyv2kBqSMFllY8+kxTZI1dYe0E\/oYfdMGmRn6Mco6Jw9pAahkrM0LEbDRMxvptWtGll5JtVFjm71jpKuDFJzowGjVC6rvCCADp5JdZEjCc5MwCTo8b2JEVAE1HIZJnHtuIDcIMAEzE1TFVNQJmAx2zU3tWIC5qapiikoE7CY7ZqbWjEBc9NUxRSUCVjMds1NrZiAuWmqYgrKBCxmu+amVlp7x1Io6uIRlOVQLZJerPVeMPY82TPpXmPrgseeRPGP1FactgTUxSMoyxGfZPvdqQhofrz41yvIWC6X98vf12swfbpxY13s7Li\/gxvl4bu7Qvz087Zzy9zcaJRbO\/KyHB2QpJZQr286ZWk3BoTGCfIN2G+PoCxHalzbtyCtumCMcdgz6V576YLHnkTpHakuWKtlGHR57Jl0r5F1wWNPovSPtCEg3na\/yfsweybVBY\/0KddeokHuctaQZNvRB\/ztRSU708UjKMuRrB3D3O3h2ppBvNOCgLp4BGU5wlAoWZ42AiYrKr27dfEIynKk16ZhStJmDKiLR1CWIwxt0sujDQHTqxKXlCcEtCGgLh5BWY7s6WtZ7oRX0vzDEFKs4pGNhpX9k\/d5gi4eQVmOfRoppUtqEmJLEFCToItHUJajv4QAAbVYhtHFIyjL0WcCWrb9Ox5p24PtgnXxCMpyZE9Ay3J\/v0UKuapNF4xq6+IRlOXIloTeTTfYA85LKRdKJVOMjIxk++QepY+PG0IHj6AsR4+GSnh5Z2dH7JLhJk1GbshfHzy9ZEt5bWxsTExMjCcsOp3bYQUSZBMYpfSzZybE2bMTUW7pyMtydECSSsLGxobY3NwCARdLDWk7azE0Ckyl8DQKAXnKZUPc\/JrMs+rRxqZpegRlOdJozc4yLMttUymNVXnvUW1B2vZt0zTFoUNTnbkHmAKTJGghv5lWN5GK7plUFzy64R82\/cWLF\/S5BXXBUp6WKyu1asO0VwzDEJXKobBl9DUfgGfPpHuQ64LHnkTRjtbWfhfQguaInHV+Pe\/+w2dO\/zs9XRE0IYlWGudmBCIioMzxXz92WLrLMLa7Hae2SCKWx9kZgdAI7O421wBtcQc3uQSU7gmmxxwYgSwRUIvQNA15gOc0NaDtnCh2ZikAlz3cCGD9zw22VwPay0hU7HQz8H9GIH0EGo1mFyyNPQKaDXMZj4IG5HFg+qBziXsIYPkFwWyIZcROFzw7Ow2LmGWQj7thwMIhCwQU+cgQ9U6Tc80xID2NyPcNHrq97fpVyUIALnO4Edje3nIAsIXLNZy4kxDnyFhGxAQEChyyQEBpQMsyrqvyWwQ8cXR6mRKdblhlVJk4ZgSSIrC1teXsftA2x+rc7LQzAUGZLQLihPaEbyDe3Kwj4sAIpIaA6lltIa96C20joEGqkRi6Bg3IWtALEx8nQUDxCdrv9WPT171ltREQMxMy0f8EGVgLemHi4yQIrK+vO7cTtz7zl0OkbA9kHVOxDPsH+mSuOj5eFgcOHGjPwGeMQAQEMPZbX9+gr3\/F6mvHDs\/6b23TgLgILUh2Wos4hhtVXpgGEhziIIBvzZUrXv\/YT5XXQUBcoH76K4qcGfHLl676VDdwzAiERQDDuKb181f+sZ8qI5CAuGg25EekNmlCskPjQdehtLqJY0agFwL45mNraxtd7xoZnjo9atA9XQlIXfEq2UxfxU1Qo4N23REkPKfpiYDb9bpLedT1Ls6+QlzqEroSEPlfOz69RIPATzAOhB0\/k7ALipzcQgAcAVecuQNxp1vXq24gDbl\/aM6Kb9OseB4fLk1NTbLZ\/v6QDe1VkO75cyiqBm1qiDuvHT\/8Zi8w9tWAuBmzYsOS71OBqygYD+CZcS9Yh+96G\/loycUYle+HQaGnBlSF4Os5Wh+EJqyyJlSocAwEOsg3Ik\/vN+7zohaagLjJT8KDBw8K0+ypRL3P4+OCIYAx38uXL91uF5ovAvkARSQC4gYvCfEt8eTkJJMQwAxhUBMOrPURkSKTD5BFJiBuapLwS0xM8B1xuXyAt+wAzBAFrPPV63Wn+8WEA2O+sN2uF6ZYBFQF3H\/wdImmxBdxPjY2SiQsszZU4BQ0xngPxgXb281PeGmpxbSMK5isxqlyIgLigfcf1i5IYV8j1woVdMnQhvC0xaF4CLRpPdrhIOuWqyeOzywlqWliAuLh6JIbprhG86FzOAcRJyYmyN+gdr8GC\/E4REQA9nzY1\/XYiC7T9tpHcbpc\/6NTIaAq1NGGtn0ZSzVIAwFHR0dZIyqAchb7iUdkWcXWWtNYJZXapEpAJdG9B0+v0O8\/\/EURERrRJeMYa0UFkoYxxnf4LHdnZ9sxJMA5ApHEMVQuWcZS3LFet+pmQkD1ML9GVOkgIxazS6USddeITXWJ4z4hAHLhD9ZO2OHCX4BjgmVpyxuGJa6nTTxVzUwJqB6y8rg2T2tGNFmR72DpRqV7Y2hJLGpjWQfHiNUfSKqCe71dbJVP5RmGWBHIX1eszSHgVw+UBsM6ncqvSNa00\/PfjvNlyvsNNcJy80vJoDyppbW3ZGrFdi+IJiwVmrAsEEBYQzxFa0jVbqTsXgpfSQUBuOWDZzSbnFNJYxnuMrLSdN3k7TsBuwmy8lutSo6TqkTICkhpCatCv6Z9HPlp4FulyAm4jiUfdY6YlGVHmvd6EY+p4daoB13rqFvzp9cofY2Wx5zr9NNsDwxhrDXop7EIq1Ua+aymMYPteHaMhP8DKleEJHlBQFwAAAAASUVORK5CYII=" + }, + "copyright": "Copyright 2023 Dify", + "privacy_policy": "https:\/\/dify.ai\n", + "position": 1, + "chunk_structure": "text_model", + "language": "en-US" + }, + { + "id": "9553b1e0-0c26-445b-9e18-063ad7eca0b4", + "name": "Parent-child-HQ", + "description": "This template uses an advanced chunking strategy that organizes document text into a hierarchical structure of larger \"parent\" chunks and smaller \"child\" chunks to balance retrieval precision and contextual richness.", + "icon": { + "icon_type": "image", + "icon": "ab8da246-37ba-4bbb-9b24-e7bda0778005", + "icon_background": null, + "icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAYkklEQVR4Ae2dz28cx5XHq2f4m5JIyo4R2+t46B+H1S5gGUiwa1\/EAFmvkUtsIHGOq6y9Z1vJHyDpD0iknG2vneMmBmxfFo5twPTFzmIDRAYS7cFKSMU\/FCS2RVKiSIpk975PNWtYU9M9nB\/dM8PueoLY3TXVVV2vv\/N+1auaQA0JLV27XpNHqe3K\/yAIZ1WkZitK3c\/jhUEwG8g150I1\/df+E8hn+5\/bnxT3PFArMuaVhgFyTfkeBSpa5jRU6irlUVhZrsafL8\/fPac\/4\/NBUtDvzpeWrs\/ujquFqgpPhZWgJsA6Kc9Q6\/dz+P6EA5G6FFXUsoqij6Kocqm6pRbn5+fqAO4Hj\/oCQJFuCzKYU5GKOPK\/iSqViqoEgaqOVFUgR\/5TBgVy5Bqq7pXpi70\/pr5dVvTzKBJuyn+buA6tsnB3V+oIzqJQ1w1DOYaR2pUj54kkoBTJuahGKr+Yv2vuUmKdDAtzAyCSLpwMTwdR8D153gXzzIBlpFrVQKvKcXR0tA44U8cf+8OBXQEoYNzZ3la7O7tqe2fH7XhZoHr+obvvfNX9IKvrzAEI8NSEej4KoheMXQboxsfH1OjYmAafkWZZDcK3kx0HAOHtrS21vb1jS8ll0Umvit14Prue4pYyBeCVz794qhJULkjTNZofHRlRE1OT+si1p8PFga2t2zEY9yVj5hIxEwDiwYpF8oqwdwEWe+DBheIQUnH95npdIkaBeqMSBWey8KR7BuDVv1x\/Xkzdc6hbVOvk5KSamBgvDvf9SOocQCJubGzEQJRwThiFZ3q1D7sGoLb1JtVZ8bxe4AnHxkbV9PR03VutP7U\/KRQH8J4BIWCExNa\/+ODX7zjT7SC7AqBWuVH0ugQ3T3qp1y3rD\/d9m5tbGog6FEToJgie7kYldwzAPXvvPWFfjTjdsWNH6\/G6w81S\/\/SdcgBpuLZ2w9iGeMrf7hSEHQHQBh8xvKNHj3jwdfrWClYfEN64cVMRUxTqGIRtA9AFH5LPx\/MKhqYuh4MaRhJ2A8K2AOjB1+WbKdFt3YIwnmw9gFHS+OtSpYba9ZLvAGaV9GO0IdgAI2AFzOhIyQH8OBCAS3+5fkGJt4vDgc3n1e4BHC3xx2Cj7hcIZiQX4OxB7Gipgq9c++K05Ki8QsMzM8e8w3EQN\/3nmgM4JqurazoDRyThmQfvueNiGmtSAajtviD6HTMcU1NTfnYjjYO+PJEDxAlv3boluXRqRTKiHk0Lz6Sr4CC6APjIYvFTa4k89oUtOABmmB0DQ3t5Aom1EwGI6hXP+insPuZ2PXkOdMMBa2p24crn159KaiMRgGL3aeMR8Jms5KSbfZnnQCsO4DsYAVYRjZrkFTcBUGw\/wFcDeKhfT54DvXAAVUx6nlAtnAh14ordXhMARV+fpsL0kWm7nj\/3HOiaAyQlQyIRn3elYAMAsf2kXg3E7qGW+zx5DvTEgTqexCEJx8PTdmMNADS239i4Tyi1meTPe+eAJQVZpFanOgCXPr1+Ukq97VdnjT\/JkgNIQZwSoQXxMxZM23UAhpVYNI6OaoPRfO6PngOZccA4tbLUc8E0WgegJBOeotCrX8Maf8yaAyzLhQzWONcA1J6JTB5T4J0PuOApDw6wIUFdDbN+XEgDcHd8d4ELDz644CkvDgA+QKhpSi1w1ACUD7T0q8i+LJ48B\/LkAHv\/QOFubAdqAMraukcoHB2RyWNPngM5cmAvYRU7sEY32uUV51hfVKsxHvnA0z4H1rYj9dZnW+ry6q7683qoLq\/sqFUpo9zQfVMV9XfTVfWPs1V1YmZEPXbXqKLMUyMH2IxKU6C00ItjLnsOiEFn4y3lvAJcL368qT7827b+fxAXPrkVKv5T39A\/CBife2jSg9EwRI57TgglNf4EewuOlkg+mJ2doazUZID30scbDRKuV6Y8UxtXPz4x5aWiMHJlZVWvJRY1PI8ErMHcpI0fKS8T\/fTyhsoaeIZ\/v1zeUvwHhD85Ue4cS1sKVnajXR2PCSpiCZaUUJ1PvLuifnb5VqrUe\/xro+o\/Hp5Q\/\/n4UYU0S6L7pqoaXNRNI\/r45\/++rtV1Wp2il4\/secKyPWZtpFoJZAmd6GJRwWUkpNLZj9YTgXdsNNCge+7hScU59FMBEPe49OQ9Y+rcyem6itX24F+3E9vWgH9nRV381hH1r3Jf2chIQFkrMjsiWwbPwlr2Zy4bAaafidp1CbChJgGeIUDz7Ac31B\/EA3bpJ6JWf5ygVl+6spkIbO7H1vx3aa+MKtkAUGIxsyMCuxoMqRdyUQJKAx9qFlAYiQcrfv35bXX20nqT2kTlPvfweANQW9WnTTt0Q11UMlQmu9As85D0v\/vrqS9lAiCASpJ85x+ZagJTGlAB368WjtVVrkaR\/Dmo\/q8\/EzCLyrcJEBIzTLMt7bpFOxfXI7ifQVXMHF3RRuiMB1X6wv\/ebChFMr126lgD+Kh39qNkFY2954Kv3frPiYR9+zuzDRKWhwGUtFEGMsJOFq3P1SVgGQbOGH+wuNqkBl87NaMIGhsCCNRLAkSSvddp\/WNjstOEo45Rzc9+sKbBaZ6jqMe6wytsKBUAUY8uqFC7Nvio85LMgLi2Gir35cePSN1GlmVVH7D9YWVXmwZJDk1RwViREEycl1VwLxjguXYfNpft6Rr7LQl8qNwk8NFmr\/VtcL2oZ2CKrYqtSY+aJOrHADR62WZGkc6Nt2nGhETD24UAZ6sQC3ab7RVnWR+v+78krmhAzPGlj5kx2Q8BmWcu4rEU0WcA4waPecF4nnyGvdcqvueCL8v65x6ZlhBM\/EUwACuDFDRjbTRoTGnBjh\/KjIRNSD\/Ub1b2W6\/2IRKWZymjFCyFBHz5SuNsxzO1sXqIxbx0A1ATYrHtPaSkCcnkVd\/uj2f5wErrMs9WxGNsAzIXLP+KSIDn9+Jd2kTWSxJlEWIxKp2jS520T17h2nYotmfxZETd3xD\/o8L+bTCqqNkwrvp1QcE1KpRwjGv4M2OSFA\/Mu755xrdk1qSIVAegYK\/wNuDl1ebkAfulAiZ3VoPPTUjGrst53vXt\/lgCUHQqPABd9Wu\/UFRiUoiFQDSJqS7lXf8xySO0U\/pZf1J0KjwAP11PliKd2GOAoB\/1fyCeOcmqhlj8VHQqPABdZwAVmueUWi\/tux42K++KToUHoPsCh8nec+1JO+DNc7uAdMdShOvSAdBeq4t0HNQUXJo9WQRQdTKGwgMQqWJLEhNbyyrLGSnWSVb0QfU7eXlFqFt4ALp5d6syK\/fix8mJpq5KNC94UCEZW1qbZynasfAAZIrrk1v7Ad0zkg1thzrMC3VXtVGOik4LyeRdn\/7vk60+ik6FB+B9041TWUng60eIxZ1lAdxJsyw24OxEWbu8SOeFB+CJmXQpgspNCsm0sg\/zrO8Ci02Oik6FH+GT946rM79tXIXGSx02ey8JaOywVXQqPADxgt0pLnYjYFcCO+426JAMz2Iv18R29U5IQb5+j39tpMHxwA50wZdmj\/XLPrSn4GD7cw9NFIT7rYdReAmoX6ZsscFefyYeyJFr1mMMQ1Y0ywWQwDaVQf0y3lIAEGkXg20\/w4VFSp\/qMMt+mQFA3iEWu32A5y6YYrlAGdRvaQDIQFl+6UrBtJSrTkImvapowOdKP7Naz3whinxsDJIVeKRGCqYNEa+431nRfCHc1XoAuizSj3dRChVsQIdkeevz7aYlmIMIybALwjlnkyKew5W+5tmLeiyNBDQv8GXZ4dT2gClflcU\/a7f3nQBUolkFZ+4zR+w3N6Wr0\/p44d9\/f9U0qY88E+2WjUolAXm5qLfzshj8zG\/3d8jCK37i3VXFIvEn7x1LnSLr1d6jf9SuK\/kop98yqV7GDAV\/uvaVTrs9fnwuLinJXwDo2l8MHUlkwjWGFajGpCm4TkI4tGk2QTftukdMhLJsVPnVV\/HSg9JJQF46KjNtuWYS+FyVSxudpGgh9fB23bZpxybqHOQs2fWLcF46AAK+tFkP94UCBpJNbeL+drKoARvAS\/vZBwM06tjARD2Tw1iW3VJLpYLTwEeQ+q3PtkUyJq+gA4DMJzOllzRrAZgADD\/PgIPBUtCktC8DZOZ5cYaw+WKHZM18VD9e+OaRQoPQqOBDA0CkBL\/X9uEXOzqM8omsmTWSAwCQ98eLfezOUW3QU2YTdfE8CX\/YZDsWqMC0bTvse7o9N1LPDTQDatspMu3bIOx1\/KbNYTkeGgAitV6WReL2HnrtMBGJxIs2nuX3319rkkrU4SXbRH8AMclBset1cm6AZ\/\/eiHt\/GggZww0JE\/U6fre\/QV8PPQD5xh\/kNbbDRHY+oC0XUEjLt7+T\/tt4ABFH5WX5rY\/fd7lAHJX8mKjtVsCzx5AGQrtOp+eMH8962DY5GmoAptlqnTI\/rT7gY1d8V02n1TdgZJ8ZVPgnstsCZYZoB8eBdjEFyMImEbbd9k07HPMAIVrgVwszdW1g9zeocwPAofOCecHsFm+\/YMMko8pwCPhtXqNekXDscEoq\/UHORBzTa54NMX0kHennPlHXSu17xPe+9mW9Kv3\/3\/eO1697OQHEjJM2Xep2\/OYLjeND+8NEQ+WEGEa54AM0F741rT3RdpiHFGHz8CSvFskHgHslG4C09dn37+i1Sf2lSwoRZTX+YZKERgIOzVww3\/gk5hMieftfZjoCDc4F93CvSyzLZHH6sFE\/xm++4MM0\/qEBIA6HK\/kIkTA\/240txT3xBuCNu83TR56hlm6BXdbxDwUAAYWbHIr0yiI1iTCGKwlZbO6CvVvgZHFfmcc\/FAAk7mYTNo8brLU\/7\/Q8jgc2rg8mtjgsVObxDxyA2D5ujA7J143aTQMUbeHE2BQHdgdvC5Z9\/AMHoLsRN9IPJyJrwvO1Qc2Ld\/vOus922nOfoWzjHzgAP\/yi8Udknry39xBJ2ot3bUHmlQdNZR\/\/wAHo7oPMrgV5kRv\/cxMT8uq3VbtlH\/\/AAejuBJ\/njlDMntjElNqgqezjHzgAscVsynPS3Ezdmf7cvk15P4\/uM5Rt\/AMHYD9ftu9r+DgwcADaninsyTNA3CxtGpNWB\/F6yj7+gQPwG84Opmk\/LJMFONzfBB6GLXDLPv6BA\/CEkx704d\/yC42QrmVTng6P3U+r87KPf+AAfOzOxvw0fi08L3KDvqwfaZdQ379c3tRrN554d6XpNsrMWmNX1TdVtgoOy\/itR870dOAAdDOHeXmtVpR1O3qm+1z7sp2gN\/ewVPKf5Dfc2OqXdpLih5TxGSD8+ze\/0ke3v6RnH\/bxJz1zlmUDByBG+A+dqbesc\/YAtTvhz3Rfq5AH97A\/DDuXumt323kBgJF72Xa3Vf7dsI6\/nTFmUWfgAGQQz8refTYhObLM2UvKtWuVbUP\/T7yz0pQiZj9ju+ekfj3xzmqT9LXvH7bx28+W93mjAZZ3byntEyBmnhZJY4gXh4Tqda+UeP+WRruSvtygtOk3jzUpAJps77Q1GcM0fsOHfh2HZk0IKi+WFI3TY90uK6Q9JJ+b6Eq2Cen6bvwNhhugcLSJe7JYkwLQ0lanDcP47THnfW7WhAwNABlwDABWxDWCkBeHymw3TQsnBjsyCUhJGw3RdwyAlaZ7kJb0nQRY7ksj2sPutKU6dRlL\/AVotn4GOf60ceRVPpQAZLCxCrzRBEI+4+Wxjx4ZM2b5IuW8OALYH0gMMW0zIKRYrAIbExK4H8LhcKWlvW1HXKvzv4DQtWeR6uxRmESDGn\/Ss+RZNrQAZNBpkqBbhgC+NMln+nN\/pwPJx6KmLIgwjisJf\/PduVQ7tN\/jz2KMnbZhANisBzptKYf6Rk0Bgl6JNlB5tJlGbogGwLbyktPaSSunLdq0qdWalH6P336ufp8PlQ2YNHikAQAhrtYumdga4Y1WwKM9bDUCxzbZu1LZ5b2cu9uw8Yz\/893ZlrFI+st7\/L2MqZd7jQQcegCaQQIUptJIYb8ssw5\/FpuPMoiX+Q1JNj0xW5Xt2UY62pfFzF6YfpBUvxFg5EEA3Twz7V\/45rQ4Vu1J+bzGn8c422nTAHAo4oDtPDAgwwtu1xNup03q9HtNhu2QsCblmVp7T5rX+NvrPb9a6YZRfn0OVctlX5Mx6JdRUYHSqR1R2JgaP+gH61f\/ZV+T0S8+2\/1E0R7WBHsVFe0BUE7KSLZNxvhbJSj0yh\/XIXL77rX9w3J\/HYCCvdKr4MPy0or6nKUHIMa9TYQ98iJX4rl959XvMLdbegCWfU3GoMFZegCWfU3GIAAY2k6IKKBlHmI3zE\/1DGKQ7fZZ9jUZ7fIpy3reCbG4WfY1GRYrBnJakfBfqeOAOALDuCZlIGgYQKeVIIj0LydHUTlVMDwv85qMAWBOhbtxwnGgguXSOyG8AALEbuoXa1LsedtuX1Sna1K67ecw3Wd8EJ65IvMfy5yEJXVCGDuUlLNHGthByyrju5v\/EvMjy5rfK7Ep61xDu+3Dcm60bajCq5XK3lxw3TU+LKPI+DmxBeOs6cbEUbOsspN8RHL\/kpZ1Aj76KHsA2vaCgyvXvjhdUZVXxsfH1PR0NinoGWOjr82VZU1GX5nqdHbzxk11e3tbBZXg6WDp2vWFSEXvVatVNTNzzKlazssyrMkY5Ju9sXZDbe\/sSCJW8G2ckGUepi4WuSg5lWlNxiBetTXpsaxn4v907SudizU3O4tYHMQzDW2fRV2TMUiGm3T8B+4+HhgALskD1WZnZ1Sl4iMzSS8HrzaPNSlJfRW5bEdigGura0r076UHvn78Ub0mROIylwSKtW0xDMfHs\/+RmCIwFM81jzUpReBNJ2MwQWgVqqvctyfuIn0BOj15DuTJgR1xPqAoiC5x1AAUL3iRi3DHAxA+eMqPA7t7GBNTbx+A1a3qIl0iAcu6OCk\/lvuWbQ4QftF0Sy1y1BJwfn5uRbyRRUIxO6GXgppB\/k\/mHKiDTxwQMEcHdZc3VNH7FNy+3biTPGWePAey4MDtzXh7FdGyGmu0WQegTMctUnB7ywMQPnjKngNGAlZGKq+a1usAnL97btGoYVPRVPJHz4FeObC1tWUyrpbn75rTDght1gGoOwiiNzlu3mpMIdKf+T+eAz1wwGhWmf89bzfTCMANEY2SnoUE9FLQZpM\/74UDFp6WRdO+arfVAEA8E\/GEf04FLwVtNvnzXjiwfnNd3y7x5l+47YjZ10hLS9dno4nod1Jam5qaVBMT7e1f19iKv\/IciDmA7be+fouLZUk+mHf50iAB+VDHBKPgDOcbG5s+MA0jPHXFAdKuwBDk2n6mwSYA8sH8PXNviGjUgemb67H4NDf4o+dAuxzAjGOtURSoN1zbz7SRCMD4w+BH2iGRDJnNzf1fMDI3+qPnQCsObErQeYtJDfYA3NOoSfVTASiIXQ7C2GVGjFpZrEnt+DLPgToHYtUbh\/ICAR9Yqn\/onKQCkHqiii\/iFTNHTB6\/B6HDPX\/ZxAEwAlbADNhJU73mxiYv2HxgjtorHo\/eE1F6koVLx44e9Wn7hjn+2MABQLeGoCKvVJKcH7jn+KMNFRIuWkpA6muvOAieltNlGl67Iegu6X7SCfzzRXscaACfYCWIMXMgfw6UgKYFWb5ZY\/mmXNe8JDRc8Uc40AQ+WW7Zyu6zudY2ALnJBeGRo0dU1S9isvlZunNsPhaaa7WL5OsAfDCrIwBygw1CVtAdPXbUgxDGlJCMw7G3r1DH4INlHQOQmzQIo+h1ufuk6Ho1OTnhp+xgTImION\/GxoZWvzgc2Hztql2bTV0B0DTwx8+\/vCgdP8\/1+NiYmpC5Y6+SDXeKecTeI7mAvV0guf55ZatyzqTYdzrqngBIZyINT8sSuwvyLZhFJSMN\/driTl\/D4ajfIPVkhkOiIecfvOeOi708fc8ApHNUsqjjC\/JteIprgDh9ZFqNjhya30LksT2lcIB8PuZ1rRzRRXE2ftSNynW7yASAplEtDVV0Vq5rlAHAMdn2zUtEuHH4KAF4y3pqTZJVshpNpgA0D\/XHa1+ek2\/Iv8l1jTIkogbjxLiXijBkSAn7jrXh25JEsCWL07jWhLrF1tusXOzW1ksbci4ANJ25EtGUA8bqSFWNyLEi03sj8t9TfzkAuPjPfkDE8NixQG9MYEAXP86iOJlvqg31atbAM6PNFYCmk6W\/Xj8Z7oSnRSqeUhK6MeX2ESmJB01Yp1KNj5zH1\/sA1ddSbpOpZ5cV\/dwAyB2nSRiJyMPbA5POydsD3I4AjfIWe4IvCjTfZ5mu2HiLbvtZXze+yaxbT2iP5AY1rhbCIDwpvxHxiPw6BA5MIigTbvdF2XJA5mzVpTCMrup14VtqMS9Jl\/bYfQdg2oNoTxqbUcI5sli0FkbhrGRK3B\/XD2rmPvnyyi6a8t8mrikvE4ldJmNecYcsL3RZl+nPI\/25\/ALM1UpQWdmV+qJL+JzVaXE9XXlwf\/4f1AC7LPmFaqYAAAAASUVORK5CYII=" + }, + "copyright": "Copyright 2023 Dify", + "privacy_policy": "https:\/\/dify.ai\n", + "position": 2, + "chunk_structure": "hierarchical_model", + "language": "en-US" + }, + { + "id": "9ef3e66a-11c7-4227-897c-3b0f9a42da1a", + "name": "Simple Q&A", + "description": "This template generates structured Q&A pairs by extracting selected columns from a table. These pairs are indexed by questions, enabling efficient retrieval of relevant answers based on query similarity.", + "icon": { + "icon_type": "image", + "icon": "ae0993dc-ff90-48ac-9e35-c31ebae5124b", + "icon_background": null, + "icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAUPklEQVR4Ae1dW4wcxRWt6pl92rseQ7xgYocdIALFeRglkSBEYkkkwF\/YEoT8RDiKwkd+wEryG+P8JpHNTySEQuwkHzEgYX6C4AM2UghISYTzMMrDySzYeION4\/Wu7X3NdOWe6qnempru3Znpefbca427uroe3afP3lv3Vk2NFF0ihdnZSZEVkyUpJqWSOSFUzlPezbg9X6qcFILySOi6Plb8R+WVCq5X5Kf4RMo5wog+liiB8zCPcJzBVV\/67xFwc0r6MxlF9YpiJr99u76G650Ueq\/tlcKlQq5UGprKKO9eXxDZpNgtVBSp2ntffdrbSSXEDBH5z0qqk5nM8nR+az4kcDswaQsBCxdmp4Tw7lVC0VHgUyWe5wmP2JjJZoSkIz7Ig0g64hySKefpk\/J\/prydl\/a0UoQmfWzBuW\/l+aUSlSF6KV+X9X06+kqU6Ih0jJwkpKeF8o7lJyZOxpRpWnbLCAhN5xdH9lMHD9HdTpk7BlmymYwmWoaOAwMDIeFMGT62B4ESERRkLK6uilKxJFaLxcqOpZjxfXXotontRysvNO+s6QQE8URx9AklxZP0Z5fDrYJ0Q0ODYmBwUJPPaLPmPQa31CwEQMKV5WWxulpc05JERBpPHs1vu+FQs\/ox7TSVgKc\/PLfXy3iHzZhuIJsVw6MjAkeW3kNgeXklIKPRjC3QiE0hYOHS7KQqyp8TxFOAmYkHFNIj0IpXr1wNNSINK094WXUgvzW5J52YgO9dPP9ESamnYG5hWkdGRsTw8FB60OcnCRGARlxcXDREnCOH50DS8WHDBAzGeiMH6a\/hSdzh4OCA2LRpU+ithnfNiVQhAO8ZJAQZIUp4R27dNnGg0YdsiIBlk\/sSdbqbtV6j0Pd2vaWlZU3EcijopMyqfY2Y5LoJqMlXkm\/A0UCcbnx8LIzX9TakfPf1IgBtOD+\/EJhkeMoZdV+9JKyLgDb5EMMbG9vM5Kv3raWsPEi4sHBFIKZI06R1k7BmArrkg+bjeF7K2NTg48AMQxM2QsKaCMjka\/DN9FG1RkkYTLZuABTF+F7CmA9mlzXfBmD16WVYQ3ADHAFXwBkdKdkAjw0JWLjw38PUxm44HBjzsdndANE+vgxuWH7Bbr+46eBGcKxrgk+fn91PK1R+joa3bBlnh2MjNPm6RgCOyeXL83oFjiqJA7feeOOROGhiCRiM+7x3MMMxOjrKsxtxCHJ+JAKIE167dg3X5ihGeGdceCbeBBexqEDlsIqFp9YiMebMdRAAZzA7RpIrrxOILB1JQJheWu64F+M+zO2yMAKNIGBNzU6d\/ujc3qg2IgnoeVIPHkE+syo5qjLnMQLrIQDfwSgwWu9+OMorriJg4eKHB800G8wvCyOQBAGYYr0elEIz\/sqwXrhit1dFQAoo7keBTZs32eU4zQg0jAAWJUOkJ59wtWAFATH2g\/YDY3kVc8N4c0UHAYtP+ntC9uUKApqx3+AQLyi1QeJ0cgRCLRh8SS1sMCRg4fxZ\/f1cOB089gvx4USTEIAWLM+iTQVf0w0aDgnoe95+ZA0M8BeIAmj4\/2YjYBQbTZRMmbZDAkqVuReZbH4NNHxsNgL4Wi6EnBHNNaQ1AQuXLuVoCcNuZLDzARRYWoEANiQIzTC+P06iCVgqrUzhhMkHFFhahQDIBxJqKY1O4agJKJWvtZ9H+7KwMAKtRAB7\/0B8vzSFY3kMKD+Hk4GsnjxGkoURaAkCesEqtSwp3owOAg0o5CSlaTVrmY84YWEEWoAANqPSkvG00iszLnBADDtb0C83yQhoBMpOiF62jwxP70yKBAWgWRiBViMAAhqugXsetsVFp1EbP7b6Zrj9\/kQg1ILEPa8kPR2PoeBgf6LBT912BLJlTxj7gXsZpSZxB9gGl4URaAcCRgNiM3qPdg0OItJkm1kYgXYgYAhInkjOM\/GYtcx23AL30c8IGCfEk97Nod1lAvYzJTr37PS9c3kzuvfMHF3n7oV77hMEjLJTpdLWUAP2ybPzY3YBAqHD63lbmIBd8EL6+RaySujfZdO\/UtQNQHzipz\/qhttI7T28\/53vd\/zZwkkPxAFpWUIQiOYwTMdfTD\/eAJvgfnzrXfTMTMAuehn9eCtMwH586130zJ7QPw5Nc8H0j4URaAcCJg5Iu3DkSAOWnRBeDdMO7LkPQiAkIO0dyCaYKdFRBJiAHYWfO2cCMgc6igATsKPwc+dMQOZARxFgAnYUfu6cCcgc6CgCTMCOws+dMwGZAx1FgAnYUfi5cyYgc6CjCDABOwo\/d84EZA50FIGu3xK\/G77D0NE3lLDzbv+ODWvAhC+YqydDgAmYDD+unRABJmBCALl6MgSYgMnw49oJEWACJgSQqydDgAmYDD+unRABJmBCALl6MgSYgMnw49oJEWACJgSQqydDgAmYDD+unRABJmBCALl6MgS6fi64kcd769z74t2PLoiz85fF\/Mqy2DE2LsaHhsVdN+0Uuz420UiTus788rJ4tfBPcXZhPmzjro\/vFHff9InwPEkC9+3Krusn9L27+Wk5Tw0BQY6f\/eWP9PmTQDpOdoxtEQe++CXxyB2fjisSmY92D\/\/hzeq2\/yCI4FvE8Ye+LnaOj0fWrSUT5Hv0xPGqorjXA1+8pyo\/LRmpMMGnPjov9jx\/jAjy+2qCOG\/q7MJl8d3XX6GX\/WtxZn5NkznFKk5BvEO\/ez22bbT56Mu1t1fRePnkxb+fisoWrxVOR+anJbPnCQjy6ZdPJKhH3jp3pibSwNyC2LaMDw2JnWTWbQEJv\/f6b+ysutKvFv4VWR7P99YHZyKvpSGzp00wyPH4KyeqNBNMIkzsp2i8B7JAXvz738Tb9CLPWEQ1pDm+9+ux7xLaz5Zvffbz2oRjTKk1H5lN0yZIPb+8VPeY7dX\/nK56BrvPt8k8301jzTRKT2tAkMO8fPNyQJDff+NxTZIH8reRgwAnYaf4yVf2iON7HxUP5D9piuojSIOxY5zAkTECMh\/88ldCgoHoT9IYzRbbQbHz10u\/+I+\/VVx2HSWMP9MqPUtAvOgXSKvZAvKBIHECwjy7Z2+VJxyMHZfiqoX544PDYdokovLMtVqOgWddaX4Pfvm+UHOjDZRJqxnuWQK6phHkgsdYi\/zgnkqSBiSIHuzD1BqByXUdlx+++bq5rL1hmP16xB374TnuorAOtLctr8WMEe0yvZjuWQJicG4Lxkg2WexrbhplYZZteZtMcZQgzmeLcTSggbUnbY0p6w3toF2MTW0xxHv49s\/Y2eIFMtMYX6ZNepKA0FjvOgR8uM643v23OGPBGE\/zkds\/TR7vlvC9Y8z47VdeEg8+f1QgbQQB41o1sKkDEtttIN+QOPiDChwo5OOZT1FwPW3SkwQ8dfHDqvew6\/ptVXnrZezYvEYqlIN5jRI4Hj8mB8aWVyk2B0IYgTaFg1OvvPXB+xVVYH5tEw7y2\/LcX+OdJbtcL6V7koBRANdqfk3dXduqCXvG8nhNORyhjVzv2VyH04MwTr39o36c+TVt3+967KSl02aGU0NA89JaccQsiOssoB9ox\/snK015rf2vZ35NG1FmGNo3TdK3BLy8vFL1HreUg9bmAszsnuPH9PyyybOPuP44jQdtrQRTji+Dm48bKjL1XUK75teUc82wqzVNuV499iQBbafAAB9nPs1192gHmM0114weohDLqYuV3jYWBtj94\/qh371hmqgKjJuZmLBAOfHcnyuDy9B2CKq7H3tMiKpwWmzCu+322nlPTsVFBX\/fJSLsHK90LNZ7Ge86jow7+4DpMVd7YawHh+ORO3aRF3wsdEQQItlBK2FATiwDs8UlNa7Bm3VncNCX25\/djp1Gf9\/67BfsrJ5N96QGhFapiuNFhFG+S4sD7vnlM\/oDU2oHkd3VJ66mcafHEB4xfcJcYvmVLZhNwZSeq9mivPPn1pn6s9uMS79GfxxpkZ4kIMB3A8TQCjbBUAYa6TItSD1D8TaYSozXINA0rgZy44iumXOvQ2NiftkWmGK73QduuS3SO8aiiCSSJjPckyYYLw8myF58ahwCxOOM2YOmevbBfXrZFeqAhFgL6BIA5Yx2Q7ko0WNGZ\/YEWhHerDstaOpHechYeGqTFGWf3bNPe9SmXtQRwW879ohnT8NC1Z7VgDDDWHxgCwiGVcW2JsTg3n5RUdovagbDNckwra5WRN+oGxUjxJSamdWw79E1\/dCk9qod\/CFEfVxv2P0jsvvopXTPEhAgg1iu8wAS3vOrZ\/Q8LTQTPiBOnDcKEkcRxQ0Co90Hn\/8FeaHva00EbYQ0NKobUsG9naXV1lGEdYnzMDk0tYh7PzDDaVgh07Mm2Lw0LK\/SWs+ZStMvyJqrNeXtIzRX3PItaM7AzK9Nf5kFqHYbcWkQFmPCn3x1bZwIz9o1v1FmOqpNE5S2zXAaFqr2tAbEi8L47ZWvPRapxaJepJ0XFQu0r2NdXj3hDmhTO0YIx8geH742U7nuD9q7ntCRa4bTsFC15wkIwsC8wiPFSmiY0zhzi3x7vBZoqbX1fDb5TBokRNuuqTfX0SbGbIgRBvPCcILWVrEgPINxJzSXG+er1fyavlwzrIcBCT1q03anjvI\/F\/6r0Pl1123t1D1U9OvuadzoHtEgF14QtNwOClBDU5ovEmEdH0y0kVo1HcZ0py4G3zdG3U9tIw22OfjOsWmr247NwrPZz\/W\/\/13STfb8GDAOGKzP0+KETpCHsAe+xmnGY9BSWIUcp+WChqBph4NwTUSbpgwf60MgtQRcDwaYyDfJXLN0HoFUjAE7DyPfQaMIMAEbRY7rNQUBJmBTYORGGkWACdgoclyvKQgwAZsCIzfSKAJMwEaR43pNQYAJ2BQYuZFGESACyjlUVr6eEGm0Ha7HCNSMgFIh1+Y8IVVAQBFm1twQF2QEGkEgJKAUc10\/E+LOZTbywFynexHgMWD3vpu+uDMmYF+85u59SCZg976b1N6Zb5wQJeeyUokZcj8mS74vPK\/zfGx0\/V9q31YKHyx0QoQiL5iFEeggAp4vBMcBO\/gC+r1rTyqld2ZUiqjIwgi0AQG\/VNK9SCln2AS3AXDuohIB44Mg11NSzCDhkxPCwgi0AwFjbX3lv0d+bzAXHLrG7bgD7qOvEVjzguWcVyrPBQtbL\/Y1PPzwrUbALwXW1sMYMENxQHRYLAYDw1Z3zu0zAqEGVD7FAYsBAcNMxocRaDECmPTQQtzz8tu3z+AETgivCdSw8H8tRsA4vOBeEIYpe8KK1wS2GHpuvliOAdJC6JNAQxOQ\/A99srq6yggxAi1FwAShhV96Dx2VNaCvT9bY2dJ74Mb7GIFisaifXnm2BhSZaeT67AlrcPi\/1iFQKnPMk96aCc5kBqfRJTQgOyJAgqVVCKyWNaDIXJtGH9oE57dunZNCTCMUU\/Q5Htgq8Pu93ZB85IDkt+bnQgIiQUGY3+K4slL9G2rIZ2EEkiKwshT8xK1SJc01tBc4IUFiGhkrET\/ih3wWRiApAkYDeiJ71LQVEjC\/bfu0McOmoCnER0YgKQLLtF2yDkDT1G9+YkI7IGgzJGC5g5dxXLq2WD7lAyPQHASMZZVCHbJbrCRgdugotuqABmQtaMPE6SQIhHzS2m87cWxNKggIb1gJ\/2lcZi24BhKnkiFw9cpV3QBFWY65LdGwr1IKly7l1OryO0KKydHRETE8PFxZgM8YgToQwNjv6tVrtPuVmLll4sa8W7VCA+Kijgl68gDSi4tLHJgGECwNIYBlV+AQxB37mQarCIgL+Y\/dcIJUow5MX7kaqE9TgY+MQK0IYBinl\/kJcSI\/UTn2M21EElBfLKpvaoeEVsgsLQUsNpX4yAhshMASBZ2X9aQGfe+jqLRFjaoTS0AsFpSidAiVoEbDVaxRrXAeI2AhEJjeIJQnlX\/ALHq2ioTJWAKiRH7bTUeU9J\/GHPHC\/AKTMISNE3EIgHzgCjhDX798Os70mvo01FtftFdcXHmD3JjdmUxGjI+NCeltWG39RvlqKhEA6eahqLDqmRY5k9d750YPuq4GRGXtFRf9fXCj0fD8ArGb95PeCNe+u+6Qb0aW1L5aQKhZlRVmZydVRr6B+CBrwlqg7Z8yEeS7b71xn41MzQREJZeEm8c2i0wX7CloPxCn24sAxnxXFq4YswvNVzP5cKd1ERAVbBJiQ8ux8TEmIYDpQzEOh1nlUi\/5AFndBEQlTcKs9xIcE9piS4yMDPOUHYDpI0Gcb3FxUXu7cDgw5qvV7NowNURA08C\/Pzp3RCrvCZwPDQ6KYZo7ZpNs0EnnEeM9LC5YKX+FF6EW7+ryU\/l8sMS+3qdOREB0Vjg\/u19J7zBpwxxMMrThEP0iOUv6EKjQerRsjyJ9h27dduORJE+amIDoHCZZZOVh2ux8L85BxE2bN4mBbNf\/Dg5ul2UDBLCeD\/O61hrRaUlTtY2YXLerphDQNKq1oZAHEapBHgg4ODTIGtEA1GPHKuJRLFhPrd1w04lmPUpTCWhu6t8XZp+SSj5miAiNqMk4PMRa0YDUhUeM7\/Dd8FVaRLBMX07DeSAwtzTWu7J0pNGxXtzjtoSApjNXI5p8kDGTzYgsHT2a3svSh6W9CIBc+GA\/IMxwYccCvTFBSDp9P9NEkJfFlcWjzSaeedqWEtB0Ujh\/frcvivulzNyL0I3Jt4\/QkvCgEdbxMsER6eB8jaD6nPJtMeXsvLSnDYHc50RsDqLoaDSYXpNXJhw2IkW+jt25lYPzaaLmb2mOdhrflIwu0rzcyjfZvHZjWyoUCjkxNjpFG1Tv9oT3OVLyk3GkjG2ELzQHAdqWj4ZKJ31Vos3CaX+ghWvTrdJ0cTfcdgLG3UjgSRMZpZejP9FJ+vvNecq7WZeXatLUU0LmhFQ5c66PivKofEVe6k9oc3mzv7f1rPjpteCUrqvgR4h8SbvRU9gE+4HrLZlpZ9JmeLBWtw0n\/w+IOsoy1qfzJgAAAABJRU5ErkJggg==" + }, + "copyright": "Copyright 2023 Dify", + "privacy_policy": "https:\/\/dify.ai\n", + "position": 3, + "chunk_structure": "qa_model", + "language": "en-US" + }, + { + "id": "982d1788-837a-40c8-b7de-d37b09a9b2bc", + "name": "Convert to Markdown", + "description": "This template is designed for converting native Office files such as DOCX, XLSX, and PPTX into Markdown to facilitate better information processing. PDF files are not recommended.", + "icon": { + "icon_type": "image", + "icon": "9d658c3a-b22f-487d-8223-db51e9012505", + "icon_background": null, + "icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAQfElEQVR4Ae2dT4wbVx3H35vxrjd\/dmMnIZA0UrxtilQuTYUEB5CySD2CSJE4Vl0uHIpQk1sFh7YHqt7aCsGBS7fqEQlSwRGpi8QFJMRyQoKEdaR2U9qkdva\/vfYMv+8b\/7zjsZ2xPTP22PN70u6bP2\/en+\/7+Pf+zMwbrVLiNu9XSpSVUpP+tOsUlKsKtH\/l4Z6rXNrW2uyrc6cthAs6hMVfllyVCou\/Y+eq6sM9x3+sfO6Uxvl7Squqq6yyTT7tl5cvFss4MWmXG3cGNjcrhWZerWjlXFdKlyj9a\/RXcogyOCMX\/nsbBJ93vOWZMPLPKFCg\/\/g7dqRZl070y2Wn6VfteHKqu1tfUGC1QTqX6aJ\/utrasGtqfXm5CEDH5o5zl2CSZN1WKPrrBNMKlR\/bXc6yLKUtrXK2rTSJhj8c+3zboeN0riXkVwrdvxkO3xXpDB\/AD5N\/nFxM7P\/vEbUhLec0m+r8okXhHBPWcRwCkCBskk\/bPZ2B0l23ctb7yxeKGz3DxHgwMQBh6Zy8s0oofd8PHWCxc7YBzSbY5ubm2sD1KtdnBKDfXViy\/LuyHVBgGL2aBChgPGocqQZtN44agdhU2XWcN65ePr8WPBHXfuwAAjy1oF6hX9pNyqRpIgBdPj+v5ufmDXxszQYpxDCCDhLfrIeJqhcgrNVr6oh8n5UsW1qvUb\/xjbj1ixXAO1sPblDD+TZlsoSM5uZy6uTCCeNjfxQXVdBR0pzma+LUq1arGxh9ljF2ixgLgBjBUv\/jPW5q4wCPIYhTUI5zlv0k9AKAu3t7fot4myzirThG0pE7VJufVtDc\/gPwoWk9efKkWlpcjGT1ZhmQaSwbDEqhcEadOnXKDAypDDdQ53c+frAatTwjA4i+3uZW5W3Hcd+hTBTm5+dMJhcW8lHzJNenVAH045eWFk1\/HnVOsxPv3d16iC7XyG6kJhhNLoH3e5pDugard+LECZUUeEk0KSOrNQUXjkuvw8OaOjg48KaCaOrGsvQLozTJQ1tAA5\/rfgT4ME935sxSYvBNQX1nNoswOKh7MAAWqEn+CGwMK8hQALbho1Eu5vBgjk0Ghk1Vws+EAqh7MAAWyOFu1tAQDgygwDcTzMReiKgQDgRgL\/iGmUyOvdQSYaoUAAujWsKBADQDDl+zK\/Clqv5TkZkuCGmQau6KheQuFEBMtaCTCVO7uHi6\/VBASLxyOoMKAEIwYsYFGJjkndfCZHgsgHfuP1il5yhuMt0m4rAY5XymFeA+oddK6ps0T4hnAvq6vgCi36ddc1\/XzPMJfH01lBMBBcAK5oY9p18DS4Eg7d2+ANKQGjPcBcx+JzXJ3M6FbMycAmAGd8fIFfCcQL8C9gQQTS9dcKOT5H5RyHFRoLcCuHeMphjPCdzZqtzoFaongNT0ms4jzKg0vb1kk2ODKAD4uCkmDN\/uNSruAvDu\/QrgKwE8NL\/iRIEoCqApxtM05ErOvNM1IOkCkO4uryL0aTKf4kSBOBTAQ8nGaf1K0Ap2ANjq+5VAbIvaONKXODKugI8n856QX44OALnvl5+XZ\/r8Isl2dAXYCuIlNX9sbQA3P65coxPS9\/OrI9uxKQAryCNimhdc4YjbANKboqs4OOd1GPm8+KJAbArwoJbetlvhSNsAKktfx0Fpflka8eNWAK\/lwpElNKyZbfzDyMTJuxVsnz1bhJcaF3zEPDUZm5KMpOlFfqzcUK0+Mo\/xWzVdxDIgxgI2880V6Ckj3ymhakqziT4gVsWAw\/pA8A2A2tUYgKic5Z3EtjhRIAkFsPaPca1+oNcH1PpZHMzROi3iRIEkFWi9P4KOYAnp8FJTZse2PR5xIi0uTX2YtGgyzfnAYlRw1Bobo8fEmSa4Tec0l1DynmoF0A9suRJ8ix8WlKdeWrKIl6gCAJBZA3sWrQhXQopWCpvfRJWQyCemgN8KWtptFpATWu1oYhmShLOlQI6nYprNEi2Kq0sovqW5O4g9caJAcgqwBaQlmQu0gHBrFVNCUZwoMA4FGECwZ7na6wO2D44jB5JGphXgQYilrCvtdlcAzDQTEys8AaivIHVbbsNNrBKyljAbu6Zyi20LmDURpLyTU4AHvDTsOCMATq4eJGVSAGNfMw+IrxSJEwXGoQDf9HDxCggl6AEoE9Hj0F7SCCggTXBAENkdrwIC4Hj1ltQCCuQ+33EVlo+pWw49pRA4G8Nu1Of5vvpqNYZcZDeKf79lelgjC5DEOzn4Bt32jvcRShp6uNIHHLl65MJRFOB5QLqW7gXLIGQUDeWaCAoEAYwQlVwqCkRTIIcvasOdjelD0En0GaIVUa6OU4GofXrOS67hcZfAsIOTEF8UCFdAAAzXSEIkqIAAmKC4EnW4AgJguEYSIkEFBMAExZWowxUQAMM1khAJKiAAJiiuRB2ugAAYrpGESFABATBBcSXqcAUEwHCNJESCCgiACYorUYcrIACGayQhElRAAExQXIk6XAEBMFwjCZGgAgJgguJK1OEK8BrR4SGnNETwnYhXf7uvfvf3+kilWf12Xv3su\/wpei+KqO+sBPMXNb6RCjbBizJnAd\/64Un1zMXhP0fxzCW7C74J1tvMJJ05AFFzH\/z4tLo8xLI4CPvrF+X7yUlQn0kAl05oA+HSQvhyJIAPwD4xBLBJVNSsxplJAFGZAApghblfkeUT+MJUGv18ZgGEZOjXoU\/Yz\/38eydMmH7n5Xh0BTIH4F\/\/Sx+m8LkffH1e\/fT5Bd8RbxPHXvpW55fj\/7XV7AonB6IpkDkAf\/LBnvq44i0LwdIFYcN0SxBKXPMyXSsuXgUyB+D2gate\/M1uF4Robr\/5ZM40ucG5PsCHaz4JgBtvVWQztswBiGoGSLCE24e0RKLPYcARnG5BGIQV+HxCxbiZSQChH\/pzb\/7hoENKTM8ER7wII32\/Dpli3cksgFARt+R++afDvoLi3Ki37fyRYqCDv1Hd81+bi3T9qOmO47qZvxccJiIgg+ULjnjX\/lJ7LJxh8fJ5gOef6hkW6KjXcz7S6mfaAnKl\/IKaWf\/0zN9oqubNP3Y2zxx2GD8ID0AcxhL2uh4DpVlys1WaCDWDUe44HFvDMEsYhI\/z9g0C0P9j4ePT6osFTLDmABke\/wq6MEvYDz50Fx7XZw2mMw37YgETriW2dGz5OLngPh\/PEnwos1hArvkE\/cdZwmCyvcCcRcvH5RYLyEok7PezhGHJRnmCOyzuNJwXCzjGWuhnCftlYdbhQ7kFwH61n9DxQSHMAnwCYEKQhUUbBmFW4BMAw0hJ8Hw\/CLMEnwCYIGCDRB2EMGvwQaOZHwXH\/Z5t3PEBQnb+bT426\/7MAzgNFZhF8LheZBTMSog\/EQUEwInILomyAgIgKyH+RBQQACciuyTKCgiArIT4E1FAAJyI7JIoKyAAshLiT0QBAXAiskuirIAAyEqIPxEFBMCJyC6JsgICICsh\/kQUEAAnIrskygoIgKyE+BNRQACciOySKCuQe7DjLdbYyHUu2sgBxBcF\/Ap8th0PJ9UWd2IB\/erK9tgVAIBVpOq6nYs1jj0nkmBmFPCxVrVcpQXAzFR9OgrqB1Df3fpik7JVKhTOKMuSFjkdVTTbuXAcR1Wrj1DIshA323Wd+tIJgKmvotnOoAA42\/WbytK5TnvAi0GIKiOXTjOe+Z1UllgylSoFeBBCn4qsigVMVdVkLzMWKESxHZkHzF7tp6DE1AS7ZjzsutIEp6A+MpGFpuN99FG7WqZhMlHjKSukv7G1tNsahNDkoDhRYBwKcGvrKOeepXTrXvDx0HgceZA0MqwAj4LBnuVq17sXrNpzMxmWRoo+DgWardbWVVaZBiF2GYk2GvI18HGIL2kcP3llwwLSAoFliNI2i6KQKJCwAr6bHmVr+WKxjPTwhILMBSasvERvFABrcGCP74SUzRH\/+NgckH+iQLwKNI+7ehuImZfoxU7p6OhI5fP5eFOMGFtc7yBEzMbUXn5hiW1MOorAk9Bk6+4hR17uHNfs+OhMR24lFzOnQKPRMGXSyjUW0ADoWu46jjZat0hMCPknCiSgQKPpzba42joG0K7Z60gLFlAGIgmoLlG2FWgceRbQrql1HDR9wOXlYvXO1hfrNBez4hCE1hx3DdvXpWYjbX2a1AjTykia+8wMH2V1A8why+0eKs0D\/hkH6vXjD6dgX5woEJcCh\/WaiYqeiDasYacNIL0St44DNQEQMohLQAG2gPa8tcbRtwF8+mJxne4Gr+OOCAfkQOKLAlEVqNVq5mYHxVNevlA0AxDE2QYQOzQ0\/hD+\/uEBPHGiQGwKcMvqOvoNf6QdAFo1YxqrsIBiBf0yyXYUBXw8la9eLq754+oAECMTmoZ5FwHECvplku0oCuzu7XmXu+77wXg6AMTJXN16h7wyqD08PAyGl31RYCgF\/H2\/p54493rw4i4AYQVpwaJbCHhwcCgT00HFZH9gBfDYFRiCC\/b9OJIuAHHi6qXibR4R7+22zCdfIb4oMKAC6Ma1Hr26Hez7cRQ9AcRJW+sfkVfFEzLSFLNc4g+qwOFhTdVr5qZG1dJei9rr2r4Aeg+qekNm0xTL0h299JNjPRTwml5vKo+a3lv80HOPoJ3zgMEAT10qvkO3Td7F5PT2zo6sHxMUSPa7FAB8YAXMgJ1+TS9f2NcCcgD7yHpd081jtOU7u7syKGFhxO9SANAZRvDIvas2rl4+d7MrUOBAKIAYFWutX6Dryk16lmtnmywhJSROFPArYFpJYgOMkCtblmHGH6TndiiAuMq8PKL1d2hTIOwpY7YPdsFHrDyu3+dXayAAcUFPCGVg4tcyk9umz+e3fEPAB8EGBhCBgxDKwASqZNfxgKPd7A4JH5QbCkBcwBDywOTR9rbME0KYjDnM86HuzUQzDThorm\/gZtcv1dAA4mJA+OSls8\/xFM3+\/oHCDWf8IsTNtgI80t3f329PtVj10eCDUiMByBJjmO227phg1htNMm4+i5tNBWD18H2Po\/oRClh1lHsLDPD7HaOUOhKASPDqxeIamd\/n6HHW2zDHe3v7JpPyPOEo1ZHOa1CXMC5s9aj7tY46f\/rSOTw5FclRXPG5O\/crq9p1X6MYS4g1R2\/X5efnI622EHzLS96Kg7L9XZx6ATw8UOAzJmU8KYWHVfrnYLgzsQLISf\/nk4ev0y\/kJdov4Rg+AQYYF+bzxsexQV2cgg6a5jSHi6IX+nd4N7x+VKeuVN308VpamAeV8axolOa2l66JAMgJBS0iHweMOdtWuVxO2Zat7JzNp7r8KIJ2RZaBA4PqBdjwh6edMI2CFQsAH46xIzjoRTX9oVVTa3GD50uDN5PzNz+rXGvWnVW6PXOdinetV0qwkpZNKwZrTVB6PrYf7NA6mgQpuy+fsZXGxyV8DuHwlyXHAAXL\/GnFW3kA6zAjzJdocSL0zTk8FiLFtpk+CV5M+4CuiXfE6TVdvCnZI0ish8Zea5ublUIzr1a061wjap6lDJT6QYmS8hfdudTnFyOPmziqmfSH1KtMImzQdNo9AIflMpKydP3EHjuA\/TKyeb9Sot9uiVbtLwBKepanQGGvPNwzTUKJrzt\/2irQEZzzO+wHj\/nPz+J2lQqFvw73cNcp4wAZOXqIRFXPnTJVfI+ajapL+6RdmRZeKWMuF+Em7f4PpXL0Ed9VCt8AAAAASUVORK5CYII=" + }, + "copyright": "Copyright 2023 Dify", + "privacy_policy": "https:\/\/dify.ai\n", + "position": 5, + "chunk_structure": "hierarchical_model", + "language": "en-US" + }, + { + "id": "98374ab6-9dcd-434d-983e-268bec156b43", + "name": "LLM Generated Q&A", + "description": "This template is designed to use LLM to extract key information from the input document and generate Q&A pairs indexed by questions, enabling efficient retrieval of relevant answers based on query similarity.", + "icon": { + "icon_type": "image", + "icon": "e4ea16ed-9690-4de9-ab80-5b622ecbcc04", + "icon_background": null, + "icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAQjUlEQVR4Ae1dTYwcxRWuqpnd2R\/veqzgxXaw2YEgRSDBEkJEwsFLDkE5xRwicogUR0g55GJWKGfjXBPJyyU3hLkFKRLmkohD4uVgHIVEOCggRTGZNTbesDbysj\/end3prryveqq3Z6bnv3t2tvu91Uz9dHVV99ffvqpX9bpGigGR4tLStMiKaUeKaallXgidV1o9iMtzpc5LISiPhI6bsOqLymvtHa\/KT3BCyhXCiD4B0QJpP49wXMRRV7rXCbgVLd3FjKbzymKxcPSoOYbjeyn0XPsrxbvFvOPkZjNanXQFkU2KGaHDSNXf60ppa1e1EItE5H9qqa9mMqWFwqGCT+B+YNIXAhZvL80KoU5qoSkU+NSJUkooYmMmmxGSQnyQB5EUIg3JVPJMovJlywfzkh7XmtCkT1CQdgN5ruNQGaKXdk1Z16XQ1cKhEPEGcpWQXhBavVmYmrraoExk2bEREJrOLY+epgZ+RFc7a68YZMlmMoZoGQqHhoZ8wtkyHPYHAYcICjKWd3aEU3bETrlc3bAUi66rz31j6uiF6gPRpSInIIgnymNntBQv079dHpcK0uVyw2JoeNiQz2qz6G6Da4oKAZBwu1QSOzvlXS1JRKTx5IXC4fvPRdWOrSdSAl774tYplVHn7ZhuKJsVI2OjAiHL\/kOgVNr2yGg1YwwaMRICFu8uTeuyfIMgngXMTDygkByBVtxY3\/A1Ig0rL6qsnisc6t2S7pmA179cPuNo\/Sq6W3Sto6OjYmQklxz0+U58BKARNzc3LRFXyOCZ63V82DUBvbHe6Fn6b3gZVzg8PCTGx8d9a9W\/ao4kCgFYzyAhyAjRQs0\/fHhqrtub7IqAlS73bWp0hrVet9Dv7\/O2tkqGiJWpoKsyq1\/opkvumICGfI68BEMD83STkxP+fN3+hpSvvlMEoA1XV9e8LhmWckY\/1ykJOyJgkHyYw5uYOMDk6\/SpJaw8SLi2ti4wp0jLpB2TsG0C1pIPmo\/n8xLGpi5vB90wNGE3JGyLgEy+Lp9Mik7rloTeYmsLoGiO722M+dDtsuZrAVZKD6M3BDfAEXAFnDEzJS3waEnA4u3\/nac6ZmBwYMzH3W4LRFN8GNwI2AUzbnn8bCs4mnbB15aXTpOHyhuo+ODBSTY4WqHJxw0CMEy++mrVeOBoR8w9fOTIfCNoGhLQG\/epD7HCMTY2xqsbjRDk\/FAEME947949HFuhOcInG03PNO6Cy3Aq0Hl4sfDSWijGnNkEAXAGq2Mk+YqfQGjpUAKi6yV3x1MY92Ftl4UR6AaBwNLs7LU7t06F1RFKQKWkGTyCfNYrOexkzmMEmiEA28EqMPJ3Px9mFdcRsPjlF2ftMhu6XxZGoBcE0BUbf1CamnG3R4zjSrC+OgLShOJpFBg\/MB4sx3FGoGsE4JQMkUqeqdWCVQTE2A\/aD4xlL+au8eYTaxAI8Mm8JxQ8XEVAO\/YbzrFDaRAkjveOgK8FvZfU\/Ap9AhaXb5r3c2F08NjPx4cjESEALVhZRZv1XtP1KvYJ6Cp1GllDQ\/wCkQcNf0eNgFVstFAya+v2CSh15iQyufu10HAYNQJ4LRdCxojhGuKGgMW7d\/PkwjCDDDY+gAJLHAhgQwK\/G8b74ySGgI6zPYsEkw8osMSFAMgHEhpxxmYRGgJK7Rrtp2hfFhZGIE4EsPcPxHWdWYSVMaB8AomhrFk8RpSFEYgFAeOwSjVLmm9GA54GFHKa4uTNWuEjEiyMQAwIYDMqIxlllF6FcZ4BYtkZQ7tcJSNgEKgYIcZtHxnK7EyKCE1AszACcSMAAlqugXsK2+Ki0bCNH+O+GK4\/nQj4WpC4pxypzHwMTQ6mEw2+674jkK1YwtgPXGW0nsYVYBtcFkagHwhYDYjN6BXtGuzNSFPfzMII9AMBS0CyRPLKzsfsZvbjEriNNCNgjRAl1YN+v8sETDMl9u7e6b1z+SCaV3aNbu+uhVtOCQJW2WnHOeRrwJTcO9\/mACDgG7xKHWQCDsADSfMlKC3wu2zUBbMVnGYe9PXe\/UUPzAOSW4I3Ec0E7OtD4MY8BFL7AsiJ3\/0m0Rz47Je\/2hf3x2PAffGYknuRTMDkPtt9cWdKmB+HprVg+mNhBPqBgJ0HpF048qQBK0YIe8P0A3tugxDwCUh7B3IXzJTYUwSYgHsKPzfOBGQO7CkCTMA9hZ8bZwIyB\/YUASbgnsLPjTMBmQN7isDArgUnfa12T5\/6ADXOGnCAHkYaL4UJmManPkD3zAQcoIeRxksZ2DFg7cPYL\/5ttdfdbjqtY17WgO0yhMvFggATMBZYudJ2EWACtosUl4sFASZgLLBype0iwARsFykuFwsC+8YKjuXuG1R65dZn4sWLb1UdfevUT8R3jx2vyuNE7wiwBgzBcHVruy735upXdXmc0TsCTMAQDFe3t0JyOSsOBJiAIajeXKvXdmF5IadyVocIMAFDAPvkzu263Jtrq3V5nNE7AkzAEAxvhGjAK5\/fCCnJWb0iwASsQRCa7pM7yzW5QqALvsGGSB0uvWYwAWsQvPL5ZzU5u8k\/\/PtfuwmORYIAE7AGxvkP3q\/J2U2+\/tE\/xGqJLeRdRHqPMQEDGJ7\/4LIIG\/\/ZIqulkjjfhKC2HIftI8AErGAF8rVDLmhBlGWJBoHUL8V5Wu2yALHaFRAV5809\/T0xmRtp9zQuF4JAagkIAr3+0d8N8RDvVEDYd4vXDAmfOXZCHJ+c7LQKLk8IJJ6AcCyw67iYYsHnr2Tp3ohgYhlTM6\/85U+GSI99bUo8QCR89D4KJyaNZpzM5ciB4QQTrQkCiSdgrVdLEyx6OvTxl8sCH2jFoCT9XZbgvXYTZyOkG9T4nMgQYAJGBiVX1A0CTMBuUONzIkMg8WNAeDLDysUKBowGeLog\/DhkvbcXVI+T4fHM108YA+SBiYOmqgcmvbCXepN+buIJ2MiNHiSEhwuW3pqtfjQjAKzclx7\/Nn2+xfOBzYBqcizxBGx079BSP\/7mQfF84REzF9jp6sZLjz8V60R0Wqzn1BLQEhNaDCsakHZJOPf0s\/45th4Ou0OAjZAKbiAhutNWYjVfq3J8vD0EmIABnLy13VwgpzqKbttqy+ojnOoWASZgADnPqHgqkFMdfekJNjaqEek9xQSswbBZN\/yD6UdqSnOyVwSYgDUIQguGebY8Rk4Gx3lerwat3pNMwBAMnwnZggOeLizRI8AEDMHUrmQEDz1K7lYs0SPABAzBNIyAYXkhp3JWhwgwAUMAmxyud7PH2JAlegSYgCGYTo4M1+Xyux91kESSkfqluDAU4UaflrXYsPvvZx5rwH6izW3VIbBvNGC3v6PRjSbr9Y25OpQ5oyEC+4aADe8g4gPv\/vc\/4teXL3XtIxjx5SS+OiZg5RHj9c35v70vrtzibdj6yfrUExDvCb\/y5z8y8frJukBbA0vAbsZuuK92x4p2nNdsPxg4nrK7fYAtMUQHloAx3Kup0hLP22otfEsOvEfy2+\/\/kJ0P4noIgXpTRcBWBgaI9\/J3nuXfAwkQJO5oKgjYysDAOu\/ZZ58Tzz\/E\/n5xE662fiKgXBFC57WrhVSy9vi+T7948fcNDQzPA5pfq+z3Q9Za2yZXskLqFaFFXtOXpL+kSaNpFTYw9u5J+wSUggiYMmEDY7AeeGoIyAbGYBHPXk3iCcgGhn3UgxkmloBsYAwm4XBVrjVCtFzJSi0WySaZdlxXKJUM7yw2MAaXfLgy3wgROnlGyOWf\/oJXMAabf1VXp1whaB6QWEnzgEkQfnd3fz1FJbU2P46rNVGRhRHoAwKu45hWpJSLyRj09QE0biI6BKwNghqVlmIREZeMEBZGoB8I2N7W1e51snuxFhwwjftxBdxGqhHYtYLlinKwFgwJ6sVUw8M3HzcCruP1tgpjwAzNA6LBctkbGMbdONfPCPgaULsrSpQ9AvqZjA8jEDMCWPQwQtxThaNHF5GAEZKUuUBzc\/w1sAhYgxfc86ZhKpYwfAJZGIE4EShX5gDJEfoq2jEEJPvDJHZ2duJsm+tmBISdhKbIdcBR0YCuSeyyk5FiBOJBoFwum4q1CmpAkVlArsuWsAGHv+JDwKlwTEm12wVnMsMLaBIakA0RIMESFwI7FQ0oMvcW0IbpgguHDq3Q60gLmIopuzwfGBf4aa\/XJx8ZIIVDhRWfgIjQJMx7CLe3txGwMAKRI7C95e1EobVjuIYGPCPEiywgY7vEBAQOLNEjYDWgEtkLtnafgIXDRxdsN2wL2kIcMgK9IlCiHw03E9C09FuYmjIGCOr0CVhp4B2EW\/c2K0kOGIFoELA9qxT6XLDGagJmcxewVQc0IGvBIEwc7wUBn09G+x0lju1KFQFhDWvhvobDrAV3QeJYbwhsrG+YCmiW5c3ammjYVy3Fu3fzeqf0IW0TMz02NipGRup\/tKX6DE4xAo0RwNhvY+Me+ZuKxYemjhRqS1ZpQBw0c4JKziG+ubnFE9MAgqUrBOB2BQ5Basd+tsI6AuJA4b77L5JqNBPT6xue+rQncMgItIsAhnHGzU+Ii4Wp6rGfrSOUgOZgWf\/cGCTkIbO15bHYnsQhI9AKgS2adC6ZRQ1676OsTY8adk5DAsJZUArnHE6CGvW9WMNq4TxGIICA1\/V6U3lSu3PW6TlQxI82JCBKFA4fm9fSfQ1rxGura0xCHzaONEIA5ANXwBl6\/fK1Rl2vPZ+Ges3FWMXl7UtkxsxkMhkxOTGRyK18m6PAR9tBAKRbhaKC1zM5OZPV+2Sr85pqQJxsrOKy+wLMaFS8ukbsTsg+Mq3A4ePtI1BDvkXp6BfaObulBrSVFJeWpnVGXsL8IGtCiwqHQCCEfM81G\/cFUWubgDiploQHJg6ITEL2FAyCwvH2EcCYb31t3Xa70Hxtkw+tdERAnBAkITa0nJicYBICmBSKNTisl0un5ANkHRMQJxkSZtXbMExoiy0xOjrCS3YAJkWCeb7NzU3T\/cLgwJiv3W43CFNXBLQVfHrn1rzU6gzSueFhMUJrx9wlW3SSGWK8B+eC7corvJhqURulVwsFz8W+07vuiYBorLi8dFpLdZ60YR5dMrRhLpfr9Dq4\/D5AoErrkdsezfSde\/jwkfleLr1nAqJxdMkiK8\/TvgqnkAYRxw+Mi6FsYjfhx22mRuDPh3XdgI\/ogqSl2m663FrQIiGgrdRoQyHPYqoGeSDgcG6YNaIFaJ+FdcSjuWCztHb\/sYtR3UqkBLQX9entpVellj+zRIRGNGQcybFWtCANYIjxHd4N3yEnghK9nIa0J+huaay3vjXf7Viv0e3GQkDbWK1GtPkgYyabEVkKFS3vZenD0l8EQC58sB8QVriwY4HZmMAnnbmeBSLIO2J980LUxLN3GysBbSPF5eUZV5RPS5k5iakbmx8MoSVhQWNaR2W8EHEvvUtQk6b8oNhywbykxy2Bau8Tc3MQTaHVYMYnr0I4bESKfDN3V3uyl14gar5Ha7QLeFMyvEh0udVPMrp6G9ZULBbzYmJsljaonlFCPUFKfroRKRtWwgeiQYC25aOh0lVXO7RZOO0PtHZvIS5N1+iC+07ARhfiWdJERqny9C86Tf+\/eaXVg6a81NP2PC1kXkidt2kTasqj8lV5iU\/Q5vJ2f+\/AveKn17wkHdfejxC5knajp2kT7AdutmSmnUmjsGADzXYd\/T+j7cbUE7Qx3wAAAABJRU5ErkJggg==" + }, + "copyright": "Copyright 2023 Dify", + "privacy_policy": "https:\/\/dify.ai\n", + "position": 6, + "chunk_structure": "qa_model", + "language": "en-US" + } + ] + }, + "9f5ea5a7-7796-49f3-9e9a-ae2d8e84cfa3": { + "chunk_structure": "text_model", + "description": "In this template, the document content is divided into smaller paragraphs, known as general chunks, which are directly used for matching user queries and retrieval in Economical indexing mode.", + "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/dify_extractor:0.0.5@ba7e2fd9165eda73bfcc68e31a108855197e88706e5556c058e0777ab08409b3\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/general_chunker:0.0.7@a685cc66820d0471545499d2ff5c87ed7e51525470155dbc2f82e1114cd2a9d6\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/notion_datasource:0.1.12@2855c4a7cffd3311118ebe70f095e546f99935e47f12c841123146f728534f55\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina_datasource:0.0.5@75942f5bbde870ad28e0345ff5ebf54ebd3aec63f0e66344ef76b88cf06b85c3\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/google_drive:0.1.6@4bc0cf8f8979ebd7321b91506b4bc8f090b05b769b5d214f2da4ce4c04ce30bd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/firecrawl_datasource:0.2.4@37b490ebc52ac30d1c6cbfa538edcddddcfed7d5f5de58982edbd4e2094eb6e2\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: d86a91f4-9a03-4680-a040-e5210e5595e6\n icon_background: '#FFEAD5'\n icon_type: image\n icon_url: \n name: General Mode-ECO\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1751337124089-source-1750836372241-target\n selected: false\n source: '1751337124089'\n sourceHandle: source\n target: '1750836372241'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: variable-aggregator\n targetType: tool\n id: 1753346901505-source-1751337124089-target\n selected: false\n source: '1753346901505'\n sourceHandle: source\n target: '1751337124089'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: variable-aggregator\n id: 1750836391776-source-1753346901505-target\n selected: false\n source: '1750836391776'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: document-extractor\n targetType: variable-aggregator\n id: 1753349228522-source-1753346901505-target\n selected: false\n source: '1753349228522'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1754023419266-source-1753346901505-target\n selected: false\n source: '1754023419266'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756442998557-source-1756442986174-target\n selected: false\n source: '1756442998557'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: variable-aggregator\n targetType: if-else\n id: 1756442986174-source-1756443014860-target\n selected: false\n source: '1756442986174'\n sourceHandle: source\n target: '1756443014860'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1750836380067-source-1756442986174-target\n selected: false\n source: '1750836380067'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: tool\n id: 1756443014860-true-1750836391776-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'true'\n target: '1750836391776'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: document-extractor\n id: 1756443014860-false-1753349228522-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'false'\n target: '1753349228522'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756896212061-source-1753346901505-target\n source: '1756896212061'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756907397615-source-1753346901505-target\n source: '1756907397615'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: text_model\n index_chunk_variable_selector:\n - '1751337124089'\n - result\n indexing_technique: economy\n keyword_number: 10\n retrieval_model:\n score_threshold: 0.5\n score_threshold_enabled: false\n search_method: keyword_search\n top_k: 3\n selected: false\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750836372241'\n position:\n x: 479.7628208876065\n y: 326\n positionAbsolute:\n x: 479.7628208876065\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - txt\n - markdown\n - mdx\n - pdf\n - html\n - xlsx\n - xls\n - vtt\n - properties\n - doc\n - docx\n - csv\n - eml\n - msg\n - pptx\n - xml\n - epub\n - ppt\n - md\n plugin_id: langgenius/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1750836380067'\n position:\n x: -1371.6520723158733\n y: 224.87938381325645\n positionAbsolute:\n x: -1371.6520723158733\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n documents:\n description: the documents extracted from the file\n items:\n type: object\n type: array\n images:\n description: The images extracted from the file\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n pt_BR: o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png,\n jpg, jpeg)\n zh_Hans: 用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n params:\n file: ''\n provider_id: langgenius/dify_extractor/dify_extractor\n provider_name: langgenius/dify_extractor/dify_extractor\n provider_type: builtin\n selected: false\n title: Dify Extractor\n tool_configurations: {}\n tool_description: Dify Extractor\n tool_label: Dify Extractor\n tool_name: dify_extractor\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1756442986174'\n - output\n type: tool\n height: 52\n id: '1750836391776'\n position:\n x: -417.5334221022782\n y: 268.1692071834485\n positionAbsolute:\n x: -417.5334221022782\n y: 268.1692071834485\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 252\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n → use extractor to extract document content → split and clean content into\n structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1124\n height: 252\n id: '1751252161631'\n position:\n x: -1371.6520723158733\n y: -123.758428116601\n positionAbsolute:\n x: -1371.6520723158733\n y: -123.758428116601\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1124\n - data:\n author: TenTen\n desc: ''\n height: 388\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 285\n height: 388\n id: '1751252440357'\n position:\n x: -1723.9942193415582\n y: 224.87938381325645\n positionAbsolute:\n x: -1723.9942193415582\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 285\n - data:\n author: TenTen\n desc: ''\n height: 430\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor in Retrieval-Augmented Generation (RAG) is a tool or\n component that automatically identifies, extracts, and structures text and\n data from various types of documents—such as PDFs, images, scanned files,\n handwritten notes, and more—into a format that can be effectively used by\n language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify\n Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is\n a built-in document parser developed by Dify. It supports a wide range of\n common file formats and offers specialized handling for certain formats,\n such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\n In addition to text extraction, it can extract images embedded within documents,\n store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 430\n id: '1751253091602'\n position:\n x: -417.5334221022782\n y: 532.832924599999\n positionAbsolute:\n x: -417.5334221022782\n y: 532.832924599999\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 265\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"General\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" divides\n content into chunks and retrieves the most relevant ones based on the user’s\n query for LLM processing. You can customize chunking rules—such as delimiter,\n maximum length, and overlap—to fit different document formats or scenarios.\n Preprocessing options are also available to clean up the text by removing\n excess spaces, URLs, and emails.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 265\n id: '1751253953926'\n position:\n x: 184.46657789772178\n y: 407.42301051148354\n positionAbsolute:\n x: 184.46657789772178\n y: 407.42301051148354\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 344\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 344\n id: '1751254117904'\n position:\n x: 479.7628208876065\n y: 472.46585541244207\n positionAbsolute:\n x: 479.7628208876065\n y: 472.46585541244207\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: The result of the general chunk tool.\n properties:\n general_chunks:\n items:\n description: The chunk of the text.\n type: string\n type: array\n type: object\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The text you want to chunk.\n ja_JP: The text you want to chunk.\n pt_BR: The text you want to chunk.\n zh_Hans: 你想要分块的文本。\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Input Content\n zh_Hans: 输入变量\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_variable\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The delimiter of the chunks.\n ja_JP: The delimiter of the chunks.\n pt_BR: The delimiter of the chunks.\n zh_Hans: 块的分隔符。\n label:\n en_US: Delimiter\n ja_JP: Delimiter\n pt_BR: Delimiter\n zh_Hans: 分隔符\n llm_description: The delimiter of the chunks, the format of the delimiter\n must be a string.\n max: null\n min: null\n name: delimiter\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The maximum chunk length.\n ja_JP: The maximum chunk length.\n pt_BR: The maximum chunk length.\n zh_Hans: 最大块的长度。\n label:\n en_US: Maximum Chunk Length\n ja_JP: Maximum Chunk Length\n pt_BR: Maximum Chunk Length\n zh_Hans: 最大块的长度\n llm_description: The maximum chunk length, the format of the chunk size\n must be an integer.\n max: null\n min: null\n name: max_chunk_length\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The chunk overlap length.\n ja_JP: The chunk overlap length.\n pt_BR: The chunk overlap length.\n zh_Hans: 块的重叠长度。\n label:\n en_US: Chunk Overlap Length\n ja_JP: Chunk Overlap Length\n pt_BR: Chunk Overlap Length\n zh_Hans: 块的重叠长度\n llm_description: The chunk overlap length, the format of the chunk overlap\n length must be an integer.\n max: null\n min: null\n name: chunk_overlap_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Replace consecutive spaces, newlines and tabs\n zh_Hans: 替换连续的空格、换行符和制表符\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Replace consecutive spaces, newlines and tabs\n zh_Hans: 替换连续的空格、换行符和制表符\n llm_description: Replace consecutive spaces, newlines and tabs, the format\n of the replace must be a boolean.\n max: null\n min: null\n name: replace_consecutive_spaces_newlines_tabs\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Delete all URLs and email addresses\n zh_Hans: 删除所有URL和电子邮件地址\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Delete all URLs and email addresses\n zh_Hans: 删除所有URL和电子邮件地址\n llm_description: Delete all URLs and email addresses, the format of the\n delete must be a boolean.\n max: null\n min: null\n name: delete_all_urls_and_email_addresses\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n chunk_overlap_length: ''\n delete_all_urls_and_email_addresses: ''\n delimiter: ''\n input_variable: ''\n max_chunk_length: ''\n replace_consecutive_spaces_newlines_tabs: ''\n provider_id: langgenius/general_chunker/general_chunker\n provider_name: langgenius/general_chunker/general_chunker\n provider_type: builtin\n selected: false\n title: General Chunker\n tool_configurations: {}\n tool_description: A tool for general text chunking mode, the chunks retrieved\n and recalled are the same.\n tool_label: General Chunker\n tool_name: general_chunker\n tool_node_version: '2'\n tool_parameters:\n chunk_overlap_length:\n type: variable\n value:\n - rag\n - shared\n - Chunk_Overlap_Length\n delete_all_urls_and_email_addresses:\n type: variable\n value:\n - rag\n - shared\n - clean_2\n delimiter:\n type: mixed\n value: '{{#rag.shared.Dilmiter#}}'\n input_variable:\n type: mixed\n value: '{{#1753346901505.output#}}'\n max_chunk_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Chunk_Length\n replace_consecutive_spaces_newlines_tabs:\n type: variable\n value:\n - rag\n - shared\n - clean_1\n type: tool\n height: 52\n id: '1751337124089'\n position:\n x: 184.46657789772178\n y: 326\n positionAbsolute:\n x: 184.46657789772178\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n output_type: string\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836391776'\n - text\n - - '1753349228522'\n - text\n - - '1754023419266'\n - content\n - - '1756896212061'\n - content\n height: 187\n id: '1753346901505'\n position:\n x: -117.24452412456148\n y: 326\n positionAbsolute:\n x: -117.24452412456148\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_array_file: false\n selected: false\n title: Doc Extractor\n type: document-extractor\n variable_selector:\n - '1756442986174'\n - output\n height: 92\n id: '1753349228522'\n position:\n x: -417.5334221022782\n y: 417.25474169825833\n positionAbsolute:\n x: -417.5334221022782\n y: 417.25474169825833\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Notion\n datasource_name: notion_datasource\n datasource_parameters: {}\n plugin_id: langgenius/notion_datasource\n provider_name: notion_datasource\n provider_type: online_document\n selected: false\n title: Notion\n type: datasource\n height: 52\n id: '1754023419266'\n position:\n x: -1369.6904698303242\n y: 440.01452302398053\n positionAbsolute:\n x: -1369.6904698303242\n y: 440.01452302398053\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n output_type: file\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836380067'\n - file\n - - '1756442998557'\n - file\n height: 135\n id: '1756442986174'\n position:\n x: -1067.06980963949\n y: 236.10252072775984\n positionAbsolute:\n x: -1067.06980963949\n y: 236.10252072775984\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Google Drive\n datasource_name: google_drive\n datasource_parameters: {}\n plugin_id: langgenius/google_drive\n provider_name: google_drive\n provider_type: online_drive\n selected: false\n title: Google Drive\n type: datasource\n height: 52\n id: '1756442998557'\n position:\n x: -1371.6520723158733\n y: 326\n positionAbsolute:\n x: -1371.6520723158733\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n cases:\n - case_id: 'true'\n conditions:\n - comparison_operator: is\n id: 1581dd11-7898-41f4-962f-937283ba7e01\n value: .xlsx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 92abb46d-d7e4-46e7-a5e1-8a29bb45d528\n value: .xls\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1dde5ae7-754d-4e83-96b2-fe1f02995d8b\n value: .md\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 7e1a80e5-c32a-46a4-8f92-8912c64972aa\n value: .markdown\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 53abfe95-c7d0-4f63-ad37-17d425d25106\n value: .mdx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 436877b8-8c0a-4cc6-9565-92754db08571\n value: .html\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 5e3e375e-750b-4204-8ac3-9a1174a5ab7c\n value: .htm\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1a84a784-a797-4f96-98a0-33a9b48ceb2b\n value: .docx\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 62d11445-876a-493f-85d3-8fc020146bdd\n value: .csv\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 02c4bce8-7668-4ccd-b750-4281f314b231\n value: .txt\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n id: 'true'\n logical_operator: or\n selected: false\n title: IF/ELSE\n type: if-else\n height: 358\n id: '1756443014860'\n position:\n x: -733.5977815139424\n y: 236.10252072775984\n positionAbsolute:\n x: -733.5977815139424\n y: 236.10252072775984\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Jina Reader\n datasource_name: jina_reader\n datasource_parameters:\n crawl_sub_pages:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_subpages\n limit:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_limit\n url:\n type: mixed\n value: '{{#rag.1756896212061.jina_url#}}'\n use_sitemap:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jian_sitemap\n plugin_id: langgenius/jina_datasource\n provider_name: jinareader\n provider_type: website_crawl\n selected: false\n title: Jina Reader\n type: datasource\n height: 52\n id: '1756896212061'\n position:\n x: -1371.6520723158733\n y: 538.9988445953813\n positionAbsolute:\n x: -1371.6520723158733\n y: 538.9988445953813\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Firecrawl\n datasource_name: crawl\n datasource_parameters:\n crawl_subpages:\n type: variable\n value:\n - rag\n - '1756907397615'\n - firecrawl_subpages\n exclude_paths:\n type: mixed\n value: '{{#rag.1756907397615.exclude_paths#}}'\n include_paths:\n type: mixed\n value: '{{#rag.1756907397615.include_paths#}}'\n limit:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_pages\n max_depth:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_depth\n only_main_content:\n type: variable\n value:\n - rag\n - '1756907397615'\n - main_content\n url:\n type: mixed\n value: '{{#rag.1756907397615.firecrawl_url1#}}'\n plugin_id: langgenius/firecrawl_datasource\n provider_name: firecrawl\n provider_type: website_crawl\n selected: false\n title: Firecrawl\n type: datasource\n height: 52\n id: '1756907397615'\n position:\n x: -1371.6520723158733\n y: 644.3296146102903\n positionAbsolute:\n x: -1371.6520723158733\n y: 644.3296146102903\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n viewport:\n x: 1463.3408543698197\n y: 224.29398382646679\n zoom: 0.6387381963193622\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_reader_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_reader_imit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Crawl sub-pages\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: checkbox\n unit: null\n variable: Crawl_sub_pages_2\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: Use_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_limit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Follow the sitemap to crawl the site. If not, Jina Reader will crawl\n iteratively based on page relevance, yielding fewer but higher-quality pages.\n type: checkbox\n unit: null\n variable: jian_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Crawl subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: jina_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: firecrawl_url1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: firecrawl_subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: firecrawl_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: Exclude paths\n max_length: 256\n options: []\n placeholder: blog/*,/about/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: exclude_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: include_paths\n max_length: 256\n options: []\n placeholder: articles/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: include_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 0\n label: Max depth\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Maximum depth to crawl relative to the entered URL. Depth 0 just scrapes\n the page of the entered url, depth 1 scrapes the url and everything after enteredURL\n + one /, and so on.\n type: number\n unit: null\n variable: max_depth\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: null\n variable: max_pages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: Extract only main content (no headers, navs, footers, etc.)\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: main_content\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Dilmiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n\\n and \\n are\n commonly used delimiters for separating paragraphs and lines. Combined with\n commas (\\n\\n,\\n), paragraphs will be segmented by lines when exceeding the maximum\n chunk length. You can also use special delimiters defined by yourself (e.g.\n ***).\n type: text-input\n unit: null\n variable: Dilmiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Chunk Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Chunk_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 128\n label: Chunk Overlap Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: Setting the chunk overlap can maintain the semantic relevance between\n them, enhancing the retrieve effect. It is recommended to set 10%-25% of the\n maximum chunk size.\n type: number\n unit: tokens\n variable: Chunk_Overlap_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: true\n label: Replace consecutive spaces, newlines and tabs.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: null\n label: Delete all URLs and email addresses.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_2\n", + "graph": { + "edges": [ + { + "data": { + "isInLoop": false, + "sourceType": "tool", + "targetType": "knowledge-index" + }, + "id": "1751337124089-source-1750836372241-target", + "selected": false, + "source": "1751337124089", + "sourceHandle": "source", + "target": "1750836372241", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "variable-aggregator", + "targetType": "tool" + }, + "id": "1753346901505-source-1751337124089-target", + "selected": false, + "source": "1753346901505", + "sourceHandle": "source", + "target": "1751337124089", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "tool", + "targetType": "variable-aggregator" + }, + "id": "1750836391776-source-1753346901505-target", + "selected": false, + "source": "1750836391776", + "sourceHandle": "source", + "target": "1753346901505", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "document-extractor", + "targetType": "variable-aggregator" + }, + "id": "1753349228522-source-1753346901505-target", + "selected": false, + "source": "1753349228522", + "sourceHandle": "source", + "target": "1753346901505", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "variable-aggregator" + }, + "id": "1754023419266-source-1753346901505-target", + "selected": false, + "source": "1754023419266", + "sourceHandle": "source", + "target": "1753346901505", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "variable-aggregator" + }, + "id": "1756442998557-source-1756442986174-target", + "selected": false, + "source": "1756442998557", + "sourceHandle": "source", + "target": "1756442986174", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInIteration": false, + "isInLoop": false, + "sourceType": "variable-aggregator", + "targetType": "if-else" + }, + "id": "1756442986174-source-1756443014860-target", + "selected": false, + "source": "1756442986174", + "sourceHandle": "source", + "target": "1756443014860", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "variable-aggregator" + }, + "id": "1750836380067-source-1756442986174-target", + "selected": false, + "source": "1750836380067", + "sourceHandle": "source", + "target": "1756442986174", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "if-else", + "targetType": "tool" + }, + "id": "1756443014860-true-1750836391776-target", + "selected": false, + "source": "1756443014860", + "sourceHandle": "true", + "target": "1750836391776", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "if-else", + "targetType": "document-extractor" + }, + "id": "1756443014860-false-1753349228522-target", + "selected": false, + "source": "1756443014860", + "sourceHandle": "false", + "target": "1753349228522", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "variable-aggregator" + }, + "id": "1756896212061-source-1753346901505-target", + "source": "1756896212061", + "sourceHandle": "source", + "target": "1753346901505", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "variable-aggregator" + }, + "id": "1756907397615-source-1753346901505-target", + "source": "1756907397615", + "sourceHandle": "source", + "target": "1753346901505", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + } + ], + "nodes": [ + { + "data": { + "chunk_structure": "text_model", + "index_chunk_variable_selector": [ + "1751337124089", + "result" + ], + "indexing_technique": "economy", + "keyword_number": 10, + "retrieval_model": { + "score_threshold": 0.5, + "score_threshold_enabled": false, + "search_method": "keyword_search", + "top_k": 3 + }, + "selected": false, + "title": "Knowledge Base", + "type": "knowledge-index" + }, + "height": 114, + "id": "1750836372241", + "position": { + "x": 479.7628208876065, + "y": 326 + }, + "positionAbsolute": { + "x": 479.7628208876065, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "File", + "datasource_name": "upload-file", + "datasource_parameters": {}, + "fileExtensions": [ + "txt", + "markdown", + "mdx", + "pdf", + "html", + "xlsx", + "xls", + "vtt", + "properties", + "doc", + "docx", + "csv", + "eml", + "msg", + "pptx", + "xml", + "epub", + "ppt", + "md" + ], + "plugin_id": "langgenius/file", + "provider_name": "file", + "provider_type": "local_file", + "selected": false, + "title": "File", + "type": "datasource" + }, + "height": 52, + "id": "1750836380067", + "position": { + "x": -1371.6520723158733, + "y": 224.87938381325645 + }, + "positionAbsolute": { + "x": -1371.6520723158733, + "y": 224.87938381325645 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "is_team_authorization": true, + "output_schema": { + "properties": { + "documents": { + "description": "the documents extracted from the file", + "items": { + "type": "object" + }, + "type": "array" + }, + "images": { + "description": "The images extracted from the file", + "items": { + "type": "object" + }, + "type": "array" + } + }, + "type": "object" + }, + "paramSchemas": [ + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "ja_JP": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "pt_BR": "o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "zh_Hans": "用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)" + }, + "label": { + "en_US": "file", + "ja_JP": "file", + "pt_BR": "file", + "zh_Hans": "file" + }, + "llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "max": null, + "min": null, + "name": "file", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "file" + } + ], + "params": { + "file": "" + }, + "provider_id": "langgenius/dify_extractor/dify_extractor", + "provider_name": "langgenius/dify_extractor/dify_extractor", + "provider_type": "builtin", + "selected": false, + "title": "Dify Extractor", + "tool_configurations": {}, + "tool_description": "Dify Extractor", + "tool_label": "Dify Extractor", + "tool_name": "dify_extractor", + "tool_node_version": "2", + "tool_parameters": { + "file": { + "type": "variable", + "value": [ + "1756442986174", + "output" + ] + } + }, + "type": "tool" + }, + "height": 52, + "id": "1750836391776", + "position": { + "x": -417.5334221022782, + "y": 268.1692071834485 + }, + "positionAbsolute": { + "x": -417.5334221022782, + "y": 268.1692071834485 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 252, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source → use extractor to extract document content → split and clean content into structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 1124 + }, + "height": 252, + "id": "1751252161631", + "position": { + "x": -1371.6520723158733, + "y": -123.758428116601 + }, + "positionAbsolute": { + "x": -1371.6520723158733, + "y": -123.758428116601 + }, + "selected": true, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 1124 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 388, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 285 + }, + "height": 388, + "id": "1751252440357", + "position": { + "x": -1723.9942193415582, + "y": 224.87938381325645 + }, + "positionAbsolute": { + "x": -1723.9942193415582, + "y": 224.87938381325645 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 285 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 430, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A document extractor in Retrieval-Augmented Generation (RAG) is a tool or component that automatically identifies, extracts, and structures text and data from various types of documents—such as PDFs, images, scanned files, handwritten notes, and more—into a format that can be effectively used by language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is a built-in document parser developed by Dify. It supports a wide range of common file formats and offers specialized handling for certain formats, such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\". In addition to text extraction, it can extract images embedded within documents, store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 430, + "id": "1751253091602", + "position": { + "x": -417.5334221022782, + "y": 532.832924599999 + }, + "positionAbsolute": { + "x": -417.5334221022782, + "y": 532.832924599999 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 265, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"General Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" divides content into chunks and retrieves the most relevant ones based on the user’s query for LLM processing. You can customize chunking rules—such as delimiter, maximum length, and overlap—to fit different document formats or scenarios. Preprocessing options are also available to clean up the text by removing excess spaces, URLs, and emails.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 265, + "id": "1751253953926", + "position": { + "x": 184.46657789772178, + "y": 407.42301051148354 + }, + "positionAbsolute": { + "x": 184.46657789772178, + "y": 407.42301051148354 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 344, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 344, + "id": "1751254117904", + "position": { + "x": 479.7628208876065, + "y": 472.46585541244207 + }, + "positionAbsolute": { + "x": 479.7628208876065, + "y": 472.46585541244207 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "is_team_authorization": true, + "output_schema": { + "properties": { + "result": { + "description": "The result of the general chunk tool.", + "properties": { + "general_chunks": { + "items": { + "description": "The chunk of the text.", + "type": "string" + }, + "type": "array" + } + }, + "type": "object" + } + }, + "type": "object" + }, + "paramSchemas": [ + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "The text you want to chunk.", + "ja_JP": "The text you want to chunk.", + "pt_BR": "The text you want to chunk.", + "zh_Hans": "你想要分块的文本。" + }, + "label": { + "en_US": "Input Content", + "ja_JP": "Input Content", + "pt_BR": "Input Content", + "zh_Hans": "输入变量" + }, + "llm_description": "The text you want to chunk.", + "max": null, + "min": null, + "name": "input_variable", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "The delimiter of the chunks.", + "ja_JP": "The delimiter of the chunks.", + "pt_BR": "The delimiter of the chunks.", + "zh_Hans": "块的分隔符。" + }, + "label": { + "en_US": "Delimiter", + "ja_JP": "Delimiter", + "pt_BR": "Delimiter", + "zh_Hans": "分隔符" + }, + "llm_description": "The delimiter of the chunks, the format of the delimiter must be a string.", + "max": null, + "min": null, + "name": "delimiter", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "The maximum chunk length.", + "ja_JP": "The maximum chunk length.", + "pt_BR": "The maximum chunk length.", + "zh_Hans": "最大块的长度。" + }, + "label": { + "en_US": "Maximum Chunk Length", + "ja_JP": "Maximum Chunk Length", + "pt_BR": "Maximum Chunk Length", + "zh_Hans": "最大块的长度" + }, + "llm_description": "The maximum chunk length, the format of the chunk size must be an integer.", + "max": null, + "min": null, + "name": "max_chunk_length", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "number" + }, + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "The chunk overlap length.", + "ja_JP": "The chunk overlap length.", + "pt_BR": "The chunk overlap length.", + "zh_Hans": "块的重叠长度。" + }, + "label": { + "en_US": "Chunk Overlap Length", + "ja_JP": "Chunk Overlap Length", + "pt_BR": "Chunk Overlap Length", + "zh_Hans": "块的重叠长度" + }, + "llm_description": "The chunk overlap length, the format of the chunk overlap length must be an integer.", + "max": null, + "min": null, + "name": "chunk_overlap_length", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "number" + }, + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "Replace consecutive spaces, newlines and tabs", + "ja_JP": "Replace consecutive spaces, newlines and tabs", + "pt_BR": "Replace consecutive spaces, newlines and tabs", + "zh_Hans": "替换连续的空格、换行符和制表符" + }, + "label": { + "en_US": "Replace consecutive spaces, newlines and tabs", + "ja_JP": "Replace consecutive spaces, newlines and tabs", + "pt_BR": "Replace consecutive spaces, newlines and tabs", + "zh_Hans": "替换连续的空格、换行符和制表符" + }, + "llm_description": "Replace consecutive spaces, newlines and tabs, the format of the replace must be a boolean.", + "max": null, + "min": null, + "name": "replace_consecutive_spaces_newlines_tabs", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + }, + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "Delete all URLs and email addresses", + "ja_JP": "Delete all URLs and email addresses", + "pt_BR": "Delete all URLs and email addresses", + "zh_Hans": "删除所有URL和电子邮件地址" + }, + "label": { + "en_US": "Delete all URLs and email addresses", + "ja_JP": "Delete all URLs and email addresses", + "pt_BR": "Delete all URLs and email addresses", + "zh_Hans": "删除所有URL和电子邮件地址" + }, + "llm_description": "Delete all URLs and email addresses, the format of the delete must be a boolean.", + "max": null, + "min": null, + "name": "delete_all_urls_and_email_addresses", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + } + ], + "params": { + "chunk_overlap_length": "", + "delete_all_urls_and_email_addresses": "", + "delimiter": "", + "input_variable": "", + "max_chunk_length": "", + "replace_consecutive_spaces_newlines_tabs": "" + }, + "provider_id": "langgenius/general_chunker/general_chunker", + "provider_name": "langgenius/general_chunker/general_chunker", + "provider_type": "builtin", + "selected": false, + "title": "General Chunker", + "tool_configurations": {}, + "tool_description": "A tool for general text chunking mode, the chunks retrieved and recalled are the same.", + "tool_label": "General Chunker", + "tool_name": "general_chunker", + "tool_node_version": "2", + "tool_parameters": { + "chunk_overlap_length": { + "type": "variable", + "value": [ + "rag", + "shared", + "Chunk_Overlap_Length" + ] + }, + "delete_all_urls_and_email_addresses": { + "type": "variable", + "value": [ + "rag", + "shared", + "clean_2" + ] + }, + "delimiter": { + "type": "mixed", + "value": "{{#rag.shared.Dilmiter#}}" + }, + "input_variable": { + "type": "mixed", + "value": "{{#1753346901505.output#}}" + }, + "max_chunk_length": { + "type": "variable", + "value": [ + "rag", + "shared", + "Maximum_Chunk_Length" + ] + }, + "replace_consecutive_spaces_newlines_tabs": { + "type": "variable", + "value": [ + "rag", + "shared", + "clean_1" + ] + } + }, + "type": "tool" + }, + "height": 52, + "id": "1751337124089", + "position": { + "x": 184.46657789772178, + "y": 326 + }, + "positionAbsolute": { + "x": 184.46657789772178, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "output_type": "string", + "selected": false, + "title": "Variable Aggregator", + "type": "variable-aggregator", + "variables": [ + [ + "1750836391776", + "text" + ], + [ + "1753349228522", + "text" + ], + [ + "1754023419266", + "content" + ], + [ + "1756896212061", + "content" + ] + ] + }, + "height": 187, + "id": "1753346901505", + "position": { + "x": -117.24452412456148, + "y": 326 + }, + "positionAbsolute": { + "x": -117.24452412456148, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "is_array_file": false, + "selected": false, + "title": "Doc Extractor", + "type": "document-extractor", + "variable_selector": [ + "1756442986174", + "output" + ] + }, + "height": 92, + "id": "1753349228522", + "position": { + "x": -417.5334221022782, + "y": 417.25474169825833 + }, + "positionAbsolute": { + "x": -417.5334221022782, + "y": 417.25474169825833 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "Notion", + "datasource_name": "notion_datasource", + "datasource_parameters": {}, + "plugin_id": "langgenius/notion_datasource", + "provider_name": "notion_datasource", + "provider_type": "online_document", + "selected": false, + "title": "Notion", + "type": "datasource" + }, + "height": 52, + "id": "1754023419266", + "position": { + "x": -1369.6904698303242, + "y": 440.01452302398053 + }, + "positionAbsolute": { + "x": -1369.6904698303242, + "y": 440.01452302398053 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "output_type": "file", + "selected": false, + "title": "Variable Aggregator", + "type": "variable-aggregator", + "variables": [ + [ + "1750836380067", + "file" + ], + [ + "1756442998557", + "file" + ] + ] + }, + "height": 135, + "id": "1756442986174", + "position": { + "x": -1067.06980963949, + "y": 236.10252072775984 + }, + "positionAbsolute": { + "x": -1067.06980963949, + "y": 236.10252072775984 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "Google Drive", + "datasource_name": "google_drive", + "datasource_parameters": {}, + "plugin_id": "langgenius/google_drive", + "provider_name": "google_drive", + "provider_type": "online_drive", + "selected": false, + "title": "Google Drive", + "type": "datasource" + }, + "height": 52, + "id": "1756442998557", + "position": { + "x": -1371.6520723158733, + "y": 326 + }, + "positionAbsolute": { + "x": -1371.6520723158733, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "cases": [ + { + "case_id": "true", + "conditions": [ + { + "comparison_operator": "is", + "id": "1581dd11-7898-41f4-962f-937283ba7e01", + "value": ".xlsx", + "varType": "string", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "92abb46d-d7e4-46e7-a5e1-8a29bb45d528", + "value": ".xls", + "varType": "string", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "1dde5ae7-754d-4e83-96b2-fe1f02995d8b", + "value": ".md", + "varType": "string", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "7e1a80e5-c32a-46a4-8f92-8912c64972aa", + "value": ".markdown", + "varType": "string", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "53abfe95-c7d0-4f63-ad37-17d425d25106", + "value": ".mdx", + "varType": "string", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "436877b8-8c0a-4cc6-9565-92754db08571", + "value": ".html", + "varType": "file", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "5e3e375e-750b-4204-8ac3-9a1174a5ab7c", + "value": ".htm", + "varType": "file", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "1a84a784-a797-4f96-98a0-33a9b48ceb2b", + "value": ".docx", + "varType": "file", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "62d11445-876a-493f-85d3-8fc020146bdd", + "value": ".csv", + "varType": "file", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "02c4bce8-7668-4ccd-b750-4281f314b231", + "value": ".txt", + "varType": "file", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + } + ], + "id": "true", + "logical_operator": "or" + } + ], + "selected": false, + "title": "IF/ELSE", + "type": "if-else" + }, + "height": 358, + "id": "1756443014860", + "position": { + "x": -733.5977815139424, + "y": 236.10252072775984 + }, + "positionAbsolute": { + "x": -733.5977815139424, + "y": 236.10252072775984 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "Jina Reader", + "datasource_name": "jina_reader", + "datasource_parameters": { + "crawl_sub_pages": { + "type": "variable", + "value": [ + "rag", + "1756896212061", + "jina_subpages" + ] + }, + "limit": { + "type": "variable", + "value": [ + "rag", + "1756896212061", + "jina_limit" + ] + }, + "url": { + "type": "mixed", + "value": "{{#rag.1756896212061.jina_url#}}" + }, + "use_sitemap": { + "type": "variable", + "value": [ + "rag", + "1756896212061", + "jian_sitemap" + ] + } + }, + "plugin_id": "langgenius/jina_datasource", + "provider_name": "jinareader", + "provider_type": "website_crawl", + "selected": false, + "title": "Jina Reader", + "type": "datasource" + }, + "height": 52, + "id": "1756896212061", + "position": { + "x": -1371.6520723158733, + "y": 538.9988445953813 + }, + "positionAbsolute": { + "x": -1371.6520723158733, + "y": 538.9988445953813 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "Firecrawl", + "datasource_name": "crawl", + "datasource_parameters": { + "crawl_subpages": { + "type": "variable", + "value": [ + "rag", + "1756907397615", + "firecrawl_subpages" + ] + }, + "exclude_paths": { + "type": "mixed", + "value": "{{#rag.1756907397615.exclude_paths#}}" + }, + "include_paths": { + "type": "mixed", + "value": "{{#rag.1756907397615.include_paths#}}" + }, + "limit": { + "type": "variable", + "value": [ + "rag", + "1756907397615", + "max_pages" + ] + }, + "max_depth": { + "type": "variable", + "value": [ + "rag", + "1756907397615", + "max_depth" + ] + }, + "only_main_content": { + "type": "variable", + "value": [ + "rag", + "1756907397615", + "main_content" + ] + }, + "url": { + "type": "mixed", + "value": "{{#rag.1756907397615.firecrawl_url1#}}" + } + }, + "plugin_id": "langgenius/firecrawl_datasource", + "provider_name": "firecrawl", + "provider_type": "website_crawl", + "selected": false, + "title": "Firecrawl", + "type": "datasource" + }, + "height": 52, + "id": "1756907397615", + "position": { + "x": -1371.6520723158733, + "y": 644.3296146102903 + }, + "positionAbsolute": { + "x": -1371.6520723158733, + "y": 644.3296146102903 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + } + ], + "viewport": { + "x": 1463.3408543698197, + "y": 224.29398382646679, + "zoom": 0.6387381963193622 + } + }, + "icon_info": { + "icon": "52064ff0-26b6-47d0-902f-e331f94d959b", + "icon_background": null, + "icon_type": "image", + "icon_url": "" + }, + "id": "9f5ea5a7-7796-49f3-9e9a-ae2d8e84cfa3", + "name": "General Mode-ECO", + "icon": { + "icon": "52064ff0-26b6-47d0-902f-e331f94d959b", + "icon_background": null, + "icon_type": "image", + "icon_url": "" + }, + "language": "zh-Hans", + "position": 1 + }, + "9553b1e0-0c26-445b-9e18-063ad7eca0b4": { + "chunk_structure": "hierarchical_model", + "description": "This template uses an advanced chunking strategy that organizes document text into a hierarchical structure of larger \"parent\" chunks and smaller \"child\" chunks to balance retrieval precision and contextual richness.", + "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/notion_datasource:0.1.12@2855c4a7cffd3311118ebe70f095e546f99935e47f12c841123146f728534f55\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/dify_extractor:0.0.5@ba7e2fd9165eda73bfcc68e31a108855197e88706e5556c058e0777ab08409b3\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/parentchild_chunker:0.0.7@ee9c253e7942436b4de0318200af97d98d094262f3c1a56edbe29dcb01fbc158\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/google_drive:0.1.6@4bc0cf8f8979ebd7321b91506b4bc8f090b05b769b5d214f2da4ce4c04ce30bd\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina_datasource:0.0.5@75942f5bbde870ad28e0345ff5ebf54ebd3aec63f0e66344ef76b88cf06b85c3\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/firecrawl_datasource:0.2.4@37b490ebc52ac30d1c6cbfa538edcddddcfed7d5f5de58982edbd4e2094eb6e2\n version: null\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 6509176c-def5-421c-b966-5122ad6bf658\n icon_background: '#FFEAD5'\n icon_type: image\n icon_url: \n name: Parent-child-HQ\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: variable-aggregator\n id: 1750836391776-source-1753346901505-target\n selected: false\n source: '1750836391776'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: document-extractor\n targetType: variable-aggregator\n id: 1753349228522-source-1753346901505-target\n selected: false\n source: '1753349228522'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1754023419266-source-1753346901505-target\n selected: false\n source: '1754023419266'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756442998557-source-1756442986174-target\n selected: false\n source: '1756442998557'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: variable-aggregator\n targetType: if-else\n id: 1756442986174-source-1756443014860-target\n selected: false\n source: '1756442986174'\n sourceHandle: source\n target: '1756443014860'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1750836380067-source-1756442986174-target\n selected: false\n source: '1750836380067'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: tool\n id: 1756443014860-true-1750836391776-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'true'\n target: '1750836391776'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: document-extractor\n id: 1756443014860-false-1753349228522-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'false'\n target: '1753349228522'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756896212061-source-1753346901505-target\n source: '1756896212061'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756907397615-source-1753346901505-target\n source: '1756907397615'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: variable-aggregator\n targetType: tool\n id: 1753346901505-source-1756972161593-target\n source: '1753346901505'\n sourceHandle: source\n target: '1756972161593'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1756972161593-source-1750836372241-target\n source: '1756972161593'\n sourceHandle: source\n target: '1750836372241'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: hierarchical_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius/jina/jina\n index_chunk_variable_selector:\n - '1756972161593'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n reranking_enable: true\n reranking_mode: reranking_model\n reranking_model:\n reranking_model_name: jina-reranker-v1-base-en\n reranking_provider_name: langgenius/jina/jina\n score_threshold: 0\n score_threshold_enabled: false\n search_method: hybrid_search\n top_k: 3\n weights: null\n selected: false\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750836372241'\n position:\n x: 479.7628208876065\n y: 326\n positionAbsolute:\n x: 479.7628208876065\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - txt\n - markdown\n - mdx\n - pdf\n - html\n - xlsx\n - xls\n - vtt\n - properties\n - doc\n - docx\n - csv\n - eml\n - msg\n - pptx\n - xml\n - epub\n - ppt\n - md\n plugin_id: langgenius/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1750836380067'\n position:\n x: -1371.6520723158733\n y: 224.87938381325645\n positionAbsolute:\n x: -1371.6520723158733\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n documents:\n description: the documents extracted from the file\n items:\n type: object\n type: array\n images:\n description: The images extracted from the file\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n pt_BR: o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png,\n jpg, jpeg)\n zh_Hans: 用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n params:\n file: ''\n provider_id: langgenius/dify_extractor/dify_extractor\n provider_name: langgenius/dify_extractor/dify_extractor\n provider_type: builtin\n selected: false\n title: Dify Extractor\n tool_configurations: {}\n tool_description: Dify Extractor\n tool_label: Dify Extractor\n tool_name: dify_extractor\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1756442986174'\n - output\n type: tool\n height: 52\n id: '1750836391776'\n position:\n x: -417.5334221022782\n y: 268.1692071834485\n positionAbsolute:\n x: -417.5334221022782\n y: 268.1692071834485\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 252\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n → use extractor to extract document content → split and clean content into\n structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1124\n height: 252\n id: '1751252161631'\n position:\n x: -1371.6520723158733\n y: -123.758428116601\n positionAbsolute:\n x: -1371.6520723158733\n y: -123.758428116601\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1124\n - data:\n author: TenTen\n desc: ''\n height: 388\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 285\n height: 388\n id: '1751252440357'\n position:\n x: -1723.9942193415582\n y: 224.87938381325645\n positionAbsolute:\n x: -1723.9942193415582\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 285\n - data:\n author: TenTen\n desc: ''\n height: 430\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor in Retrieval-Augmented Generation (RAG) is a tool or\n component that automatically identifies, extracts, and structures text and\n data from various types of documents—such as PDFs, images, scanned files,\n handwritten notes, and more—into a format that can be effectively used by\n language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify\n Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is\n a built-in document parser developed by Dify. It supports a wide range of\n common file formats and offers specialized handling for certain formats,\n such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\n In addition to text extraction, it can extract images embedded within documents,\n store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 430\n id: '1751253091602'\n position:\n x: -417.5334221022782\n y: 547.4103414077279\n positionAbsolute:\n x: -417.5334221022782\n y: 547.4103414077279\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 638\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n addresses the dilemma of context and precision by leveraging a two-tier\n hierarchical approach that effectively balances the trade-off between accurate\n matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here\n is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Query Matching with Child Chunks: Small, focused pieces of information,\n often as concise as a single sentence within a paragraph, are used to match\n the user''s query. These child chunks enable precise and relevant initial\n retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Contextual Enrichment with Parent Chunks: Larger, encompassing sections—such\n as a paragraph, a section, or even an entire document—that include the matched\n child chunks are then retrieved. These parent chunks provide comprehensive\n context for the Language Model (LLM). length, and overlap—to fit different\n document formats or scenarios. Preprocessing options are also available\n to clean up the text by removing excess spaces, URLs, and emails.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 638\n id: '1751253953926'\n position:\n x: 184.46657789772178\n y: 407.42301051148354\n positionAbsolute:\n x: 184.46657789772178\n y: 407.42301051148354\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 410\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only\n support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 410\n id: '1751254117904'\n position:\n x: 479.7628208876065\n y: 472.46585541244207\n positionAbsolute:\n x: 479.7628208876065\n y: 472.46585541244207\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n output_type: string\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836391776'\n - text\n - - '1753349228522'\n - text\n - - '1754023419266'\n - content\n - - '1756896212061'\n - content\n - - '1756907397615'\n - content\n height: 213\n id: '1753346901505'\n position:\n x: -117.24452412456148\n y: 326\n positionAbsolute:\n x: -117.24452412456148\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_array_file: false\n selected: false\n title: Doc Extractor\n type: document-extractor\n variable_selector:\n - '1756442986174'\n - output\n height: 92\n id: '1753349228522'\n position:\n x: -417.5334221022782\n y: 417.25474169825833\n positionAbsolute:\n x: -417.5334221022782\n y: 417.25474169825833\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Notion\n datasource_name: notion_datasource\n datasource_parameters: {}\n plugin_id: langgenius/notion_datasource\n provider_name: notion_datasource\n provider_type: online_document\n selected: false\n title: Notion\n type: datasource\n height: 52\n id: '1754023419266'\n position:\n x: -1369.6904698303242\n y: 440.01452302398053\n positionAbsolute:\n x: -1369.6904698303242\n y: 440.01452302398053\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n output_type: file\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836380067'\n - file\n - - '1756442998557'\n - file\n height: 135\n id: '1756442986174'\n position:\n x: -1054.415447856335\n y: 236.10252072775984\n positionAbsolute:\n x: -1054.415447856335\n y: 236.10252072775984\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Google Drive\n datasource_name: google_drive\n datasource_parameters: {}\n plugin_id: langgenius/google_drive\n provider_name: google_drive\n provider_type: online_drive\n selected: false\n title: Google Drive\n type: datasource\n height: 52\n id: '1756442998557'\n position:\n x: -1371.6520723158733\n y: 326\n positionAbsolute:\n x: -1371.6520723158733\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n cases:\n - case_id: 'true'\n conditions:\n - comparison_operator: is\n id: 1581dd11-7898-41f4-962f-937283ba7e01\n value: .xlsx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 92abb46d-d7e4-46e7-a5e1-8a29bb45d528\n value: .xls\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1dde5ae7-754d-4e83-96b2-fe1f02995d8b\n value: .md\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 7e1a80e5-c32a-46a4-8f92-8912c64972aa\n value: .markdown\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 53abfe95-c7d0-4f63-ad37-17d425d25106\n value: .mdx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 436877b8-8c0a-4cc6-9565-92754db08571\n value: .html\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 5e3e375e-750b-4204-8ac3-9a1174a5ab7c\n value: .htm\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1a84a784-a797-4f96-98a0-33a9b48ceb2b\n value: .docx\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 62d11445-876a-493f-85d3-8fc020146bdd\n value: .csv\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 02c4bce8-7668-4ccd-b750-4281f314b231\n value: .txt\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n id: 'true'\n logical_operator: or\n selected: false\n title: IF/ELSE\n type: if-else\n height: 358\n id: '1756443014860'\n position:\n x: -733.5977815139424\n y: 236.10252072775984\n positionAbsolute:\n x: -733.5977815139424\n y: 236.10252072775984\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Jina Reader\n datasource_name: jina_reader\n datasource_parameters:\n crawl_sub_pages:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_subpages\n limit:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_limit\n url:\n type: mixed\n value: '{{#rag.1756896212061.jina_url#}}'\n use_sitemap:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jian_sitemap\n plugin_id: langgenius/jina_datasource\n provider_name: jinareader\n provider_type: website_crawl\n selected: false\n title: Jina Reader\n type: datasource\n height: 52\n id: '1756896212061'\n position:\n x: -1371.6520723158733\n y: 538.9988445953813\n positionAbsolute:\n x: -1371.6520723158733\n y: 538.9988445953813\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Firecrawl\n datasource_name: crawl\n datasource_parameters:\n crawl_subpages:\n type: variable\n value:\n - rag\n - '1756907397615'\n - firecrawl_subpages\n exclude_paths:\n type: mixed\n value: '{{#rag.1756907397615.exclude_paths#}}'\n include_paths:\n type: mixed\n value: '{{#rag.1756907397615.include_paths#}}'\n limit:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_pages\n max_depth:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_depth\n only_main_content:\n type: variable\n value:\n - rag\n - '1756907397615'\n - main_content\n url:\n type: mixed\n value: '{{#rag.1756907397615.firecrawl_url1#}}'\n plugin_id: langgenius/firecrawl_datasource\n provider_name: firecrawl\n provider_type: website_crawl\n selected: false\n title: Firecrawl\n type: datasource\n height: 52\n id: '1756907397615'\n position:\n x: -1371.6520723158733\n y: 644.3296146102903\n positionAbsolute:\n x: -1371.6520723158733\n y: 644.3296146102903\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The text you want to chunk.\n ja_JP: The text you want to chunk.\n pt_BR: Conteúdo de Entrada\n zh_Hans: 输入文本\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Conteúdo de Entrada\n zh_Hans: 输入文本\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: paragraph\n form: llm\n human_description:\n en_US: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n ja_JP: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n pt_BR: Dividir texto em parágrafos com base no separador e no comprimento\n máximo do bloco, usando o texto dividido como bloco pai ou documento\n completo como bloco pai e diretamente recuperá-lo.\n zh_Hans: 根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。\n label:\n en_US: Parent Mode\n ja_JP: Parent Mode\n pt_BR: Modo Pai\n zh_Hans: 父块模式\n llm_description: Split text into paragraphs based on separator and maximum\n chunk length, using split text as parent block or entire document as parent\n block and directly retrieve.\n max: null\n min: null\n name: parent_mode\n options:\n - icon: ''\n label:\n en_US: paragraph\n ja_JP: paragraph\n pt_BR: paragraph\n zh_Hans: paragraph\n value: paragraph\n - icon: ''\n label:\n en_US: full_doc\n ja_JP: full_doc\n pt_BR: full_doc\n zh_Hans: full_doc\n value: full_doc\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: '\n\n\n '\n form: llm\n human_description:\n en_US: Separator used for chunking\n ja_JP: Separator used for chunking\n pt_BR: Separador usado para divisão\n zh_Hans: 用于分块的分隔符\n label:\n en_US: Parent Delimiter\n ja_JP: Parent Delimiter\n pt_BR: Separador de Pai\n zh_Hans: 父块分隔符\n llm_description: The separator used to split chunks\n max: null\n min: null\n name: separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 1024\n form: llm\n human_description:\n en_US: Maximum length for chunking\n ja_JP: Maximum length for chunking\n pt_BR: Comprimento máximo para divisão\n zh_Hans: 用于分块的最大长度\n label:\n en_US: Maximum Parent Chunk Length\n ja_JP: Maximum Parent Chunk Length\n pt_BR: Comprimento Máximo do Bloco Pai\n zh_Hans: 最大父块长度\n llm_description: Maximum length allowed per chunk\n max: null\n min: null\n name: max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: '. '\n form: llm\n human_description:\n en_US: Separator used for subchunking\n ja_JP: Separator used for subchunking\n pt_BR: Separador usado para subdivisão\n zh_Hans: 用于子分块的分隔符\n label:\n en_US: Child Delimiter\n ja_JP: Child Delimiter\n pt_BR: Separador de Subdivisão\n zh_Hans: 子分块分隔符\n llm_description: The separator used to split subchunks\n max: null\n min: null\n name: subchunk_separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 512\n form: llm\n human_description:\n en_US: Maximum length for subchunking\n ja_JP: Maximum length for subchunking\n pt_BR: Comprimento máximo para subdivisão\n zh_Hans: 用于子分块的最大长度\n label:\n en_US: Maximum Child Chunk Length\n ja_JP: Maximum Child Chunk Length\n pt_BR: Comprimento Máximo de Subdivisão\n zh_Hans: 子分块最大长度\n llm_description: Maximum length allowed per subchunk\n max: null\n min: null\n name: subchunk_max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove consecutive spaces, newlines and tabs\n ja_JP: Whether to remove consecutive spaces, newlines and tabs\n pt_BR: Se deve remover espaços extras no texto\n zh_Hans: 是否移除文本中的连续空格、换行符和制表符\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Substituir espaços consecutivos, novas linhas e guias\n zh_Hans: 替换连续空格、换行符和制表符\n llm_description: Whether to remove consecutive spaces, newlines and tabs\n max: null\n min: null\n name: remove_extra_spaces\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove URLs and emails in the text\n ja_JP: Whether to remove URLs and emails in the text\n pt_BR: Se deve remover URLs e e-mails no texto\n zh_Hans: 是否移除文本中的URL和电子邮件地址\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Remover todas as URLs e e-mails\n zh_Hans: 删除所有URL和电子邮件地址\n llm_description: Whether to remove URLs and emails in the text\n max: null\n min: null\n name: remove_urls_emails\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n input_text: ''\n max_length: ''\n parent_mode: ''\n remove_extra_spaces: ''\n remove_urls_emails: ''\n separator: ''\n subchunk_max_length: ''\n subchunk_separator: ''\n provider_id: langgenius/parentchild_chunker/parentchild_chunker\n provider_name: langgenius/parentchild_chunker/parentchild_chunker\n provider_type: builtin\n selected: false\n title: Parent-child Chunker\n tool_configurations: {}\n tool_description: Process documents into parent-child chunk structures\n tool_label: Parent-child Chunker\n tool_name: parentchild_chunker\n tool_node_version: '2'\n tool_parameters:\n input_text:\n type: mixed\n value: '{{#1753346901505.output#}}'\n max_length:\n type: variable\n value:\n - rag\n - shared\n - parent_length\n parent_mode:\n type: variable\n value:\n - rag\n - shared\n - parent_mode\n remove_extra_spaces:\n type: variable\n value:\n - rag\n - shared\n - clean_1\n remove_urls_emails:\n type: variable\n value:\n - rag\n - shared\n - clean_2\n separator:\n type: mixed\n value: '{{#rag.shared.parent_dilmiter#}}'\n subchunk_max_length:\n type: variable\n value:\n - rag\n - shared\n - child_length\n subchunk_separator:\n type: mixed\n value: '{{#rag.shared.child_delimiter#}}'\n type: tool\n height: 52\n id: '1756972161593'\n position:\n x: 184.46657789772178\n y: 326\n positionAbsolute:\n x: 184.46657789772178\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n viewport:\n x: 947.2141381290828\n y: 179.30600859363653\n zoom: 0.47414481289660987\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_reader_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_reader_imit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Crawl sub-pages\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: checkbox\n unit: null\n variable: Crawl_sub_pages_2\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: Use_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_limit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Follow the sitemap to crawl the site. If not, Jina Reader will crawl\n iteratively based on page relevance, yielding fewer but higher-quality pages.\n type: checkbox\n unit: null\n variable: jian_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Crawl subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: jina_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: firecrawl_url1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: firecrawl_subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: firecrawl_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: Exclude paths\n max_length: 256\n options: []\n placeholder: blog/*,/about/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: exclude_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: include_paths\n max_length: 256\n options: []\n placeholder: articles/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: include_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 0\n label: Max depth\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Maximum depth to crawl relative to the entered URL. Depth 0 just scrapes\n the page of the entered url, depth 1 scrapes the url and everything after enteredURL\n + one /, and so on.\n type: number\n unit: null\n variable: max_depth\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: null\n variable: max_pages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: Extract only main content (no headers, navs, footers, etc.)\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: main_content\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: paragraph\n label: Parent Mode\n max_length: 48\n options:\n - paragraph\n - full_doc\n placeholder: null\n required: true\n tooltips: 'Parent Mode provides two options: paragraph mode splits text into paragraphs\n as parent chunks for retrieval, while full_doc mode uses the entire document\n as a single parent chunk (text beyond 10,000 tokens will be truncated).'\n type: select\n unit: null\n variable: parent_mode\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Parent Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: A delimiter is the character used to separate text. \\n\\n is recommended\n for splitting the original document into large parent chunks. You can also use\n special delimiters defined by yourself.\n type: text-input\n unit: null\n variable: parent_dilmiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Parent Length\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: number\n unit: tokens\n variable: parent_length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\n label: Child Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n is recommended\n for splitting parent chunks into small child chunks. You can also use special\n delimiters defined by yourself.\n type: text-input\n unit: null\n variable: child_delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 256\n label: Maximum Child Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: tokens\n variable: child_length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: true\n label: Replace consecutive spaces, newlines and tabs.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: null\n label: Delete all URLs and email addresses.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_2\n", + "graph": { + "edges": [ + { + "data": { + "isInLoop": false, + "sourceType": "tool", + "targetType": "variable-aggregator" + }, + "id": "1750836391776-source-1753346901505-target", + "selected": false, + "source": "1750836391776", + "sourceHandle": "source", + "target": "1753346901505", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "document-extractor", + "targetType": "variable-aggregator" + }, + "id": "1753349228522-source-1753346901505-target", + "selected": false, + "source": "1753349228522", + "sourceHandle": "source", + "target": "1753346901505", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "variable-aggregator" + }, + "id": "1754023419266-source-1753346901505-target", + "selected": false, + "source": "1754023419266", + "sourceHandle": "source", + "target": "1753346901505", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "variable-aggregator" + }, + "id": "1756442998557-source-1756442986174-target", + "selected": false, + "source": "1756442998557", + "sourceHandle": "source", + "target": "1756442986174", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInIteration": false, + "isInLoop": false, + "sourceType": "variable-aggregator", + "targetType": "if-else" + }, + "id": "1756442986174-source-1756443014860-target", + "selected": false, + "source": "1756442986174", + "sourceHandle": "source", + "target": "1756443014860", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "variable-aggregator" + }, + "id": "1750836380067-source-1756442986174-target", + "selected": false, + "source": "1750836380067", + "sourceHandle": "source", + "target": "1756442986174", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "if-else", + "targetType": "tool" + }, + "id": "1756443014860-true-1750836391776-target", + "selected": false, + "source": "1756443014860", + "sourceHandle": "true", + "target": "1750836391776", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "if-else", + "targetType": "document-extractor" + }, + "id": "1756443014860-false-1753349228522-target", + "selected": false, + "source": "1756443014860", + "sourceHandle": "false", + "target": "1753349228522", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "variable-aggregator" + }, + "id": "1756896212061-source-1753346901505-target", + "source": "1756896212061", + "sourceHandle": "source", + "target": "1753346901505", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "variable-aggregator" + }, + "id": "1756907397615-source-1753346901505-target", + "source": "1756907397615", + "sourceHandle": "source", + "target": "1753346901505", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInIteration": false, + "isInLoop": false, + "sourceType": "variable-aggregator", + "targetType": "tool" + }, + "id": "1753346901505-source-1756972161593-target", + "source": "1753346901505", + "sourceHandle": "source", + "target": "1756972161593", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "tool", + "targetType": "knowledge-index" + }, + "id": "1756972161593-source-1750836372241-target", + "source": "1756972161593", + "sourceHandle": "source", + "target": "1750836372241", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + } + ], + "nodes": [ + { + "data": { + "chunk_structure": "hierarchical_model", + "embedding_model": "jina-embeddings-v2-base-en", + "embedding_model_provider": "langgenius/jina/jina", + "index_chunk_variable_selector": [ + "1756972161593", + "result" + ], + "indexing_technique": "high_quality", + "keyword_number": 10, + "retrieval_model": { + "reranking_enable": true, + "reranking_mode": "reranking_model", + "reranking_model": { + "reranking_model_name": "jina-reranker-v1-base-en", + "reranking_provider_name": "langgenius/jina/jina" + }, + "score_threshold": 0, + "score_threshold_enabled": false, + "search_method": "hybrid_search", + "top_k": 3, + "weights": null + }, + "selected": false, + "title": "Knowledge Base", + "type": "knowledge-index" + }, + "height": 114, + "id": "1750836372241", + "position": { + "x": 479.7628208876065, + "y": 326 + }, + "positionAbsolute": { + "x": 479.7628208876065, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "File", + "datasource_name": "upload-file", + "datasource_parameters": {}, + "fileExtensions": [ + "txt", + "markdown", + "mdx", + "pdf", + "html", + "xlsx", + "xls", + "vtt", + "properties", + "doc", + "docx", + "csv", + "eml", + "msg", + "pptx", + "xml", + "epub", + "ppt", + "md" + ], + "plugin_id": "langgenius/file", + "provider_name": "file", + "provider_type": "local_file", + "selected": false, + "title": "File", + "type": "datasource" + }, + "height": 52, + "id": "1750836380067", + "position": { + "x": -1371.6520723158733, + "y": 224.87938381325645 + }, + "positionAbsolute": { + "x": -1371.6520723158733, + "y": 224.87938381325645 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "is_team_authorization": true, + "output_schema": { + "properties": { + "documents": { + "description": "the documents extracted from the file", + "items": { + "type": "object" + }, + "type": "array" + }, + "images": { + "description": "The images extracted from the file", + "items": { + "type": "object" + }, + "type": "array" + } + }, + "type": "object" + }, + "paramSchemas": [ + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "ja_JP": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "pt_BR": "o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "zh_Hans": "用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)" + }, + "label": { + "en_US": "file", + "ja_JP": "file", + "pt_BR": "file", + "zh_Hans": "file" + }, + "llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "max": null, + "min": null, + "name": "file", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "file" + } + ], + "params": { + "file": "" + }, + "provider_id": "langgenius/dify_extractor/dify_extractor", + "provider_name": "langgenius/dify_extractor/dify_extractor", + "provider_type": "builtin", + "selected": false, + "title": "Dify Extractor", + "tool_configurations": {}, + "tool_description": "Dify Extractor", + "tool_label": "Dify Extractor", + "tool_name": "dify_extractor", + "tool_node_version": "2", + "tool_parameters": { + "file": { + "type": "variable", + "value": [ + "1756442986174", + "output" + ] + } + }, + "type": "tool" + }, + "height": 52, + "id": "1750836391776", + "position": { + "x": -417.5334221022782, + "y": 268.1692071834485 + }, + "positionAbsolute": { + "x": -417.5334221022782, + "y": 268.1692071834485 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 252, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source → use extractor to extract document content → split and clean content into structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 1124 + }, + "height": 252, + "id": "1751252161631", + "position": { + "x": -1371.6520723158733, + "y": -123.758428116601 + }, + "positionAbsolute": { + "x": -1371.6520723158733, + "y": -123.758428116601 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 1124 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 388, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 285 + }, + "height": 388, + "id": "1751252440357", + "position": { + "x": -1723.9942193415582, + "y": 224.87938381325645 + }, + "positionAbsolute": { + "x": -1723.9942193415582, + "y": 224.87938381325645 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 285 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 430, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A document extractor in Retrieval-Augmented Generation (RAG) is a tool or component that automatically identifies, extracts, and structures text and data from various types of documents—such as PDFs, images, scanned files, handwritten notes, and more—into a format that can be effectively used by language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is a built-in document parser developed by Dify. It supports a wide range of common file formats and offers specialized handling for certain formats, such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\". In addition to text extraction, it can extract images embedded within documents, store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 430, + "id": "1751253091602", + "position": { + "x": -417.5334221022782, + "y": 547.4103414077279 + }, + "positionAbsolute": { + "x": -417.5334221022782, + "y": 547.4103414077279 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 638, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" addresses the dilemma of context and precision by leveraging a two-tier hierarchical approach that effectively balances the trade-off between accurate matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Query Matching with Child Chunks: Small, focused pieces of information, often as concise as a single sentence within a paragraph, are used to match the user's query. These child chunks enable precise and relevant initial retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Contextual Enrichment with Parent Chunks: Larger, encompassing sections—such as a paragraph, a section, or even an entire document—that include the matched child chunks are then retrieved. These parent chunks provide comprehensive context for the Language Model (LLM). length, and overlap—to fit different document formats or scenarios. Preprocessing options are also available to clean up the text by removing excess spaces, URLs, and emails.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 638, + "id": "1751253953926", + "position": { + "x": 184.46657789772178, + "y": 407.42301051148354 + }, + "positionAbsolute": { + "x": 184.46657789772178, + "y": 407.42301051148354 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 410, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 410, + "id": "1751254117904", + "position": { + "x": 479.7628208876065, + "y": 472.46585541244207 + }, + "positionAbsolute": { + "x": 479.7628208876065, + "y": 472.46585541244207 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "output_type": "string", + "selected": false, + "title": "Variable Aggregator", + "type": "variable-aggregator", + "variables": [ + [ + "1750836391776", + "text" + ], + [ + "1753349228522", + "text" + ], + [ + "1754023419266", + "content" + ], + [ + "1756896212061", + "content" + ], + [ + "1756907397615", + "content" + ] + ] + }, + "height": 213, + "id": "1753346901505", + "position": { + "x": -117.24452412456148, + "y": 326 + }, + "positionAbsolute": { + "x": -117.24452412456148, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "is_array_file": false, + "selected": false, + "title": "Doc Extractor", + "type": "document-extractor", + "variable_selector": [ + "1756442986174", + "output" + ] + }, + "height": 92, + "id": "1753349228522", + "position": { + "x": -417.5334221022782, + "y": 417.25474169825833 + }, + "positionAbsolute": { + "x": -417.5334221022782, + "y": 417.25474169825833 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "Notion", + "datasource_name": "notion_datasource", + "datasource_parameters": {}, + "plugin_id": "langgenius/notion_datasource", + "provider_name": "notion_datasource", + "provider_type": "online_document", + "selected": false, + "title": "Notion", + "type": "datasource" + }, + "height": 52, + "id": "1754023419266", + "position": { + "x": -1369.6904698303242, + "y": 440.01452302398053 + }, + "positionAbsolute": { + "x": -1369.6904698303242, + "y": 440.01452302398053 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "output_type": "file", + "selected": false, + "title": "Variable Aggregator", + "type": "variable-aggregator", + "variables": [ + [ + "1750836380067", + "file" + ], + [ + "1756442998557", + "file" + ] + ] + }, + "height": 135, + "id": "1756442986174", + "position": { + "x": -1054.415447856335, + "y": 236.10252072775984 + }, + "positionAbsolute": { + "x": -1054.415447856335, + "y": 236.10252072775984 + }, + "selected": true, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "Google Drive", + "datasource_name": "google_drive", + "datasource_parameters": {}, + "plugin_id": "langgenius/google_drive", + "provider_name": "google_drive", + "provider_type": "online_drive", + "selected": false, + "title": "Google Drive", + "type": "datasource" + }, + "height": 52, + "id": "1756442998557", + "position": { + "x": -1371.6520723158733, + "y": 326 + }, + "positionAbsolute": { + "x": -1371.6520723158733, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "cases": [ + { + "case_id": "true", + "conditions": [ + { + "comparison_operator": "is", + "id": "1581dd11-7898-41f4-962f-937283ba7e01", + "value": ".xlsx", + "varType": "string", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "92abb46d-d7e4-46e7-a5e1-8a29bb45d528", + "value": ".xls", + "varType": "string", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "1dde5ae7-754d-4e83-96b2-fe1f02995d8b", + "value": ".md", + "varType": "string", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "7e1a80e5-c32a-46a4-8f92-8912c64972aa", + "value": ".markdown", + "varType": "string", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "53abfe95-c7d0-4f63-ad37-17d425d25106", + "value": ".mdx", + "varType": "string", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "436877b8-8c0a-4cc6-9565-92754db08571", + "value": ".html", + "varType": "file", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "5e3e375e-750b-4204-8ac3-9a1174a5ab7c", + "value": ".htm", + "varType": "file", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "1a84a784-a797-4f96-98a0-33a9b48ceb2b", + "value": ".docx", + "varType": "file", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "62d11445-876a-493f-85d3-8fc020146bdd", + "value": ".csv", + "varType": "file", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "02c4bce8-7668-4ccd-b750-4281f314b231", + "value": ".txt", + "varType": "file", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + } + ], + "id": "true", + "logical_operator": "or" + } + ], + "selected": false, + "title": "IF/ELSE", + "type": "if-else" + }, + "height": 358, + "id": "1756443014860", + "position": { + "x": -733.5977815139424, + "y": 236.10252072775984 + }, + "positionAbsolute": { + "x": -733.5977815139424, + "y": 236.10252072775984 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "Jina Reader", + "datasource_name": "jina_reader", + "datasource_parameters": { + "crawl_sub_pages": { + "type": "variable", + "value": [ + "rag", + "1756896212061", + "jina_subpages" + ] + }, + "limit": { + "type": "variable", + "value": [ + "rag", + "1756896212061", + "jina_limit" + ] + }, + "url": { + "type": "mixed", + "value": "{{#rag.1756896212061.jina_url#}}" + }, + "use_sitemap": { + "type": "variable", + "value": [ + "rag", + "1756896212061", + "jian_sitemap" + ] + } + }, + "plugin_id": "langgenius/jina_datasource", + "provider_name": "jinareader", + "provider_type": "website_crawl", + "selected": false, + "title": "Jina Reader", + "type": "datasource" + }, + "height": 52, + "id": "1756896212061", + "position": { + "x": -1371.6520723158733, + "y": 538.9988445953813 + }, + "positionAbsolute": { + "x": -1371.6520723158733, + "y": 538.9988445953813 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "Firecrawl", + "datasource_name": "crawl", + "datasource_parameters": { + "crawl_subpages": { + "type": "variable", + "value": [ + "rag", + "1756907397615", + "firecrawl_subpages" + ] + }, + "exclude_paths": { + "type": "mixed", + "value": "{{#rag.1756907397615.exclude_paths#}}" + }, + "include_paths": { + "type": "mixed", + "value": "{{#rag.1756907397615.include_paths#}}" + }, + "limit": { + "type": "variable", + "value": [ + "rag", + "1756907397615", + "max_pages" + ] + }, + "max_depth": { + "type": "variable", + "value": [ + "rag", + "1756907397615", + "max_depth" + ] + }, + "only_main_content": { + "type": "variable", + "value": [ + "rag", + "1756907397615", + "main_content" + ] + }, + "url": { + "type": "mixed", + "value": "{{#rag.1756907397615.firecrawl_url1#}}" + } + }, + "plugin_id": "langgenius/firecrawl_datasource", + "provider_name": "firecrawl", + "provider_type": "website_crawl", + "selected": false, + "title": "Firecrawl", + "type": "datasource" + }, + "height": 52, + "id": "1756907397615", + "position": { + "x": -1371.6520723158733, + "y": 644.3296146102903 + }, + "positionAbsolute": { + "x": -1371.6520723158733, + "y": 644.3296146102903 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "is_team_authorization": true, + "paramSchemas": [ + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "The text you want to chunk.", + "ja_JP": "The text you want to chunk.", + "pt_BR": "Conteúdo de Entrada", + "zh_Hans": "输入文本" + }, + "label": { + "en_US": "Input Content", + "ja_JP": "Input Content", + "pt_BR": "Conteúdo de Entrada", + "zh_Hans": "输入文本" + }, + "llm_description": "The text you want to chunk.", + "max": null, + "min": null, + "name": "input_text", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": "paragraph", + "form": "llm", + "human_description": { + "en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", + "ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", + "pt_BR": "Dividir texto em parágrafos com base no separador e no comprimento máximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuperá-lo.", + "zh_Hans": "根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。" + }, + "label": { + "en_US": "Parent Mode", + "ja_JP": "Parent Mode", + "pt_BR": "Modo Pai", + "zh_Hans": "父块模式" + }, + "llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", + "max": null, + "min": null, + "name": "parent_mode", + "options": [ + { + "icon": "", + "label": { + "en_US": "paragraph", + "ja_JP": "paragraph", + "pt_BR": "paragraph", + "zh_Hans": "paragraph" + }, + "value": "paragraph" + }, + { + "icon": "", + "label": { + "en_US": "full_doc", + "ja_JP": "full_doc", + "pt_BR": "full_doc", + "zh_Hans": "full_doc" + }, + "value": "full_doc" + } + ], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "select" + }, + { + "auto_generate": null, + "default": "\n\n", + "form": "llm", + "human_description": { + "en_US": "Separator used for chunking", + "ja_JP": "Separator used for chunking", + "pt_BR": "Separador usado para divisão", + "zh_Hans": "用于分块的分隔符" + }, + "label": { + "en_US": "Parent Delimiter", + "ja_JP": "Parent Delimiter", + "pt_BR": "Separador de Pai", + "zh_Hans": "父块分隔符" + }, + "llm_description": "The separator used to split chunks", + "max": null, + "min": null, + "name": "separator", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": 1024, + "form": "llm", + "human_description": { + "en_US": "Maximum length for chunking", + "ja_JP": "Maximum length for chunking", + "pt_BR": "Comprimento máximo para divisão", + "zh_Hans": "用于分块的最大长度" + }, + "label": { + "en_US": "Maximum Parent Chunk Length", + "ja_JP": "Maximum Parent Chunk Length", + "pt_BR": "Comprimento Máximo do Bloco Pai", + "zh_Hans": "最大父块长度" + }, + "llm_description": "Maximum length allowed per chunk", + "max": null, + "min": null, + "name": "max_length", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "number" + }, + { + "auto_generate": null, + "default": ". ", + "form": "llm", + "human_description": { + "en_US": "Separator used for subchunking", + "ja_JP": "Separator used for subchunking", + "pt_BR": "Separador usado para subdivisão", + "zh_Hans": "用于子分块的分隔符" + }, + "label": { + "en_US": "Child Delimiter", + "ja_JP": "Child Delimiter", + "pt_BR": "Separador de Subdivisão", + "zh_Hans": "子分块分隔符" + }, + "llm_description": "The separator used to split subchunks", + "max": null, + "min": null, + "name": "subchunk_separator", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": 512, + "form": "llm", + "human_description": { + "en_US": "Maximum length for subchunking", + "ja_JP": "Maximum length for subchunking", + "pt_BR": "Comprimento máximo para subdivisão", + "zh_Hans": "用于子分块的最大长度" + }, + "label": { + "en_US": "Maximum Child Chunk Length", + "ja_JP": "Maximum Child Chunk Length", + "pt_BR": "Comprimento Máximo de Subdivisão", + "zh_Hans": "子分块最大长度" + }, + "llm_description": "Maximum length allowed per subchunk", + "max": null, + "min": null, + "name": "subchunk_max_length", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "number" + }, + { + "auto_generate": null, + "default": 0, + "form": "llm", + "human_description": { + "en_US": "Whether to remove consecutive spaces, newlines and tabs", + "ja_JP": "Whether to remove consecutive spaces, newlines and tabs", + "pt_BR": "Se deve remover espaços extras no texto", + "zh_Hans": "是否移除文本中的连续空格、换行符和制表符" + }, + "label": { + "en_US": "Replace consecutive spaces, newlines and tabs", + "ja_JP": "Replace consecutive spaces, newlines and tabs", + "pt_BR": "Substituir espaços consecutivos, novas linhas e guias", + "zh_Hans": "替换连续空格、换行符和制表符" + }, + "llm_description": "Whether to remove consecutive spaces, newlines and tabs", + "max": null, + "min": null, + "name": "remove_extra_spaces", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + }, + { + "auto_generate": null, + "default": 0, + "form": "llm", + "human_description": { + "en_US": "Whether to remove URLs and emails in the text", + "ja_JP": "Whether to remove URLs and emails in the text", + "pt_BR": "Se deve remover URLs e e-mails no texto", + "zh_Hans": "是否移除文本中的URL和电子邮件地址" + }, + "label": { + "en_US": "Delete all URLs and email addresses", + "ja_JP": "Delete all URLs and email addresses", + "pt_BR": "Remover todas as URLs e e-mails", + "zh_Hans": "删除所有URL和电子邮件地址" + }, + "llm_description": "Whether to remove URLs and emails in the text", + "max": null, + "min": null, + "name": "remove_urls_emails", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + } + ], + "params": { + "input_text": "", + "max_length": "", + "parent_mode": "", + "remove_extra_spaces": "", + "remove_urls_emails": "", + "separator": "", + "subchunk_max_length": "", + "subchunk_separator": "" + }, + "provider_id": "langgenius/parentchild_chunker/parentchild_chunker", + "provider_name": "langgenius/parentchild_chunker/parentchild_chunker", + "provider_type": "builtin", + "selected": false, + "title": "Parent-child Chunker", + "tool_configurations": {}, + "tool_description": "Process documents into parent-child chunk structures", + "tool_label": "Parent-child Chunker", + "tool_name": "parentchild_chunker", + "tool_node_version": "2", + "tool_parameters": { + "input_text": { + "type": "mixed", + "value": "{{#1753346901505.output#}}" + }, + "max_length": { + "type": "variable", + "value": [ + "rag", + "shared", + "parent_length" + ] + }, + "parent_mode": { + "type": "variable", + "value": [ + "rag", + "shared", + "parent_mode" + ] + }, + "remove_extra_spaces": { + "type": "variable", + "value": [ + "rag", + "shared", + "clean_1" + ] + }, + "remove_urls_emails": { + "type": "variable", + "value": [ + "rag", + "shared", + "clean_2" + ] + }, + "separator": { + "type": "mixed", + "value": "{{#rag.shared.parent_dilmiter#}}" + }, + "subchunk_max_length": { + "type": "variable", + "value": [ + "rag", + "shared", + "child_length" + ] + }, + "subchunk_separator": { + "type": "mixed", + "value": "{{#rag.shared.child_delimiter#}}" + } + }, + "type": "tool" + }, + "height": 52, + "id": "1756972161593", + "position": { + "x": 184.46657789772178, + "y": 326 + }, + "positionAbsolute": { + "x": 184.46657789772178, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + } + ], + "viewport": { + "x": 947.2141381290828, + "y": 179.30600859363653, + "zoom": 0.47414481289660987 + } + }, + "icon_info": { + "icon": "ab8da246-37ba-4bbb-9b24-e7bda0778005", + "icon_background": null, + "icon_type": "image", + "icon_url": "" + }, + "id": "9553b1e0-0c26-445b-9e18-063ad7eca0b4", + "name": "Parent-child-HQ", + "icon": { + "icon": "ab8da246-37ba-4bbb-9b24-e7bda0778005", + "icon_background": null, + "icon_type": "image", + "icon_url": "" + }, + "language": "zh-Hans", + "position": 2 + }, + "9ef3e66a-11c7-4227-897c-3b0f9a42da1a": { + "chunk_structure": "qa_model", + "description": "This template generates structured Q&A pairs by extracting selected columns from a table. These pairs are indexed by questions, enabling efficient retrieval of relevant answers based on query similarity.", + "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/qa_chunk:0.0.8@1fed9644646bdd48792cdf5a1d559a3df336bd3a8edb0807227499fb56dce3af\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n version: null\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 769900fc-8a31-4584-94f6-f227357c00c8\n icon_background: null\n icon_type: image\n icon_url: \n name: Simple Q&A\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: datasource\n targetType: tool\n id: 1750836380067-source-1753253430271-target\n source: '1750836380067'\n sourceHandle: source\n target: '1753253430271'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1753253430271-source-1750836372241-target\n source: '1753253430271'\n sourceHandle: source\n target: '1750836372241'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: qa_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius/jina/jina\n index_chunk_variable_selector:\n - '1753253430271'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n reranking_enable: false\n reranking_mode: reranking_model\n reranking_model:\n reranking_model_name: null\n reranking_provider_name: null\n score_threshold: 0\n score_threshold_enabled: false\n search_method: semantic_search\n top_k: 3\n weights: null\n selected: true\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750836372241'\n position:\n x: 160\n y: 326\n positionAbsolute:\n x: 160\n y: 326\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - csv\n plugin_id: langgenius/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1750836380067'\n position:\n x: -714.4192784522008\n y: 326\n positionAbsolute:\n x: -714.4192784522008\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 249\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n → use extractor to extract document content → split and clean content into\n structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1115\n height: 249\n id: '1751252161631'\n position:\n x: -714.4192784522008\n y: -19.94142868660783\n positionAbsolute:\n x: -714.4192784522008\n y: -19.94142868660783\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1115\n - data:\n author: TenTen\n desc: ''\n height: 281\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 415\n height: 281\n id: '1751252440357'\n position:\n x: -1206.996048993409\n y: 311.5998178583933\n positionAbsolute:\n x: -1206.996048993409\n y: 311.5998178583933\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 415\n - data:\n author: TenTen\n desc: ''\n height: 403\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only\n support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 403\n id: '1751254117904'\n position:\n x: 160\n y: 471.1516409864865\n positionAbsolute:\n x: 160\n y: 471.1516409864865\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 341\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Processor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" extracts\n specified columns from tables to generate structured Q&A pairs. Users can\n independently designate which columns to use for questions and which for\n answers.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"These\n pairs are indexed by the question field, so user queries are matched directly\n against the questions to retrieve the corresponding answers. This \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q-to-Q\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" matching\n strategy improves clarity and precision, especially in scenarios involving\n high-frequency or highly similar user questions.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 341\n id: '1751356019653'\n position:\n x: -282.74494795239\n y: 411.6979750489463\n positionAbsolute:\n x: -282.74494795239\n y: 411.6979750489463\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: The result of the general chunk tool.\n properties:\n qa_chunks:\n items:\n description: The QA chunk.\n properties:\n answer:\n description: The answer of the QA chunk.\n type: string\n question:\n description: The question of the QA chunk.\n type: string\n type: object\n type: array\n type: object\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The file you want to extract QA from.\n ja_JP: The file you want to extract QA from.\n pt_BR: The file you want to extract QA from.\n zh_Hans: 你想要提取 QA 的文件。\n label:\n en_US: Input File\n ja_JP: Input File\n pt_BR: Input File\n zh_Hans: 输入文件\n llm_description: The file you want to extract QA from.\n max: null\n min: null\n name: input_file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Column number for question.\n ja_JP: Column number for question.\n pt_BR: Column number for question.\n zh_Hans: 问题所在的列。\n label:\n en_US: Column number for question\n ja_JP: Column number for question\n pt_BR: Column number for question\n zh_Hans: 问题所在的列\n llm_description: The column number for question, the format of the column\n number must be an integer.\n max: null\n min: null\n name: question_column\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 1\n form: llm\n human_description:\n en_US: Column number for answer.\n ja_JP: Column number for answer.\n pt_BR: Column number for answer.\n zh_Hans: 答案所在的列。\n label:\n en_US: Column number for answer\n ja_JP: Column number for answer\n pt_BR: Column number for answer\n zh_Hans: 答案所在的列\n llm_description: The column number for answer, the format of the column\n number must be an integer.\n max: null\n min: null\n name: answer_column\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: number\n params:\n answer_column: ''\n input_file: ''\n question_column: ''\n provider_id: langgenius/qa_chunk/qa_chunk\n provider_name: langgenius/qa_chunk/qa_chunk\n provider_type: builtin\n selected: false\n title: Q&A PROCESSOR\n tool_configurations: {}\n tool_description: A tool for QA chunking mode.\n tool_label: QA Chunk\n tool_name: qa_chunk\n tool_node_version: '2'\n tool_parameters:\n answer_column:\n type: variable\n value:\n - rag\n - shared\n - Column_Number_for_Answers\n input_file:\n type: variable\n value:\n - '1750836380067'\n - file\n question_column:\n type: variable\n value:\n - rag\n - shared\n - Column_Number_for_Questions\n type: tool\n height: 52\n id: '1753253430271'\n position:\n x: -282.74494795239\n y: 326\n positionAbsolute:\n x: -282.74494795239\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 173\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Simple\n Q&A Template\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" requires\n a pre-prepared table of question-answer pairs. As a result, it only supports\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"File\n Upload\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" data\n source, accepting \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\"csv\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" file\n formats.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 173\n id: '1753411065636'\n position:\n x: -714.4192784522008\n y: 411.6979750489463\n positionAbsolute:\n x: -714.4192784522008\n y: 411.6979750489463\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n viewport:\n x: 698.8920691163195\n y: 311.46417000656925\n zoom: 0.41853867943092266\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1\n label: Column Number for Questions\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: Specify a column in the table as Questions. The number of first column is\n 0.\n type: number\n unit: ''\n variable: Column_Number_for_Questions\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 2\n label: Column Number for Answers\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: Specify a column in the table as Answers. The number of first column is\n 0.\n type: number\n unit: null\n variable: Column_Number_for_Answers\n", + "graph": { + "edges": [ + { + "data": { + "isInIteration": false, + "isInLoop": false, + "sourceType": "datasource", + "targetType": "tool" + }, + "id": "1750836380067-source-1753253430271-target", + "source": "1750836380067", + "sourceHandle": "source", + "target": "1753253430271", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "tool", + "targetType": "knowledge-index" + }, + "id": "1753253430271-source-1750836372241-target", + "source": "1753253430271", + "sourceHandle": "source", + "target": "1750836372241", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + } + ], + "nodes": [ + { + "data": { + "chunk_structure": "qa_model", + "embedding_model": "jina-embeddings-v2-base-en", + "embedding_model_provider": "langgenius/jina/jina", + "index_chunk_variable_selector": [ + "1753253430271", + "result" + ], + "indexing_technique": "high_quality", + "keyword_number": 10, + "retrieval_model": { + "reranking_enable": false, + "reranking_mode": "reranking_model", + "reranking_model": { + "reranking_model_name": null, + "reranking_provider_name": null + }, + "score_threshold": 0, + "score_threshold_enabled": false, + "search_method": "semantic_search", + "top_k": 3, + "weights": null + }, + "selected": true, + "title": "Knowledge Base", + "type": "knowledge-index" + }, + "height": 114, + "id": "1750836372241", + "position": { + "x": 160, + "y": 326 + }, + "positionAbsolute": { + "x": 160, + "y": 326 + }, + "selected": true, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "File", + "datasource_name": "upload-file", + "datasource_parameters": {}, + "fileExtensions": [ + "csv" + ], + "plugin_id": "langgenius/file", + "provider_name": "file", + "provider_type": "local_file", + "selected": false, + "title": "File", + "type": "datasource" + }, + "height": 52, + "id": "1750836380067", + "position": { + "x": -714.4192784522008, + "y": 326 + }, + "positionAbsolute": { + "x": -714.4192784522008, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 249, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source → use extractor to extract document content → split and clean content into structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 1115 + }, + "height": 249, + "id": "1751252161631", + "position": { + "x": -714.4192784522008, + "y": -19.94142868660783 + }, + "positionAbsolute": { + "x": -714.4192784522008, + "y": -19.94142868660783 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 1115 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 281, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 415 + }, + "height": 281, + "id": "1751252440357", + "position": { + "x": -1206.996048993409, + "y": 311.5998178583933 + }, + "positionAbsolute": { + "x": -1206.996048993409, + "y": 311.5998178583933 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 415 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 403, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 403, + "id": "1751254117904", + "position": { + "x": 160, + "y": 471.1516409864865 + }, + "positionAbsolute": { + "x": 160, + "y": 471.1516409864865 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 341, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Processor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" extracts specified columns from tables to generate structured Q&A pairs. Users can independently designate which columns to use for questions and which for answers.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"These pairs are indexed by the question field, so user queries are matched directly against the questions to retrieve the corresponding answers. This \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q-to-Q\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" matching strategy improves clarity and precision, especially in scenarios involving high-frequency or highly similar user questions.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 341, + "id": "1751356019653", + "position": { + "x": -282.74494795239, + "y": 411.6979750489463 + }, + "positionAbsolute": { + "x": -282.74494795239, + "y": 411.6979750489463 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "is_team_authorization": true, + "output_schema": { + "properties": { + "result": { + "description": "The result of the general chunk tool.", + "properties": { + "qa_chunks": { + "items": { + "description": "The QA chunk.", + "properties": { + "answer": { + "description": "The answer of the QA chunk.", + "type": "string" + }, + "question": { + "description": "The question of the QA chunk.", + "type": "string" + } + }, + "type": "object" + }, + "type": "array" + } + }, + "type": "object" + } + }, + "type": "object" + }, + "paramSchemas": [ + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "The file you want to extract QA from.", + "ja_JP": "The file you want to extract QA from.", + "pt_BR": "The file you want to extract QA from.", + "zh_Hans": "你想要提取 QA 的文件。" + }, + "label": { + "en_US": "Input File", + "ja_JP": "Input File", + "pt_BR": "Input File", + "zh_Hans": "输入文件" + }, + "llm_description": "The file you want to extract QA from.", + "max": null, + "min": null, + "name": "input_file", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "file" + }, + { + "auto_generate": null, + "default": 0, + "form": "llm", + "human_description": { + "en_US": "Column number for question.", + "ja_JP": "Column number for question.", + "pt_BR": "Column number for question.", + "zh_Hans": "问题所在的列。" + }, + "label": { + "en_US": "Column number for question", + "ja_JP": "Column number for question", + "pt_BR": "Column number for question", + "zh_Hans": "问题所在的列" + }, + "llm_description": "The column number for question, the format of the column number must be an integer.", + "max": null, + "min": null, + "name": "question_column", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "number" + }, + { + "auto_generate": null, + "default": 1, + "form": "llm", + "human_description": { + "en_US": "Column number for answer.", + "ja_JP": "Column number for answer.", + "pt_BR": "Column number for answer.", + "zh_Hans": "答案所在的列。" + }, + "label": { + "en_US": "Column number for answer", + "ja_JP": "Column number for answer", + "pt_BR": "Column number for answer", + "zh_Hans": "答案所在的列" + }, + "llm_description": "The column number for answer, the format of the column number must be an integer.", + "max": null, + "min": null, + "name": "answer_column", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "number" + } + ], + "params": { + "answer_column": "", + "input_file": "", + "question_column": "" + }, + "provider_id": "langgenius/qa_chunk/qa_chunk", + "provider_name": "langgenius/qa_chunk/qa_chunk", + "provider_type": "builtin", + "selected": false, + "title": "Q&A PROCESSOR", + "tool_configurations": {}, + "tool_description": "A tool for QA chunking mode.", + "tool_label": "QA Chunk", + "tool_name": "qa_chunk", + "tool_node_version": "2", + "tool_parameters": { + "answer_column": { + "type": "variable", + "value": [ + "rag", + "shared", + "Column_Number_for_Answers" + ] + }, + "input_file": { + "type": "variable", + "value": [ + "1750836380067", + "file" + ] + }, + "question_column": { + "type": "variable", + "value": [ + "rag", + "shared", + "Column_Number_for_Questions" + ] + } + }, + "type": "tool" + }, + "height": 52, + "id": "1753253430271", + "position": { + "x": -282.74494795239, + "y": 326 + }, + "positionAbsolute": { + "x": -282.74494795239, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 173, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Simple Q&A Template\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" requires a pre-prepared table of question-answer pairs. As a result, it only supports \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"File Upload\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" data source, accepting \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\"csv\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" file formats.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 173, + "id": "1753411065636", + "position": { + "x": -714.4192784522008, + "y": 411.6979750489463 + }, + "positionAbsolute": { + "x": -714.4192784522008, + "y": 411.6979750489463 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + } + ], + "viewport": { + "x": 698.8920691163195, + "y": 311.46417000656925, + "zoom": 0.41853867943092266 + } + }, + "icon_info": { + "icon": "ae0993dc-ff90-48ac-9e35-c31ebae5124b", + "icon_background": null, + "icon_type": "image", + "icon_url": "" + }, + "id": "9ef3e66a-11c7-4227-897c-3b0f9a42da1a", + "name": "Simple Q&A", + "icon": { + "icon": "ae0993dc-ff90-48ac-9e35-c31ebae5124b", + "icon_background": null, + "icon_type": "image", + "icon_url": "" + }, + "language": "zh-Hans", + "position": 3 + }, + "982d1788-837a-40c8-b7de-d37b09a9b2bc": { + "chunk_structure": "hierarchical_model", + "description": "This template is designed for converting native Office files such as DOCX, XLSX, and PPTX into Markdown to facilitate better information processing. PDF files are not recommended.", + "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/parentchild_chunker:0.0.7@ee9c253e7942436b4de0318200af97d98d094262f3c1a56edbe29dcb01fbc158\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: yevanchen/markitdown:0.0.4@776b3e2e930e2ffd28a75bb20fecbe7a020849cf754f86e604acacf1258877f6\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 9d658c3a-b22f-487d-8223-db51e9012505\n icon_background: null\n icon_type: image\n icon_url: \n name: Convert to Markdown\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1751336942081-source-1750400198569-target\n selected: false\n source: '1751336942081'\n sourceHandle: source\n target: '1750400198569'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: tool\n id: 1750400203722-source-1751359716720-target\n selected: false\n source: '1750400203722'\n sourceHandle: source\n target: '1751359716720'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: tool\n id: 1751359716720-source-1751336942081-target\n source: '1751359716720'\n sourceHandle: source\n target: '1751336942081'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: hierarchical_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius/jina/jina\n index_chunk_variable_selector:\n - '1751336942081'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n hybridSearchMode: weighted_score\n score_threshold: 0.5\n score_threshold_enabled: false\n search_method: hybrid_search\n top_k: 3\n vector_setting:\n embedding_model_name: jina-embeddings-v2-base-en\n embedding_provider_name: langgenius/jina/jina\n selected: true\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750400198569'\n position:\n x: 357.7591396590142\n y: 282\n positionAbsolute:\n x: 357.7591396590142\n y: 282\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - html\n - xlsx\n - xls\n - doc\n - docx\n - csv\n - pptx\n - xml\n - ppt\n - txt\n plugin_id: langgenius/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1750400203722'\n position:\n x: -580.684520226929\n y: 282\n positionAbsolute:\n x: -580.684520226929\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 316\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 374\n height: 316\n id: '1751264451381'\n position:\n x: -1034.2054006208518\n y: 282\n positionAbsolute:\n x: -1034.2054006208518\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 374\n - data:\n author: TenTen\n desc: ''\n height: 260\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n → use extractor to extract document content → split and clean content into\n structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1182\n height: 260\n id: '1751266376760'\n position:\n x: -580.684520226929\n y: -21.891401375096322\n positionAbsolute:\n x: -580.684520226929\n y: -21.891401375096322\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1182\n - data:\n author: TenTen\n desc: ''\n height: 417\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor in Retrieval-Augmented Generation (RAG) is a tool or\n component that automatically identifies, extracts, and structures text and\n data from various types of documents—such as PDFs, images, scanned files,\n handwritten notes, and more—into a format that can be effectively used by\n language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Markitdown\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n serves as an excellent alternative to traditional document extraction nodes,\n offering robust file conversion capabilities within the Dify ecosystem.\n It leverages MarkItDown''s plugin-based architecture to provide seamless\n conversion of multiple file formats to Markdown.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 241\n height: 417\n id: '1751266402561'\n position:\n x: -266.96080929383595\n y: 372.64040589639495\n positionAbsolute:\n x: -266.96080929383595\n y: 372.64040589639495\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 241\n - data:\n author: TenTen\n desc: ''\n height: 554\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n addresses the dilemma of context and precision by leveraging a two-tier\n hierarchical approach that effectively balances the trade-off between accurate\n matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here\n is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Query Matching with Child Chunks: Small, focused pieces of information,\n often as concise as a single sentence within a paragraph, are used to match\n the user''s query. These child chunks enable precise and relevant initial\n retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Contextual Enrichment with Parent Chunks: Larger, encompassing sections—such\n as a paragraph, a section, or even an entire document—that include the matched\n child chunks are then retrieved. These parent chunks provide comprehensive\n context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 554\n id: '1751266447821'\n position:\n x: 37.74090119950054\n y: 372.64040589639495\n positionAbsolute:\n x: 37.74090119950054\n y: 372.64040589639495\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 411\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only\n support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 411\n id: '1751266580099'\n position:\n x: 357.7591396590142\n y: 434.3959856026883\n positionAbsolute:\n x: 357.7591396590142\n y: 434.3959856026883\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: Parent child chunks result\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: ''\n ja_JP: ''\n pt_BR: ''\n zh_Hans: ''\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Conteúdo de Entrada\n zh_Hans: 输入文本\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: paragraph\n form: llm\n human_description:\n en_US: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n ja_JP: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n pt_BR: Dividir texto em parágrafos com base no separador e no comprimento\n máximo do bloco, usando o texto dividido como bloco pai ou documento\n completo como bloco pai e diretamente recuperá-lo.\n zh_Hans: 根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。\n label:\n en_US: Parent Mode\n ja_JP: Parent Mode\n pt_BR: Modo Pai\n zh_Hans: 父块模式\n llm_description: Split text into paragraphs based on separator and maximum\n chunk length, using split text as parent block or entire document as parent\n block and directly retrieve.\n max: null\n min: null\n name: parent_mode\n options:\n - label:\n en_US: Paragraph\n ja_JP: Paragraph\n pt_BR: Parágrafo\n zh_Hans: 段落\n value: paragraph\n - label:\n en_US: Full Document\n ja_JP: Full Document\n pt_BR: Documento Completo\n zh_Hans: 全文\n value: full_doc\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: '\n\n\n '\n form: llm\n human_description:\n en_US: Separator used for chunking\n ja_JP: Separator used for chunking\n pt_BR: Separador usado para divisão\n zh_Hans: 用于分块的分隔符\n label:\n en_US: Parent Delimiter\n ja_JP: Parent Delimiter\n pt_BR: Separador de Pai\n zh_Hans: 父块分隔符\n llm_description: The separator used to split chunks\n max: null\n min: null\n name: separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 1024\n form: llm\n human_description:\n en_US: Maximum length for chunking\n ja_JP: Maximum length for chunking\n pt_BR: Comprimento máximo para divisão\n zh_Hans: 用于分块的最大长度\n label:\n en_US: Maximum Parent Chunk Length\n ja_JP: Maximum Parent Chunk Length\n pt_BR: Comprimento Máximo do Bloco Pai\n zh_Hans: 最大父块长度\n llm_description: Maximum length allowed per chunk\n max: null\n min: null\n name: max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: '. '\n form: llm\n human_description:\n en_US: Separator used for subchunking\n ja_JP: Separator used for subchunking\n pt_BR: Separador usado para subdivisão\n zh_Hans: 用于子分块的分隔符\n label:\n en_US: Child Delimiter\n ja_JP: Child Delimiter\n pt_BR: Separador de Subdivisão\n zh_Hans: 子分块分隔符\n llm_description: The separator used to split subchunks\n max: null\n min: null\n name: subchunk_separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 512\n form: llm\n human_description:\n en_US: Maximum length for subchunking\n ja_JP: Maximum length for subchunking\n pt_BR: Comprimento máximo para subdivisão\n zh_Hans: 用于子分块的最大长度\n label:\n en_US: Maximum Child Chunk Length\n ja_JP: Maximum Child Chunk Length\n pt_BR: Comprimento Máximo de Subdivisão\n zh_Hans: 子分块最大长度\n llm_description: Maximum length allowed per subchunk\n max: null\n min: null\n name: subchunk_max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove consecutive spaces, newlines and tabs\n ja_JP: Whether to remove consecutive spaces, newlines and tabs\n pt_BR: Se deve remover espaços extras no texto\n zh_Hans: 是否移除文本中的连续空格、换行符和制表符\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Substituir espaços consecutivos, novas linhas e guias\n zh_Hans: 替换连续空格、换行符和制表符\n llm_description: Whether to remove consecutive spaces, newlines and tabs\n max: null\n min: null\n name: remove_extra_spaces\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove URLs and emails in the text\n ja_JP: Whether to remove URLs and emails in the text\n pt_BR: Se deve remover URLs e e-mails no texto\n zh_Hans: 是否移除文本中的URL和电子邮件地址\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Remover todas as URLs e e-mails\n zh_Hans: 删除所有URL和电子邮件地址\n llm_description: Whether to remove URLs and emails in the text\n max: null\n min: null\n name: remove_urls_emails\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n input_text: ''\n max_length: ''\n parent_mode: ''\n remove_extra_spaces: ''\n remove_urls_emails: ''\n separator: ''\n subchunk_max_length: ''\n subchunk_separator: ''\n provider_id: langgenius/parentchild_chunker/parentchild_chunker\n provider_name: langgenius/parentchild_chunker/parentchild_chunker\n provider_type: builtin\n selected: false\n title: Parent-child Chunker\n tool_configurations: {}\n tool_description: Process documents into parent-child chunk structures\n tool_label: Parent-child Chunker\n tool_name: parentchild_chunker\n tool_node_version: '2'\n tool_parameters:\n input_text:\n type: mixed\n value: '{{#1751359716720.text#}}'\n max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Parent_Length\n parent_mode:\n type: variable\n value:\n - rag\n - shared\n - Parent_Mode\n separator:\n type: mixed\n value: '{{#rag.shared.Parent_Delimiter#}}'\n subchunk_max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Child_Length\n subchunk_separator:\n type: mixed\n value: '{{#rag.shared.Child_Delimiter#}}'\n type: tool\n height: 52\n id: '1751336942081'\n position:\n x: 37.74090119950054\n y: 282\n positionAbsolute:\n x: 37.74090119950054\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema: null\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: Upload files for processing\n ja_JP: Upload files for processing\n pt_BR: Carregar arquivos para processamento\n zh_Hans: 上传文件进行处理\n label:\n en_US: Files\n ja_JP: Files\n pt_BR: Arquivos\n zh_Hans: 文件\n llm_description: ''\n max: null\n min: null\n name: files\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: files\n params:\n files: ''\n provider_id: yevanchen/markitdown/markitdown\n provider_name: yevanchen/markitdown/markitdown\n provider_type: builtin\n selected: false\n title: markitdown\n tool_configurations: {}\n tool_description: Python tool for converting files and office documents to\n Markdown.\n tool_label: markitdown\n tool_name: markitdown\n tool_node_version: '2'\n tool_parameters:\n files:\n type: variable\n value:\n - '1750400203722'\n - file\n type: tool\n height: 52\n id: '1751359716720'\n position:\n x: -266.96080929383595\n y: 282\n positionAbsolute:\n x: -266.96080929383595\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 301\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MarkItDown\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is\n recommended for converting and handling a wide range of file formats, particularly\n for transforming content into Markdown. It works especially well for converting\n native Office files—such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"DOCX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"XLSX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"PPTX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"—into\n Markdown to facilitate better information processing. However, as some users\n have noted its suboptimal performance in extracting content from PDF files,\n using it for PDFs is not recommended.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 301\n id: '1753425718313'\n position:\n x: -580.684520226929\n y: 372.64040589639495\n positionAbsolute:\n x: -580.684520226929\n y: 372.64040589639495\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n viewport:\n x: 747.6785299994758\n y: 94.6209873206409\n zoom: 0.8152773235379324\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: paragraph\n label: Parent Mode\n max_length: 48\n options:\n - paragraph\n - full_doc\n placeholder: null\n required: true\n tooltips: 'Parent Mode provides two options: paragraph mode splits text into paragraphs\n as parent chunks for retrieval, while full_doc mode uses the entire document\n as a single parent chunk (text beyond 10,000 tokens will be truncated).'\n type: select\n unit: null\n variable: Parent_Mode\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Parent Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: A delimiter is the character used to separate text. \\n\\n is recommended\n for splitting the original document into large parent chunks. You can also use\n special delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Parent_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Parent Length\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Parent_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\n label: Child Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n is recommended\n for splitting parent chunks into small child chunks. You can also use special\n delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Child_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 256\n label: Maximum Child Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Child_Length\n", + "graph": { + "edges": [ + { + "data": { + "isInLoop": false, + "sourceType": "tool", + "targetType": "knowledge-index" + }, + "id": "1751336942081-source-1750400198569-target", + "selected": false, + "source": "1751336942081", + "sourceHandle": "source", + "target": "1750400198569", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "tool" + }, + "id": "1750400203722-source-1751359716720-target", + "selected": false, + "source": "1750400203722", + "sourceHandle": "source", + "target": "1751359716720", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "tool", + "targetType": "tool" + }, + "id": "1751359716720-source-1751336942081-target", + "source": "1751359716720", + "sourceHandle": "source", + "target": "1751336942081", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + } + ], + "nodes": [ + { + "data": { + "chunk_structure": "hierarchical_model", + "embedding_model": "jina-embeddings-v2-base-en", + "embedding_model_provider": "langgenius/jina/jina", + "index_chunk_variable_selector": [ + "1751336942081", + "result" + ], + "indexing_technique": "high_quality", + "keyword_number": 10, + "retrieval_model": { + "hybridSearchMode": "weighted_score", + "score_threshold": 0.5, + "score_threshold_enabled": false, + "search_method": "hybrid_search", + "top_k": 3, + "vector_setting": { + "embedding_model_name": "jina-embeddings-v2-base-en", + "embedding_provider_name": "langgenius/jina/jina" + } + }, + "selected": true, + "title": "Knowledge Base", + "type": "knowledge-index" + }, + "height": 114, + "id": "1750400198569", + "position": { + "x": 357.7591396590142, + "y": 282 + }, + "positionAbsolute": { + "x": 357.7591396590142, + "y": 282 + }, + "selected": true, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "File", + "datasource_name": "upload-file", + "datasource_parameters": {}, + "fileExtensions": [ + "html", + "xlsx", + "xls", + "doc", + "docx", + "csv", + "pptx", + "xml", + "ppt", + "txt" + ], + "plugin_id": "langgenius/file", + "provider_name": "file", + "provider_type": "local_file", + "selected": false, + "title": "File", + "type": "datasource" + }, + "height": 52, + "id": "1750400203722", + "position": { + "x": -580.684520226929, + "y": 282 + }, + "positionAbsolute": { + "x": -580.684520226929, + "y": 282 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 316, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 374 + }, + "height": 316, + "id": "1751264451381", + "position": { + "x": -1034.2054006208518, + "y": 282 + }, + "positionAbsolute": { + "x": -1034.2054006208518, + "y": 282 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 374 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 260, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source → use extractor to extract document content → split and clean content into structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 1182 + }, + "height": 260, + "id": "1751266376760", + "position": { + "x": -580.684520226929, + "y": -21.891401375096322 + }, + "positionAbsolute": { + "x": -580.684520226929, + "y": -21.891401375096322 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 1182 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 417, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A document extractor in Retrieval-Augmented Generation (RAG) is a tool or component that automatically identifies, extracts, and structures text and data from various types of documents—such as PDFs, images, scanned files, handwritten notes, and more—into a format that can be effectively used by language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Markitdown\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" serves as an excellent alternative to traditional document extraction nodes, offering robust file conversion capabilities within the Dify ecosystem. It leverages MarkItDown's plugin-based architecture to provide seamless conversion of multiple file formats to Markdown.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 241 + }, + "height": 417, + "id": "1751266402561", + "position": { + "x": -266.96080929383595, + "y": 372.64040589639495 + }, + "positionAbsolute": { + "x": -266.96080929383595, + "y": 372.64040589639495 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 241 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 554, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" addresses the dilemma of context and precision by leveraging a two-tier hierarchical approach that effectively balances the trade-off between accurate matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Query Matching with Child Chunks: Small, focused pieces of information, often as concise as a single sentence within a paragraph, are used to match the user's query. These child chunks enable precise and relevant initial retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Contextual Enrichment with Parent Chunks: Larger, encompassing sections—such as a paragraph, a section, or even an entire document—that include the matched child chunks are then retrieved. These parent chunks provide comprehensive context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 554, + "id": "1751266447821", + "position": { + "x": 37.74090119950054, + "y": 372.64040589639495 + }, + "positionAbsolute": { + "x": 37.74090119950054, + "y": 372.64040589639495 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 411, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 411, + "id": "1751266580099", + "position": { + "x": 357.7591396590142, + "y": 434.3959856026883 + }, + "positionAbsolute": { + "x": 357.7591396590142, + "y": 434.3959856026883 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "is_team_authorization": true, + "output_schema": { + "properties": { + "result": { + "description": "Parent child chunks result", + "items": { + "type": "object" + }, + "type": "array" + } + }, + "type": "object" + }, + "paramSchemas": [ + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "", + "ja_JP": "", + "pt_BR": "", + "zh_Hans": "" + }, + "label": { + "en_US": "Input Content", + "ja_JP": "Input Content", + "pt_BR": "Conteúdo de Entrada", + "zh_Hans": "输入文本" + }, + "llm_description": "The text you want to chunk.", + "max": null, + "min": null, + "name": "input_text", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": "paragraph", + "form": "llm", + "human_description": { + "en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", + "ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", + "pt_BR": "Dividir texto em parágrafos com base no separador e no comprimento máximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuperá-lo.", + "zh_Hans": "根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。" + }, + "label": { + "en_US": "Parent Mode", + "ja_JP": "Parent Mode", + "pt_BR": "Modo Pai", + "zh_Hans": "父块模式" + }, + "llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", + "max": null, + "min": null, + "name": "parent_mode", + "options": [ + { + "label": { + "en_US": "Paragraph", + "ja_JP": "Paragraph", + "pt_BR": "Parágrafo", + "zh_Hans": "段落" + }, + "value": "paragraph" + }, + { + "label": { + "en_US": "Full Document", + "ja_JP": "Full Document", + "pt_BR": "Documento Completo", + "zh_Hans": "全文" + }, + "value": "full_doc" + } + ], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "select" + }, + { + "auto_generate": null, + "default": "\n\n", + "form": "llm", + "human_description": { + "en_US": "Separator used for chunking", + "ja_JP": "Separator used for chunking", + "pt_BR": "Separador usado para divisão", + "zh_Hans": "用于分块的分隔符" + }, + "label": { + "en_US": "Parent Delimiter", + "ja_JP": "Parent Delimiter", + "pt_BR": "Separador de Pai", + "zh_Hans": "父块分隔符" + }, + "llm_description": "The separator used to split chunks", + "max": null, + "min": null, + "name": "separator", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": 1024, + "form": "llm", + "human_description": { + "en_US": "Maximum length for chunking", + "ja_JP": "Maximum length for chunking", + "pt_BR": "Comprimento máximo para divisão", + "zh_Hans": "用于分块的最大长度" + }, + "label": { + "en_US": "Maximum Parent Chunk Length", + "ja_JP": "Maximum Parent Chunk Length", + "pt_BR": "Comprimento Máximo do Bloco Pai", + "zh_Hans": "最大父块长度" + }, + "llm_description": "Maximum length allowed per chunk", + "max": null, + "min": null, + "name": "max_length", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "number" + }, + { + "auto_generate": null, + "default": ". ", + "form": "llm", + "human_description": { + "en_US": "Separator used for subchunking", + "ja_JP": "Separator used for subchunking", + "pt_BR": "Separador usado para subdivisão", + "zh_Hans": "用于子分块的分隔符" + }, + "label": { + "en_US": "Child Delimiter", + "ja_JP": "Child Delimiter", + "pt_BR": "Separador de Subdivisão", + "zh_Hans": "子分块分隔符" + }, + "llm_description": "The separator used to split subchunks", + "max": null, + "min": null, + "name": "subchunk_separator", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": 512, + "form": "llm", + "human_description": { + "en_US": "Maximum length for subchunking", + "ja_JP": "Maximum length for subchunking", + "pt_BR": "Comprimento máximo para subdivisão", + "zh_Hans": "用于子分块的最大长度" + }, + "label": { + "en_US": "Maximum Child Chunk Length", + "ja_JP": "Maximum Child Chunk Length", + "pt_BR": "Comprimento Máximo de Subdivisão", + "zh_Hans": "子分块最大长度" + }, + "llm_description": "Maximum length allowed per subchunk", + "max": null, + "min": null, + "name": "subchunk_max_length", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "number" + }, + { + "auto_generate": null, + "default": 0, + "form": "llm", + "human_description": { + "en_US": "Whether to remove consecutive spaces, newlines and tabs", + "ja_JP": "Whether to remove consecutive spaces, newlines and tabs", + "pt_BR": "Se deve remover espaços extras no texto", + "zh_Hans": "是否移除文本中的连续空格、换行符和制表符" + }, + "label": { + "en_US": "Replace consecutive spaces, newlines and tabs", + "ja_JP": "Replace consecutive spaces, newlines and tabs", + "pt_BR": "Substituir espaços consecutivos, novas linhas e guias", + "zh_Hans": "替换连续空格、换行符和制表符" + }, + "llm_description": "Whether to remove consecutive spaces, newlines and tabs", + "max": null, + "min": null, + "name": "remove_extra_spaces", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + }, + { + "auto_generate": null, + "default": 0, + "form": "llm", + "human_description": { + "en_US": "Whether to remove URLs and emails in the text", + "ja_JP": "Whether to remove URLs and emails in the text", + "pt_BR": "Se deve remover URLs e e-mails no texto", + "zh_Hans": "是否移除文本中的URL和电子邮件地址" + }, + "label": { + "en_US": "Delete all URLs and email addresses", + "ja_JP": "Delete all URLs and email addresses", + "pt_BR": "Remover todas as URLs e e-mails", + "zh_Hans": "删除所有URL和电子邮件地址" + }, + "llm_description": "Whether to remove URLs and emails in the text", + "max": null, + "min": null, + "name": "remove_urls_emails", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + } + ], + "params": { + "input_text": "", + "max_length": "", + "parent_mode": "", + "remove_extra_spaces": "", + "remove_urls_emails": "", + "separator": "", + "subchunk_max_length": "", + "subchunk_separator": "" + }, + "provider_id": "langgenius/parentchild_chunker/parentchild_chunker", + "provider_name": "langgenius/parentchild_chunker/parentchild_chunker", + "provider_type": "builtin", + "selected": false, + "title": "Parent-child Chunker", + "tool_configurations": {}, + "tool_description": "Process documents into parent-child chunk structures", + "tool_label": "Parent-child Chunker", + "tool_name": "parentchild_chunker", + "tool_node_version": "2", + "tool_parameters": { + "input_text": { + "type": "mixed", + "value": "{{#1751359716720.text#}}" + }, + "max_length": { + "type": "variable", + "value": [ + "rag", + "shared", + "Maximum_Parent_Length" + ] + }, + "parent_mode": { + "type": "variable", + "value": [ + "rag", + "shared", + "Parent_Mode" + ] + }, + "separator": { + "type": "mixed", + "value": "{{#rag.shared.Parent_Delimiter#}}" + }, + "subchunk_max_length": { + "type": "variable", + "value": [ + "rag", + "shared", + "Maximum_Child_Length" + ] + }, + "subchunk_separator": { + "type": "mixed", + "value": "{{#rag.shared.Child_Delimiter#}}" + } + }, + "type": "tool" + }, + "height": 52, + "id": "1751336942081", + "position": { + "x": 37.74090119950054, + "y": 282 + }, + "positionAbsolute": { + "x": 37.74090119950054, + "y": 282 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "is_team_authorization": true, + "output_schema": null, + "paramSchemas": [ + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "Upload files for processing", + "ja_JP": "Upload files for processing", + "pt_BR": "Carregar arquivos para processamento", + "zh_Hans": "上传文件进行处理" + }, + "label": { + "en_US": "Files", + "ja_JP": "Files", + "pt_BR": "Arquivos", + "zh_Hans": "文件" + }, + "llm_description": "", + "max": null, + "min": null, + "name": "files", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "files" + } + ], + "params": { + "files": "" + }, + "provider_id": "yevanchen/markitdown/markitdown", + "provider_name": "yevanchen/markitdown/markitdown", + "provider_type": "builtin", + "selected": false, + "title": "markitdown", + "tool_configurations": {}, + "tool_description": "Python tool for converting files and office documents to Markdown.", + "tool_label": "markitdown", + "tool_name": "markitdown", + "tool_node_version": "2", + "tool_parameters": { + "files": { + "type": "variable", + "value": [ + "1750400203722", + "file" + ] + } + }, + "type": "tool" + }, + "height": 52, + "id": "1751359716720", + "position": { + "x": -266.96080929383595, + "y": 282 + }, + "positionAbsolute": { + "x": -266.96080929383595, + "y": 282 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 301, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MarkItDown\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is recommended for converting and handling a wide range of file formats, particularly for transforming content into Markdown. It works especially well for converting native Office files—such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"DOCX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"XLSX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"PPTX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"—into Markdown to facilitate better information processing. However, as some users have noted its suboptimal performance in extracting content from PDF files, using it for PDFs is not recommended.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 301, + "id": "1753425718313", + "position": { + "x": -580.684520226929, + "y": 372.64040589639495 + }, + "positionAbsolute": { + "x": -580.684520226929, + "y": 372.64040589639495 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + } + ], + "viewport": { + "x": 747.6785299994758, + "y": 94.6209873206409, + "zoom": 0.8152773235379324 + } + }, + "icon_info": { + "icon": "9d658c3a-b22f-487d-8223-db51e9012505", + "icon_background": null, + "icon_type": "image", + "icon_url": "" + }, + "id": "982d1788-837a-40c8-b7de-d37b09a9b2bc", + "name": "Convert to Markdown", + "icon": { + "icon": "9d658c3a-b22f-487d-8223-db51e9012505", + "icon_background": null, + "icon_type": "image", + "icon_url": "" + }, + "language": "zh-Hans", + "position": 4 + }, + "98374ab6-9dcd-434d-983e-268bec156b43": { + "chunk_structure": "qa_model", + "description": "This template is designed to use LLM to extract key information from the input document and generate Q&A pairs indexed by questions, enabling efficient retrieval of relevant answers based on query similarity.", + "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/dify_extractor:0.0.5@ba7e2fd9165eda73bfcc68e31a108855197e88706e5556c058e0777ab08409b3\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/notion_datasource:0.1.12@2855c4a7cffd3311118ebe70f095e546f99935e47f12c841123146f728534f55\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina_datasource:0.0.5@75942f5bbde870ad28e0345ff5ebf54ebd3aec63f0e66344ef76b88cf06b85c3\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/google_drive:0.1.6@4bc0cf8f8979ebd7321b91506b4bc8f090b05b769b5d214f2da4ce4c04ce30bd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/qa_chunk:0.0.8@1fed9644646bdd48792cdf5a1d559a3df336bd3a8edb0807227499fb56dce3af\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: bowenliang123/md_exporter:2.0.0@13e1aca1995328e41c080ff9f7f6d898df60ff74a3f4d98d6de4b18ab5b92c2e\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/firecrawl_datasource:0.2.4@37b490ebc52ac30d1c6cbfa538edcddddcfed7d5f5de58982edbd4e2094eb6e2\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/anthropic:0.2.0@a776815b091c81662b2b54295ef4b8a54b5533c2ec1c66c7c8f2feea724f3248\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 2b887f89-b6c9-4288-be43-635fee45216b\n icon_background: '#FFEAD5'\n icon_type: image\n icon_url: \n name: LLM Generated Q&A\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: variable-aggregator\n id: 1750836391776-source-1753346901505-target\n selected: false\n source: '1750836391776'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: document-extractor\n targetType: variable-aggregator\n id: 1753349228522-source-1753346901505-target\n selected: false\n source: '1753349228522'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1754023419266-source-1753346901505-target\n selected: false\n source: '1754023419266'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756442998557-source-1756442986174-target\n selected: false\n source: '1756442998557'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: variable-aggregator\n targetType: if-else\n id: 1756442986174-source-1756443014860-target\n selected: false\n source: '1756442986174'\n sourceHandle: source\n target: '1756443014860'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1750836380067-source-1756442986174-target\n selected: false\n source: '1750836380067'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: tool\n id: 1756443014860-true-1750836391776-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'true'\n target: '1750836391776'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: document-extractor\n id: 1756443014860-false-1753349228522-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'false'\n target: '1753349228522'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756896212061-source-1753346901505-target\n source: '1756896212061'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756907397615-source-1753346901505-target\n source: '1756907397615'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: variable-aggregator\n targetType: llm\n id: 1753346901505-source-1756912504019-target\n source: '1753346901505'\n sourceHandle: source\n target: '1756912504019'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: llm\n targetType: tool\n id: 1756912504019-source-1756912537172-target\n source: '1756912504019'\n sourceHandle: source\n target: '1756912537172'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: tool\n id: 1756912537172-source-1756912274158-target\n source: '1756912537172'\n sourceHandle: source\n target: '1756912274158'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1756912274158-source-1750836372241-target\n source: '1756912274158'\n sourceHandle: source\n target: '1750836372241'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: qa_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius/jina/jina\n index_chunk_variable_selector:\n - '1756912274158'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n hybridSearchMode: weighted_score\n reranking_enable: false\n score_threshold: 0.5\n score_threshold_enabled: false\n search_method: semantic_search\n top_k: 3\n vector_setting:\n embedding_model_name: jina-embeddings-v2-base-en\n embedding_provider_name: langgenius/jina/jina\n selected: false\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750836372241'\n position:\n x: 1150.8369138826617\n y: 326\n positionAbsolute:\n x: 1150.8369138826617\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - txt\n - markdown\n - mdx\n - pdf\n - html\n - xlsx\n - xls\n - vtt\n - properties\n - doc\n - docx\n - csv\n - eml\n - msg\n - pptx\n - xml\n - epub\n - ppt\n - md\n plugin_id: langgenius/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1750836380067'\n position:\n x: -1371.6520723158733\n y: 224.87938381325645\n positionAbsolute:\n x: -1371.6520723158733\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n documents:\n description: the documents extracted from the file\n items:\n type: object\n type: array\n images:\n description: The images extracted from the file\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n pt_BR: o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png,\n jpg, jpeg)\n zh_Hans: 用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n params:\n file: ''\n provider_id: langgenius/dify_extractor/dify_extractor\n provider_name: langgenius/dify_extractor/dify_extractor\n provider_type: builtin\n selected: false\n title: Dify Extractor\n tool_configurations: {}\n tool_description: Dify Extractor\n tool_label: Dify Extractor\n tool_name: dify_extractor\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1756442986174'\n - output\n type: tool\n height: 52\n id: '1750836391776'\n position:\n x: -417.5334221022782\n y: 268.1692071834485\n positionAbsolute:\n x: -417.5334221022782\n y: 268.1692071834485\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 252\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n → use extractor to extract document content → split and clean content into\n structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1124\n height: 252\n id: '1751252161631'\n position:\n x: -1371.6520723158733\n y: -123.758428116601\n positionAbsolute:\n x: -1371.6520723158733\n y: -123.758428116601\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1124\n - data:\n author: TenTen\n desc: ''\n height: 388\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 285\n height: 388\n id: '1751252440357'\n position:\n x: -1723.9942193415582\n y: 224.87938381325645\n positionAbsolute:\n x: -1723.9942193415582\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 285\n - data:\n author: TenTen\n desc: ''\n height: 430\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor in Retrieval-Augmented Generation (RAG) is a tool or\n component that automatically identifies, extracts, and structures text and\n data from various types of documents—such as PDFs, images, scanned files,\n handwritten notes, and more—into a format that can be effectively used by\n language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify\n Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is\n a built-in document parser developed by Dify. It supports a wide range of\n common file formats and offers specialized handling for certain formats,\n such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\n In addition to text extraction, it can extract images embedded within documents,\n store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 430\n id: '1751253091602'\n position:\n x: -417.5334221022782\n y: 546.5283142529594\n positionAbsolute:\n x: -417.5334221022782\n y: 546.5283142529594\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 336\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Processor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" extracts\n specified columns from tables to generate structured Q&A pairs. Users can\n independently designate which columns to use for questions and which for\n answers.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"These\n pairs are indexed by the question field, so user queries are matched directly\n against the questions to retrieve the corresponding answers. This \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q-to-Q\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" matching\n strategy improves clarity and precision, especially in scenarios involving\n high-frequency or highly similar user questions.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 336\n id: '1751253953926'\n position:\n x: 794.2003154321724\n y: 417.25474169825833\n positionAbsolute:\n x: 794.2003154321724\n y: 417.25474169825833\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 410\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only\n support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 410\n id: '1751254117904'\n position:\n x: 1150.8369138826617\n y: 475.88970282568215\n positionAbsolute:\n x: 1150.8369138826617\n y: 475.88970282568215\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n output_type: string\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836391776'\n - text\n - - '1753349228522'\n - text\n - - '1754023419266'\n - content\n - - '1756896212061'\n - content\n height: 187\n id: '1753346901505'\n position:\n x: -117.24452412456148\n y: 326\n positionAbsolute:\n x: -117.24452412456148\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_array_file: false\n selected: false\n title: Doc Extractor\n type: document-extractor\n variable_selector:\n - '1756442986174'\n - output\n height: 92\n id: '1753349228522'\n position:\n x: -417.5334221022782\n y: 417.25474169825833\n positionAbsolute:\n x: -417.5334221022782\n y: 417.25474169825833\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Notion\n datasource_name: notion_datasource\n datasource_parameters: {}\n plugin_id: langgenius/notion_datasource\n provider_name: notion_datasource\n provider_type: online_document\n selected: false\n title: Notion\n type: datasource\n height: 52\n id: '1754023419266'\n position:\n x: -1369.6904698303242\n y: 440.01452302398053\n positionAbsolute:\n x: -1369.6904698303242\n y: 440.01452302398053\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n output_type: file\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836380067'\n - file\n - - '1756442998557'\n - file\n height: 135\n id: '1756442986174'\n position:\n x: -1067.06980963949\n y: 236.10252072775984\n positionAbsolute:\n x: -1067.06980963949\n y: 236.10252072775984\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Google Drive\n datasource_name: google_drive\n datasource_parameters: {}\n plugin_id: langgenius/google_drive\n provider_name: google_drive\n provider_type: online_drive\n selected: false\n title: Google Drive\n type: datasource\n height: 52\n id: '1756442998557'\n position:\n x: -1371.6520723158733\n y: 326\n positionAbsolute:\n x: -1371.6520723158733\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n cases:\n - case_id: 'true'\n conditions:\n - comparison_operator: is\n id: 1581dd11-7898-41f4-962f-937283ba7e01\n value: .xlsx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 92abb46d-d7e4-46e7-a5e1-8a29bb45d528\n value: .xls\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1dde5ae7-754d-4e83-96b2-fe1f02995d8b\n value: .md\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 7e1a80e5-c32a-46a4-8f92-8912c64972aa\n value: .markdown\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 53abfe95-c7d0-4f63-ad37-17d425d25106\n value: .mdx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 436877b8-8c0a-4cc6-9565-92754db08571\n value: .html\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 5e3e375e-750b-4204-8ac3-9a1174a5ab7c\n value: .htm\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1a84a784-a797-4f96-98a0-33a9b48ceb2b\n value: .docx\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 62d11445-876a-493f-85d3-8fc020146bdd\n value: .csv\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 02c4bce8-7668-4ccd-b750-4281f314b231\n value: .txt\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n id: 'true'\n logical_operator: or\n selected: false\n title: IF/ELSE\n type: if-else\n height: 358\n id: '1756443014860'\n position:\n x: -733.5977815139424\n y: 236.10252072775984\n positionAbsolute:\n x: -733.5977815139424\n y: 236.10252072775984\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Jina Reader\n datasource_name: jina_reader\n datasource_parameters:\n crawl_sub_pages:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_subpages\n limit:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_limit\n url:\n type: mixed\n value: '{{#rag.1756896212061.jina_url#}}'\n use_sitemap:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jian_sitemap\n plugin_id: langgenius/jina_datasource\n provider_name: jinareader\n provider_type: website_crawl\n selected: false\n title: Jina Reader\n type: datasource\n height: 52\n id: '1756896212061'\n position:\n x: -1371.6520723158733\n y: 538.9988445953813\n positionAbsolute:\n x: -1371.6520723158733\n y: 538.9988445953813\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Firecrawl\n datasource_name: crawl\n datasource_parameters:\n crawl_subpages:\n type: variable\n value:\n - rag\n - '1756907397615'\n - firecrawl_subpages\n exclude_paths:\n type: mixed\n value: '{{#rag.1756907397615.exclude_paths#}}'\n include_paths:\n type: mixed\n value: '{{#rag.1756907397615.include_paths#}}'\n limit:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_pages\n max_depth:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_depth\n only_main_content:\n type: variable\n value:\n - rag\n - '1756907397615'\n - main_content\n url:\n type: mixed\n value: '{{#rag.1756907397615.firecrawl_url1#}}'\n plugin_id: langgenius/firecrawl_datasource\n provider_name: firecrawl\n provider_type: website_crawl\n selected: false\n title: Firecrawl\n type: datasource\n height: 52\n id: '1756907397615'\n position:\n x: -1371.6520723158733\n y: 644.3296146102903\n positionAbsolute:\n x: -1371.6520723158733\n y: 644.3296146102903\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The file you want to extract QA from.\n ja_JP: The file you want to extract QA from.\n pt_BR: The file you want to extract QA from.\n zh_Hans: 你想要提取 QA 的文件。\n label:\n en_US: Input File\n ja_JP: Input File\n pt_BR: Input File\n zh_Hans: 输入文件\n llm_description: The file you want to extract QA from.\n max: null\n min: null\n name: input_file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Column number for question.\n ja_JP: Column number for question.\n pt_BR: Column number for question.\n zh_Hans: 问题所在的列。\n label:\n en_US: Column number for question\n ja_JP: Column number for question\n pt_BR: Column number for question\n zh_Hans: 问题所在的列\n llm_description: The column number for question, the format of the column\n number must be an integer.\n max: null\n min: null\n name: question_column\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 1\n form: llm\n human_description:\n en_US: Column number for answer.\n ja_JP: Column number for answer.\n pt_BR: Column number for answer.\n zh_Hans: 答案所在的列。\n label:\n en_US: Column number for answer\n ja_JP: Column number for answer\n pt_BR: Column number for answer\n zh_Hans: 答案所在的列\n llm_description: The column number for answer, the format of the column\n number must be an integer.\n max: null\n min: null\n name: answer_column\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: number\n params:\n answer_column: ''\n input_file: ''\n question_column: ''\n provider_id: langgenius/qa_chunk/qa_chunk\n provider_name: langgenius/qa_chunk/qa_chunk\n provider_type: builtin\n selected: false\n title: Q&A Processor\n tool_configurations: {}\n tool_description: A tool for QA chunking mode.\n tool_label: QA Chunk\n tool_name: qa_chunk\n tool_node_version: '2'\n tool_parameters:\n answer_column:\n type: constant\n value: 2\n input_file:\n type: variable\n value:\n - '1756912537172'\n - files\n question_column:\n type: constant\n value: 1\n type: tool\n height: 52\n id: '1756912274158'\n position:\n x: 794.2003154321724\n y: 326\n positionAbsolute:\n x: 794.2003154321724\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n context:\n enabled: false\n variable_selector: []\n model:\n completion_params:\n temperature: 0.7\n mode: chat\n name: claude-3-5-sonnet-20240620\n provider: langgenius/anthropic/anthropic\n prompt_template:\n - id: 7f8105aa-a37d-4f5a-b581-babeeb31e833\n role: system\n text: '\n\n Generate a list of Q&A pairs based on {{#1753346901505.output#}}. Present\n the output as a Markdown table, where the first column is serial number,\n the second column is Question, and the third column is Question. Ensure\n that the table format can be easily converted into a CSV file.\n\n Example Output Format:\n\n | Index | Question | Answer |\n\n |-------|-----------|--------|\n\n | 1 | What is the main purpose of the document? | The document explains\n the company''s new product launch strategy. ![image](https://cloud.dify.ai/files/xxxxxxx)\n |\n\n | 2 || When will the product be launched? | The product will be launched\n in Q3 of this year. |\n\n\n Instructions:\n\n Read and understand the input text.\n\n Extract key information and generate meaningful questions and answers.\n\n Preserve any ![image] URLs from the input text in the answers.\n\n Keep questions concise and specific.\n\n Ensure answers are accurate, self-contained, and clear.\n\n Output only the Markdown table without any extra explanation.'\n selected: false\n title: LLM\n type: llm\n vision:\n enabled: false\n height: 88\n id: '1756912504019'\n position:\n x: 184.46657789772178\n y: 326\n positionAbsolute:\n x: 184.46657789772178\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: Markdown text\n ja_JP: Markdown text\n pt_BR: Markdown text\n zh_Hans: Markdown格式文本,必须为Markdown表格格式\n label:\n en_US: Markdown text\n ja_JP: Markdown text\n pt_BR: Markdown text\n zh_Hans: Markdown格式文本\n llm_description: ''\n max: null\n min: null\n name: md_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: Filename of the output file\n ja_JP: Filename of the output file\n pt_BR: Filename of the output file\n zh_Hans: 输出文件名\n label:\n en_US: Filename of the output file\n ja_JP: Filename of the output file\n pt_BR: Filename of the output file\n zh_Hans: 输出文件名\n llm_description: ''\n max: null\n min: null\n name: output_filename\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n params:\n md_text: ''\n output_filename: ''\n provider_id: bowenliang123/md_exporter/md_exporter\n provider_name: bowenliang123/md_exporter/md_exporter\n provider_type: builtin\n selected: false\n title: Markdown to CSV file\n tool_configurations: {}\n tool_description: Generate CSV file from Markdown text\n tool_label: Markdown to CSV file\n tool_name: md_to_csv\n tool_node_version: '2'\n tool_parameters:\n md_text:\n type: mixed\n value: '{{#1756912504019.text#}}'\n output_filename:\n type: mixed\n value: LLM Generated Q&A\n type: tool\n height: 52\n id: '1756912537172'\n position:\n x: 484.75465419110174\n y: 326\n positionAbsolute:\n x: 484.75465419110174\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 174\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n LLM-generated Q&A pairs are designed to extract key information from the\n input text and present it in a structured, easy-to-use format. Each pair\n consists of a concise question that captures an important point or detail,\n and a clear, self-contained answer that provides the relevant information\n without requiring additional context. The output is formatted as a Markdown\n table with three columns—Index, Question, and Answer—so that it can be easily\n converted into a CSV file for further processing. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 528\n height: 174\n id: '1756912556940'\n position:\n x: 184.46657789772178\n y: 462.64405262857747\n positionAbsolute:\n x: 184.46657789772178\n y: 462.64405262857747\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 528\n viewport:\n x: 1149.1394490177502\n y: 317.2338302699771\n zoom: 0.4911032886685182\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_reader_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_reader_imit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Crawl sub-pages\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: checkbox\n unit: null\n variable: Crawl_sub_pages_2\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: Use_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_limit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Follow the sitemap to crawl the site. If not, Jina Reader will crawl\n iteratively based on page relevance, yielding fewer but higher-quality pages.\n type: checkbox\n unit: null\n variable: jian_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Crawl subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: jina_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: firecrawl_url1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: firecrawl_subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: firecrawl_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: Exclude paths\n max_length: 256\n options: []\n placeholder: blog/*,/about/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: exclude_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: include_paths\n max_length: 256\n options: []\n placeholder: articles/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: include_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 0\n label: Max depth\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Maximum depth to crawl relative to the entered URL. Depth 0 just scrapes\n the page of the entered url, depth 1 scrapes the url and everything after enteredURL\n + one /, and so on.\n type: number\n unit: null\n variable: max_depth\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: null\n variable: max_pages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: Extract only main content (no headers, navs, footers, etc.)\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: main_content\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: depthtest\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: null\n variable: depthtest\n", + "graph": { + "edges": [ + { + "data": { + "isInLoop": false, + "sourceType": "tool", + "targetType": "variable-aggregator" + }, + "id": "1750836391776-source-1753346901505-target", + "selected": false, + "source": "1750836391776", + "sourceHandle": "source", + "target": "1753346901505", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "document-extractor", + "targetType": "variable-aggregator" + }, + "id": "1753349228522-source-1753346901505-target", + "selected": false, + "source": "1753349228522", + "sourceHandle": "source", + "target": "1753346901505", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "variable-aggregator" + }, + "id": "1754023419266-source-1753346901505-target", + "selected": false, + "source": "1754023419266", + "sourceHandle": "source", + "target": "1753346901505", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "variable-aggregator" + }, + "id": "1756442998557-source-1756442986174-target", + "selected": false, + "source": "1756442998557", + "sourceHandle": "source", + "target": "1756442986174", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInIteration": false, + "isInLoop": false, + "sourceType": "variable-aggregator", + "targetType": "if-else" + }, + "id": "1756442986174-source-1756443014860-target", + "selected": false, + "source": "1756442986174", + "sourceHandle": "source", + "target": "1756443014860", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "variable-aggregator" + }, + "id": "1750836380067-source-1756442986174-target", + "selected": false, + "source": "1750836380067", + "sourceHandle": "source", + "target": "1756442986174", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "if-else", + "targetType": "tool" + }, + "id": "1756443014860-true-1750836391776-target", + "selected": false, + "source": "1756443014860", + "sourceHandle": "true", + "target": "1750836391776", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "if-else", + "targetType": "document-extractor" + }, + "id": "1756443014860-false-1753349228522-target", + "selected": false, + "source": "1756443014860", + "sourceHandle": "false", + "target": "1753349228522", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "variable-aggregator" + }, + "id": "1756896212061-source-1753346901505-target", + "source": "1756896212061", + "sourceHandle": "source", + "target": "1753346901505", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "variable-aggregator" + }, + "id": "1756907397615-source-1753346901505-target", + "source": "1756907397615", + "sourceHandle": "source", + "target": "1753346901505", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInIteration": false, + "isInLoop": false, + "sourceType": "variable-aggregator", + "targetType": "llm" + }, + "id": "1753346901505-source-1756912504019-target", + "source": "1753346901505", + "sourceHandle": "source", + "target": "1756912504019", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInIteration": false, + "isInLoop": false, + "sourceType": "llm", + "targetType": "tool" + }, + "id": "1756912504019-source-1756912537172-target", + "source": "1756912504019", + "sourceHandle": "source", + "target": "1756912537172", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "tool", + "targetType": "tool" + }, + "id": "1756912537172-source-1756912274158-target", + "source": "1756912537172", + "sourceHandle": "source", + "target": "1756912274158", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "tool", + "targetType": "knowledge-index" + }, + "id": "1756912274158-source-1750836372241-target", + "source": "1756912274158", + "sourceHandle": "source", + "target": "1750836372241", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + } + ], + "nodes": [ + { + "data": { + "chunk_structure": "qa_model", + "embedding_model": "jina-embeddings-v2-base-en", + "embedding_model_provider": "langgenius/jina/jina", + "index_chunk_variable_selector": [ + "1756912274158", + "result" + ], + "indexing_technique": "high_quality", + "keyword_number": 10, + "retrieval_model": { + "hybridSearchMode": "weighted_score", + "reranking_enable": false, + "score_threshold": 0.5, + "score_threshold_enabled": false, + "search_method": "semantic_search", + "top_k": 3, + "vector_setting": { + "embedding_model_name": "jina-embeddings-v2-base-en", + "embedding_provider_name": "langgenius/jina/jina" + } + }, + "selected": false, + "title": "Knowledge Base", + "type": "knowledge-index" + }, + "height": 114, + "id": "1750836372241", + "position": { + "x": 1150.8369138826617, + "y": 326 + }, + "positionAbsolute": { + "x": 1150.8369138826617, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "File", + "datasource_name": "upload-file", + "datasource_parameters": {}, + "fileExtensions": [ + "txt", + "markdown", + "mdx", + "pdf", + "html", + "xlsx", + "xls", + "vtt", + "properties", + "doc", + "docx", + "csv", + "eml", + "msg", + "pptx", + "xml", + "epub", + "ppt", + "md" + ], + "plugin_id": "langgenius/file", + "provider_name": "file", + "provider_type": "local_file", + "selected": false, + "title": "File", + "type": "datasource" + }, + "height": 52, + "id": "1750836380067", + "position": { + "x": -1371.6520723158733, + "y": 224.87938381325645 + }, + "positionAbsolute": { + "x": -1371.6520723158733, + "y": 224.87938381325645 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "is_team_authorization": true, + "output_schema": { + "properties": { + "documents": { + "description": "the documents extracted from the file", + "items": { + "type": "object" + }, + "type": "array" + }, + "images": { + "description": "The images extracted from the file", + "items": { + "type": "object" + }, + "type": "array" + } + }, + "type": "object" + }, + "paramSchemas": [ + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "ja_JP": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "pt_BR": "o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "zh_Hans": "用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)" + }, + "label": { + "en_US": "file", + "ja_JP": "file", + "pt_BR": "file", + "zh_Hans": "file" + }, + "llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "max": null, + "min": null, + "name": "file", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "file" + } + ], + "params": { + "file": "" + }, + "provider_id": "langgenius/dify_extractor/dify_extractor", + "provider_name": "langgenius/dify_extractor/dify_extractor", + "provider_type": "builtin", + "selected": false, + "title": "Dify Extractor", + "tool_configurations": {}, + "tool_description": "Dify Extractor", + "tool_label": "Dify Extractor", + "tool_name": "dify_extractor", + "tool_node_version": "2", + "tool_parameters": { + "file": { + "type": "variable", + "value": [ + "1756442986174", + "output" + ] + } + }, + "type": "tool" + }, + "height": 52, + "id": "1750836391776", + "position": { + "x": -417.5334221022782, + "y": 268.1692071834485 + }, + "positionAbsolute": { + "x": -417.5334221022782, + "y": 268.1692071834485 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 252, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source → use extractor to extract document content → split and clean content into structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 1124 + }, + "height": 252, + "id": "1751252161631", + "position": { + "x": -1371.6520723158733, + "y": -123.758428116601 + }, + "positionAbsolute": { + "x": -1371.6520723158733, + "y": -123.758428116601 + }, + "selected": true, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 1124 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 388, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 285 + }, + "height": 388, + "id": "1751252440357", + "position": { + "x": -1723.9942193415582, + "y": 224.87938381325645 + }, + "positionAbsolute": { + "x": -1723.9942193415582, + "y": 224.87938381325645 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 285 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 430, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A document extractor in Retrieval-Augmented Generation (RAG) is a tool or component that automatically identifies, extracts, and structures text and data from various types of documents—such as PDFs, images, scanned files, handwritten notes, and more—into a format that can be effectively used by language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is a built-in document parser developed by Dify. It supports a wide range of common file formats and offers specialized handling for certain formats, such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\". In addition to text extraction, it can extract images embedded within documents, store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 430, + "id": "1751253091602", + "position": { + "x": -417.5334221022782, + "y": 546.5283142529594 + }, + "positionAbsolute": { + "x": -417.5334221022782, + "y": 546.5283142529594 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 336, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Processor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" extracts specified columns from tables to generate structured Q&A pairs. Users can independently designate which columns to use for questions and which for answers.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"These pairs are indexed by the question field, so user queries are matched directly against the questions to retrieve the corresponding answers. This \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q-to-Q\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" matching strategy improves clarity and precision, especially in scenarios involving high-frequency or highly similar user questions.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 336, + "id": "1751253953926", + "position": { + "x": 794.2003154321724, + "y": 417.25474169825833 + }, + "positionAbsolute": { + "x": 794.2003154321724, + "y": 417.25474169825833 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 410, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 410, + "id": "1751254117904", + "position": { + "x": 1150.8369138826617, + "y": 475.88970282568215 + }, + "positionAbsolute": { + "x": 1150.8369138826617, + "y": 475.88970282568215 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "output_type": "string", + "selected": false, + "title": "Variable Aggregator", + "type": "variable-aggregator", + "variables": [ + [ + "1750836391776", + "text" + ], + [ + "1753349228522", + "text" + ], + [ + "1754023419266", + "content" + ], + [ + "1756896212061", + "content" + ] + ] + }, + "height": 187, + "id": "1753346901505", + "position": { + "x": -117.24452412456148, + "y": 326 + }, + "positionAbsolute": { + "x": -117.24452412456148, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "is_array_file": false, + "selected": false, + "title": "Doc Extractor", + "type": "document-extractor", + "variable_selector": [ + "1756442986174", + "output" + ] + }, + "height": 92, + "id": "1753349228522", + "position": { + "x": -417.5334221022782, + "y": 417.25474169825833 + }, + "positionAbsolute": { + "x": -417.5334221022782, + "y": 417.25474169825833 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "Notion", + "datasource_name": "notion_datasource", + "datasource_parameters": {}, + "plugin_id": "langgenius/notion_datasource", + "provider_name": "notion_datasource", + "provider_type": "online_document", + "selected": false, + "title": "Notion", + "type": "datasource" + }, + "height": 52, + "id": "1754023419266", + "position": { + "x": -1369.6904698303242, + "y": 440.01452302398053 + }, + "positionAbsolute": { + "x": -1369.6904698303242, + "y": 440.01452302398053 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "output_type": "file", + "selected": false, + "title": "Variable Aggregator", + "type": "variable-aggregator", + "variables": [ + [ + "1750836380067", + "file" + ], + [ + "1756442998557", + "file" + ] + ] + }, + "height": 135, + "id": "1756442986174", + "position": { + "x": -1067.06980963949, + "y": 236.10252072775984 + }, + "positionAbsolute": { + "x": -1067.06980963949, + "y": 236.10252072775984 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "Google Drive", + "datasource_name": "google_drive", + "datasource_parameters": {}, + "plugin_id": "langgenius/google_drive", + "provider_name": "google_drive", + "provider_type": "online_drive", + "selected": false, + "title": "Google Drive", + "type": "datasource" + }, + "height": 52, + "id": "1756442998557", + "position": { + "x": -1371.6520723158733, + "y": 326 + }, + "positionAbsolute": { + "x": -1371.6520723158733, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "cases": [ + { + "case_id": "true", + "conditions": [ + { + "comparison_operator": "is", + "id": "1581dd11-7898-41f4-962f-937283ba7e01", + "value": ".xlsx", + "varType": "string", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "92abb46d-d7e4-46e7-a5e1-8a29bb45d528", + "value": ".xls", + "varType": "string", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "1dde5ae7-754d-4e83-96b2-fe1f02995d8b", + "value": ".md", + "varType": "string", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "7e1a80e5-c32a-46a4-8f92-8912c64972aa", + "value": ".markdown", + "varType": "string", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "53abfe95-c7d0-4f63-ad37-17d425d25106", + "value": ".mdx", + "varType": "string", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "436877b8-8c0a-4cc6-9565-92754db08571", + "value": ".html", + "varType": "file", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "5e3e375e-750b-4204-8ac3-9a1174a5ab7c", + "value": ".htm", + "varType": "file", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "1a84a784-a797-4f96-98a0-33a9b48ceb2b", + "value": ".docx", + "varType": "file", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "62d11445-876a-493f-85d3-8fc020146bdd", + "value": ".csv", + "varType": "file", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + }, + { + "comparison_operator": "is", + "id": "02c4bce8-7668-4ccd-b750-4281f314b231", + "value": ".txt", + "varType": "file", + "variable_selector": [ + "1756442986174", + "output", + "extension" + ] + } + ], + "id": "true", + "logical_operator": "or" + } + ], + "selected": false, + "title": "IF/ELSE", + "type": "if-else" + }, + "height": 358, + "id": "1756443014860", + "position": { + "x": -733.5977815139424, + "y": 236.10252072775984 + }, + "positionAbsolute": { + "x": -733.5977815139424, + "y": 236.10252072775984 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "Jina Reader", + "datasource_name": "jina_reader", + "datasource_parameters": { + "crawl_sub_pages": { + "type": "variable", + "value": [ + "rag", + "1756896212061", + "jina_subpages" + ] + }, + "limit": { + "type": "variable", + "value": [ + "rag", + "1756896212061", + "jina_limit" + ] + }, + "url": { + "type": "mixed", + "value": "{{#rag.1756896212061.jina_url#}}" + }, + "use_sitemap": { + "type": "variable", + "value": [ + "rag", + "1756896212061", + "jian_sitemap" + ] + } + }, + "plugin_id": "langgenius/jina_datasource", + "provider_name": "jinareader", + "provider_type": "website_crawl", + "selected": false, + "title": "Jina Reader", + "type": "datasource" + }, + "height": 52, + "id": "1756896212061", + "position": { + "x": -1371.6520723158733, + "y": 538.9988445953813 + }, + "positionAbsolute": { + "x": -1371.6520723158733, + "y": 538.9988445953813 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "Firecrawl", + "datasource_name": "crawl", + "datasource_parameters": { + "crawl_subpages": { + "type": "variable", + "value": [ + "rag", + "1756907397615", + "firecrawl_subpages" + ] + }, + "exclude_paths": { + "type": "mixed", + "value": "{{#rag.1756907397615.exclude_paths#}}" + }, + "include_paths": { + "type": "mixed", + "value": "{{#rag.1756907397615.include_paths#}}" + }, + "limit": { + "type": "variable", + "value": [ + "rag", + "1756907397615", + "max_pages" + ] + }, + "max_depth": { + "type": "variable", + "value": [ + "rag", + "1756907397615", + "max_depth" + ] + }, + "only_main_content": { + "type": "variable", + "value": [ + "rag", + "1756907397615", + "main_content" + ] + }, + "url": { + "type": "mixed", + "value": "{{#rag.1756907397615.firecrawl_url1#}}" + } + }, + "plugin_id": "langgenius/firecrawl_datasource", + "provider_name": "firecrawl", + "provider_type": "website_crawl", + "selected": false, + "title": "Firecrawl", + "type": "datasource" + }, + "height": 52, + "id": "1756907397615", + "position": { + "x": -1371.6520723158733, + "y": 644.3296146102903 + }, + "positionAbsolute": { + "x": -1371.6520723158733, + "y": 644.3296146102903 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "is_team_authorization": true, + "paramSchemas": [ + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "The file you want to extract QA from.", + "ja_JP": "The file you want to extract QA from.", + "pt_BR": "The file you want to extract QA from.", + "zh_Hans": "你想要提取 QA 的文件。" + }, + "label": { + "en_US": "Input File", + "ja_JP": "Input File", + "pt_BR": "Input File", + "zh_Hans": "输入文件" + }, + "llm_description": "The file you want to extract QA from.", + "max": null, + "min": null, + "name": "input_file", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "file" + }, + { + "auto_generate": null, + "default": 0, + "form": "llm", + "human_description": { + "en_US": "Column number for question.", + "ja_JP": "Column number for question.", + "pt_BR": "Column number for question.", + "zh_Hans": "问题所在的列。" + }, + "label": { + "en_US": "Column number for question", + "ja_JP": "Column number for question", + "pt_BR": "Column number for question", + "zh_Hans": "问题所在的列" + }, + "llm_description": "The column number for question, the format of the column number must be an integer.", + "max": null, + "min": null, + "name": "question_column", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "number" + }, + { + "auto_generate": null, + "default": 1, + "form": "llm", + "human_description": { + "en_US": "Column number for answer.", + "ja_JP": "Column number for answer.", + "pt_BR": "Column number for answer.", + "zh_Hans": "答案所在的列。" + }, + "label": { + "en_US": "Column number for answer", + "ja_JP": "Column number for answer", + "pt_BR": "Column number for answer", + "zh_Hans": "答案所在的列" + }, + "llm_description": "The column number for answer, the format of the column number must be an integer.", + "max": null, + "min": null, + "name": "answer_column", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "number" + } + ], + "params": { + "answer_column": "", + "input_file": "", + "question_column": "" + }, + "provider_id": "langgenius/qa_chunk/qa_chunk", + "provider_name": "langgenius/qa_chunk/qa_chunk", + "provider_type": "builtin", + "selected": false, + "title": "Q&A Processor", + "tool_configurations": {}, + "tool_description": "A tool for QA chunking mode.", + "tool_label": "QA Chunk", + "tool_name": "qa_chunk", + "tool_node_version": "2", + "tool_parameters": { + "answer_column": { + "type": "constant", + "value": 2 + }, + "input_file": { + "type": "variable", + "value": [ + "1756912537172", + "files" + ] + }, + "question_column": { + "type": "constant", + "value": 1 + } + }, + "type": "tool" + }, + "height": 52, + "id": "1756912274158", + "position": { + "x": 794.2003154321724, + "y": 326 + }, + "positionAbsolute": { + "x": 794.2003154321724, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "context": { + "enabled": false, + "variable_selector": [] + }, + "model": { + "completion_params": { + "temperature": 0.7 + }, + "mode": "chat", + "name": "claude-3-5-sonnet-20240620", + "provider": "langgenius/anthropic/anthropic" + }, + "prompt_template": [ + { + "id": "7f8105aa-a37d-4f5a-b581-babeeb31e833", + "role": "system", + "text": "\nGenerate a list of Q&A pairs based on {{#1753346901505.output#}}. Present the output as a Markdown table, where the first column is serial number, the second column is Question, and the third column is Question. Ensure that the table format can be easily converted into a CSV file.\nExample Output Format:\n| Index | Question | Answer |\n|-------|-----------|--------|\n| 1 | What is the main purpose of the document? | The document explains the company's new product launch strategy. ![image](https://cloud.dify.ai/files/xxxxxxx) |\n| 2 || When will the product be launched? | The product will be launched in Q3 of this year. |\n\nInstructions:\nRead and understand the input text.\nExtract key information and generate meaningful questions and answers.\nPreserve any ![image] URLs from the input text in the answers.\nKeep questions concise and specific.\nEnsure answers are accurate, self-contained, and clear.\nOutput only the Markdown table without any extra explanation." + } + ], + "selected": false, + "title": "LLM", + "type": "llm", + "vision": { + "enabled": false + } + }, + "height": 88, + "id": "1756912504019", + "position": { + "x": 184.46657789772178, + "y": 326 + }, + "positionAbsolute": { + "x": 184.46657789772178, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "is_team_authorization": true, + "paramSchemas": [ + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "Markdown text", + "ja_JP": "Markdown text", + "pt_BR": "Markdown text", + "zh_Hans": "Markdown格式文本,必须为Markdown表格格式" + }, + "label": { + "en_US": "Markdown text", + "ja_JP": "Markdown text", + "pt_BR": "Markdown text", + "zh_Hans": "Markdown格式文本" + }, + "llm_description": "", + "max": null, + "min": null, + "name": "md_text", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "Filename of the output file", + "ja_JP": "Filename of the output file", + "pt_BR": "Filename of the output file", + "zh_Hans": "输出文件名" + }, + "label": { + "en_US": "Filename of the output file", + "ja_JP": "Filename of the output file", + "pt_BR": "Filename of the output file", + "zh_Hans": "输出文件名" + }, + "llm_description": "", + "max": null, + "min": null, + "name": "output_filename", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "string" + } + ], + "params": { + "md_text": "", + "output_filename": "" + }, + "provider_id": "bowenliang123/md_exporter/md_exporter", + "provider_name": "bowenliang123/md_exporter/md_exporter", + "provider_type": "builtin", + "selected": false, + "title": "Markdown to CSV file", + "tool_configurations": {}, + "tool_description": "Generate CSV file from Markdown text", + "tool_label": "Markdown to CSV file", + "tool_name": "md_to_csv", + "tool_node_version": "2", + "tool_parameters": { + "md_text": { + "type": "mixed", + "value": "{{#1756912504019.text#}}" + }, + "output_filename": { + "type": "mixed", + "value": "LLM Generated Q&A" + } + }, + "type": "tool" + }, + "height": 52, + "id": "1756912537172", + "position": { + "x": 484.75465419110174, + "y": 326 + }, + "positionAbsolute": { + "x": 484.75465419110174, + "y": 326 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 174, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The LLM-generated Q&A pairs are designed to extract key information from the input text and present it in a structured, easy-to-use format. Each pair consists of a concise question that captures an important point or detail, and a clear, self-contained answer that provides the relevant information without requiring additional context. The output is formatted as a Markdown table with three columns—Index, Question, and Answer—so that it can be easily converted into a CSV file for further processing. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 528 + }, + "height": 174, + "id": "1756912556940", + "position": { + "x": 184.46657789772178, + "y": 462.64405262857747 + }, + "positionAbsolute": { + "x": 184.46657789772178, + "y": 462.64405262857747 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 528 + } + ], + "viewport": { + "x": 1149.1394490177502, + "y": 317.2338302699771, + "zoom": 0.4911032886685182 + } + }, + "icon_info": { + "icon": "e4ea16ed-9690-4de9-ab80-5b622ecbcc04", + "icon_background": null, + "icon_type": "image", + "icon_url": "" + }, + "id": "98374ab6-9dcd-434d-983e-268bec156b43", + "name": "LLM Generated Q&A", + "icon": { + "icon": "e4ea16ed-9690-4de9-ab80-5b622ecbcc04", + "icon_background": null, + "icon_type": "image", + "icon_url": "" + }, + "language": "zh-Hans", + "position": 5 + }, + { + "chunk_structure": "hierarchical_model", + "description": "This knowledge pipeline uses LLMs to extract content from images and tables in documents and automatically generate descriptive annotations for contextual enrichment.", + "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/parentchild_chunker:0.0.7@ee9c253e7942436b4de0318200af97d98d094262f3c1a56edbe29dcb01fbc158\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/mineru:0.5.0@ca04f2dceb4107e3adf24839756954b7c5bcb7045d035dbab5821595541c093d\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/anthropic:0.2.0@a776815b091c81662b2b54295ef4b8a54b5533c2ec1c66c7c8f2feea724f3248\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: e642577f-da15-4c03-81b9-c9dec9189a3c\n icon_background: null\n icon_type: image\n icon_url: data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAP9UlEQVR4Ae2dTXPbxhnHdwFRr5ZN2b1kJraouk57i\/IJrJx6jDPT9Fpnkrvj3DOOv0DsXDvJxLk2nUnSW09hPkGc6aWdOBEtpZNLE9Gy3iiSQJ\/\/gg8DQnyFFiAAPjtDLbAA9uWPn5595VKrjLjtn\/YqrZaq+L6quL5X9pQqO1qtI3u+0mXy8MFJxfihP1qrss\/XQ+FFPtRK1UmreriMJkz\/GqaVX8N1z1dPHdyvnZpP1+fmVG3jhTVzDden6SjP6brt7b1y21VbWnk3CawKAbWp9Fmo0s3VbKamffWYgKz5vv+t1s5jt62qGxtrPVAnrUwqAH63u7dF\/4E3qaBbVCB8zjjHcZRDJs91XaXJpOGDMDgSx5zj2HWDMByz4\/v5fBZ80lLhE3Y498jcsfO8Nt1DlYbvmXs9L\/DbbY\/uozqmjwOUSvvVtuN8+tKLa4\/73GI1KDEAYek8x7vta\/0a5XiLcw1Y5uZcAxpgK5VKXeD4HvHTUaDdbivA2Go1yW+rZrPVkzDBUSOk7\/\/u2m8e9VyweGIdQAPenLpD\/3LvcLsM0C0szBNs8wY+nIvLpgKA8PS0YWBkKwkQyUo8un517b7tXFsl4cnO\/25p33lA7YoKMloqzanFxSXj2864xJe8Ao3GaRdGpAYQbVtEKwCS1au0Xf8TyuMWMirgQYXiOFjFw8PDcLvxC7ek79roSZ8bwO3dvTue77+P6hZV69LSElm9heKoLyXpKgCLeHx8zCBSb9m7e972YWwATVvPVfeoL\/YOcjg\/X1IrKyvd3mo313JQKAXQLgSEgBGO3v\/DG9eu3I1byFgAosr1HP9zauttitWLK32+nzs5aRgQMfSDoRtnXr8ep0qeGMAOfF+ho4FxuosXV7vjdfmWVHI\/qQKwhvv7z02VTCDVnJJ+dVIIJwIwDB\/G8FZXLwh8k761gt0PCJ8\/PzDjiHEgHBvAKHywfDKeVzCaYhYH1TAsIQazJ4VwLAAFvphvZoYeiwvh2YnVPqJ1OhwVVLti+foIJEGmNgQbYISG5Creqf85Ga7yKGlGAvj9zh5mNjbR4UCbT6rdUZLO7nWwwf0CMNNyvXuj1BhaBdPU2m2lnE8Q8aVLF6XDMUpNuW4UQMfk2bN9swKHqua7N9avPBwkzUAATbvP9b\/BDMfy8rLMbgxSUML7KoBxwqOjI1yr07TdK4OGZwZWwTS3+wDwYRWLTK311VgChygAZjA7Rq7cbpp1An3v7gtgUPWqW2j3YW5XnCgQR4HQ1OzWk529W\/3i6AsgLakyjUfAx6uS+z0sYaLAMAXQd2ADRt9PedCvV3wGwO939+7xNBuqX3GiwHkUQFWM5XnUnKu0HM8sXAnHdwZA+grVbdwA8ylOFLChABYlw5FFvBO1gj0Aou0H6wdi8REnCthQIMRTmazg7XCcPQBy229+XhaUhkWS4\/MrELKC+JJa13UB3P5xb1Pafl1d5MCyArCC6JSQ28LXdDn6LoD09bzbCJSql6UR37YC3U6t521x3F0AtaNvIlCqX5ZGfNsK4Gu5cGQJDWs4NgCiZ0JLujYRIBYQKohLQgFsSMDVMPeGDYBtt72FBAW+JGSXOFkBwAcI4bA\/EHwDoO9rY\/0cJ7iIC+JEgSQUwHpB4\/ygHWgAJDJfRiD2aREnCiSpAANodkajhDoAqgoS7bfzFMLFiQK2FGAjR7WxMXqdKjjogDCdthKTeESBqAKdTgiCK\/jjUG8kOOjsxYdAcaJAUgoAQF5hhV1xndacVL9JiS3x9leArSC2ZHa03y7jNg7s\/4iEigL2FOChGGIPAOoKosY2uOJEgTQUYGNHw39lB7vRI1HszyxOFEhDAQaQ0io7fqc3EgpMIw+SxgwrwJ0QRzvr3XpXAJxhIqZYdKp59TrSl2m4Kb6FGUuajR3trLvWtYAzpoEUd4oKcIeXhgQvCYBTfBGStFJzm\/\/EWkDqiiw1qR6W1TC7r11JlIurX\/6caPy5iJx+uUkd7SOrFYfgM8MwNBKYi7xLJoulgFTBxXqfuSuNAJi7V1asDM99+8fLpvYtly91VykUq4jDSzPtNpntNme0PLbjH67meFexf2C9Hmx8QMOAwVQcj82MF4XcJQrEVyDEmpmKk9Uw8bWUJ2Mo0ANgjOflEVHAmgLSCbEmpUQURwEBMI5q8ow1BQRAa1JKRHEUyAWAPx7Rj+I1afpGXOEUyAWAn+2cqI9\/aBROfCkQLT\/Iugiwfp\/tNtRH3x+LFcz6y4qRv8wDCOu3a6pgX6xgjBec9UcyDSBbPxZRrCArURw\/0wCy9WO595tiBVmLoviZBTBq\/VhwsYKsRDH8zAIYtX4st1hBVqIYfiYBHGT9WHKxgqxE\/v1MAjjI+rHcYgVZifz7mfo5pACsE\/XRDycjlYUVhPvT1QV1dTmT\/0cjyyA30LfisiBCFzwz2Ezf0BvD4ZkP\/n2k\/kbjhH++tiggjqFZFm+ZKoBxwIuKiPaigBhVJT\/n+snOL8bkXL68llqubYA3KLMvUnU8iUVM+zsU0fQGlaPw4Yd1U8RULWCS4PELE4vISuTDT7X1DgCxC8OlUvLJ\/pqWfOE+yyimagFRPb77h2VTRaLz8PfdU1po0Laqz8WSVm\/9dlG9fX1J4VhcthVIFUCWIgkQ8wqe7e\/tRtuYtuPnd3he\/5dfglpwKgBy5m2AmFfwWINZ96cKIIsfBfFjGohGG26YE\/CGqZOfa5kAkOViENFy++A\/wUwHX4v6b1Eb793fL0WD5TxnCiTfHY0hCOAa1oF4cdlVb9AUnLj8K3AuAD\/baSh8bDvA9zb1ZAe5N67J\/O8gbfIWHrsKBnjvfnPQLS+gsOlgBbEoIdoWFOtnU+XpxxXLAkbhA4i2LeEgKyjWb\/rQ2MzBxABG4ePMJAFhtC0o1o\/VLo4\/EYCD4GM5bEMYtYJi\/Vjp4vhjAzgKPpbENoRsBcX6scLF8sfqhIwLH0sDCOFsdEzYCvq0lausfGaFi+OPBHBS+FgamxDCCj4bMTPC6YqfLwWGAhgXPpbAFoSwgviIK54CA9uA54WPpbLdJuR4xS+GAn0BtAUfSyQQshLiRxU4A6Bt+DhBgZCVED+sQA+AScHHCQqErIT4rEAXwKTh4wQFQlZCfChgesH\/+G9DvfdDenswA0I4G+OEJiL5k1sFHAPfvw5TL4BYwtQlz2SCzntTgI+VEAhZidn1u23AaUkgEE5L+WykO3UAIYNAmA0YppGLTAAoEE7j1WcjzcwAKBBmA4i0c5EpAAXCtF\/\/9NPLHIAC4fShSDMHmQRQIEwTgemmlVkABcLpgpFW6pkGUCBMC4PppZN5AAXC6cGRRsq5AFAgTAOF6aSRGwAFwukAknSquQJQIEwah\/Tjzx2AAmH6kCSZYi4BFAiTRCLduHMLoECYLihJpUYA6uAna+j3O\/LoZClX\/t4afium4+oEoJ9rAFEQgZDfZz78MIB65a9PtinbFbV0USkn1zWyFfWT\/l2N6O94WMl03iLx6QtwR\/vIdU2Iy9vLK1h+BcCCvdC8FUcAzNsbK0J+u50QXcfvBX9FZdpaXV1VpdLQ3dqKUHQpQwYUaDZb6vnz58hJVSxgBl7ILGcBAJphmFDXeJb1kLKnrIDj+f4zpOmjayxOFEhBAc8LfiNaKy3DMCnoLUlEFOj2QSjcoZ2Xa7jueWIBoYO45BXg2tbzvaeY+zBtQM\/rzs8lnwNJYaYVCPU36k5bd+aClQA401SkWHiubbV2ao7Wbg1pt1pBwzDFfEhSM6oAW0Bfq7oz1wragBw4o5pIsVNUoN0O+htzc7QYYWNjrYa0YRYFwhTfwgwnxVXwxgtrnWEYX6zgDPOQatG5qad99RgJB1NxOjhpNpupZkYSmz0FeBCaKuGnKH0AoO+bE6Zz9mSREqelQKvV6iTlhy2gX0Uo09m5QzxRwLoC7XZnGk47vwLott0qUoIFlI6Idc0lwpACWIoF57ZVFb6pgqknjNmQKuCTahiyiEtCAYYPHZAOc502IKVG8H2NRE9PT5NIW+IUBYithlHBVwFrOAk6IebIqcITAKGCuCQUYAvoec4jjr8L4I2ra1UKNNUw38g3iS8KnFeBRqNhJjuw+uqljTXTAUGcXQBxon3\/S\/gnJ8fwxIkC1hTgmtVX+n440h4AHTKNRGgdFlCsYFgmOT6PAswTrN\/vrq09CsfVAyB6JrRE\/0PcIFYwLJMcn0eBw8Pg11iJrU+j8RCUvW57e6\/sOf43tFSmsry8pBYXF3tvkDNRYAIF0PY7PDxSsH7Xr13eiD7aYwFxEVbQ1\/oujo+PT2RgGkKIi6UAll2BIbho248jPAMgLlA9\/QV5pkd8cJD+j1lz5sTPtwJoxnWWXn0RbftxyfoCiItuW79JZpM6JE1qDwYU80PiiwKjFDg5aahG4xRVb90tBTVqv2cGAkhVcU35QZcZZpRXsfaLRMJEgbACQdUbDOVR1XsXC0\/D18PHAwHETdfX1x5SI\/BDzBFjLw+BMCydHPdTAIyAFbOohdgZVPXys2Qhh7tOr\/gr6hVvuq6rLl5cVVqPfGx4pHK1kAoAuv19GKo2TWqox9fXL78yqqBDLSAeRq\/Y8fTrFGENESMBQ\/eomOX6TCnQAx8NuTjz+vVxBBjblJElrND4ICxhRSzhONLOzj1n4CvpV4e1+8LKjA0gHopCeOHCBeW6I41oOD05LpgCaPMdHBwE1S4s3wTwQYqJAMQDYQgd2tgDG1sKhFBm9hx3ODDWRyBNDB8UmxhAPNSB8HN0TNAhWVpalCk7CDNDDuN8x8fHpj+ADgfafONWu2GZYgHIETx5+vND6hLfwfnCwjxBuCTWkMUpqI\/2HhYXnJ52vsJLQy2u57yPzmqcIp8LQCT4ZGfvtlb+A9raqIwqGdZwYWEhTl7kmYwr0GP1aIaDVrfcv7F+5eF5sn1uAJE4quS2qx7QlPMtnAPElZUV2fQcYhTAYT0f5nVDa0SrNL32ZpwqNyqHFQA5UmMNff8ehmoQhl335+fnxSKyQDnzo+ARLDVMrXUWq1gpjVUAOUffPf35fUfpvzCIsIgBjAtiFVmkDPpo3+Fruc3mqVlIgHM4gsQsVJ7znIdx23qDipsIgJxY1CJyOGDEYPYc7c\/lOPBdviR+SgoALnyw2gkzXPj02Zigqn39peOpR7bB42ImCiAnsv3j3iaNGVFnRd\/E0A2Hh31YSYwnYlgHx\/D5A0jZBdd7s8338T2z4DNA0bJibA4O+zCzBeOt93DOkPEWadHn6bxK931NL6Ha+aZkn1vsBfW+SXvxDoyJOixl6rBskUAYQ3yZxpAqg6AcGIlcsKMAtuXDzmjYnEo7VWyXkZSlG5Th1AEclJHtn\/YqtHFShYAsA0pPeWXawn8d91PDt0KecbiOIR8+h0\/G8kxY+HoRj+nF1cmg1c+UTQd7PVJ4nYbHzHXaf\/6po5x6m7bEJa1q2JnURg\/2TNoxAv4PoGedQHqhulIAAAAASUVORK5CYII=\n name: Contextual Enrichment Using LLM\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1751336942081-source-1750400198569-target\n selected: false\n source: '1751336942081'\n sourceHandle: source\n target: '1750400198569'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: llm\n targetType: tool\n id: 1758002850987-source-1751336942081-target\n source: '1758002850987'\n sourceHandle: source\n target: '1751336942081'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: datasource\n targetType: tool\n id: 1756915693835-source-1758027159239-target\n source: '1756915693835'\n sourceHandle: source\n target: '1758027159239'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: llm\n id: 1758027159239-source-1758002850987-target\n source: '1758027159239'\n sourceHandle: source\n target: '1758002850987'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: hierarchical_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius\/jina\/jina\n index_chunk_variable_selector:\n - '1751336942081'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n reranking_enable: true\n reranking_mode: reranking_model\n reranking_model:\n reranking_model_name: jina-reranker-v1-base-en\n reranking_provider_name: langgenius\/jina\/jina\n score_threshold: 0\n score_threshold_enabled: false\n search_method: hybrid_search\n top_k: 3\n weights: null\n selected: false\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750400198569'\n position:\n x: 474.7618603027596\n y: 282\n positionAbsolute:\n x: 474.7618603027596\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 458\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 5 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Text Input, Online Drive, Online Doc, and Web Crawler. Different\n types of Data Sources have different input and output types. The output\n of File Upload and Online Drive are files, while the output of Online Doc\n and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 458\n id: '1751264451381'\n position:\n x: -893.2836123260277\n y: 378.2537898330178\n positionAbsolute:\n x: -893.2836123260277\n y: 378.2537898330178\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 260\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n \u2192 use extractor to extract document content \u2192 split and clean content into\n structured chunks \u2192 store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1182\n height: 260\n id: '1751266376760'\n position:\n x: -704.0614991386192\n y: -73.30453110517956\n positionAbsolute:\n x: -704.0614991386192\n y: -73.30453110517956\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1182\n - data:\n author: TenTen\n desc: ''\n height: 304\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MinerU\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n is an advanced open-source document extractor designed specifically to convert\n complex, unstructured documents\u2014such as PDFs, Word files, and PPTs\u2014into\n high-quality, machine-readable formats like Markdown and JSON. MinerU addresses\n challenges in document parsing such as layout detection, formula recognition,\n and multi-language support, which are critical for generating high-quality\n training corpora for LLMs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 304\n id: '1751266402561'\n position:\n x: -555.2228329530462\n y: 592.0458661166498\n positionAbsolute:\n x: -555.2228329530462\n y: 592.0458661166498\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 554\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n addresses the dilemma of context and precision by leveraging a two-tier\n hierarchical approach that effectively balances the trade-off between accurate\n matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here\n is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Query Matching with Child Chunks: Small, focused pieces of information,\n often as concise as a single sentence within a paragraph, are used to match\n the user''s query. These child chunks enable precise and relevant initial\n retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Contextual Enrichment with Parent Chunks: Larger, encompassing sections\u2014such\n as a paragraph, a section, or even an entire document\u2014that include the matched\n child chunks are then retrieved. These parent chunks provide comprehensive\n context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 554\n id: '1751266447821'\n position:\n x: 153.2996965006646\n y: 378.2537898330178\n positionAbsolute:\n x: 153.2996965006646\n y: 378.2537898330178\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 411\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods:\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0only\n support the\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 411\n id: '1751266580099'\n position:\n x: 482.3389174180554\n y: 437.9839361130071\n positionAbsolute:\n x: 482.3389174180554\n y: 437.9839361130071\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: Parent child chunks result\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: ''\n ja_JP: ''\n pt_BR: ''\n zh_Hans: ''\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Conte\u00fado de Entrada\n zh_Hans: \u8f93\u5165\u6587\u672c\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: paragraph\n form: llm\n human_description:\n en_US: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n ja_JP: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n pt_BR: Dividir texto em par\u00e1grafos com base no separador e no comprimento\n m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento\n completo como bloco pai e diretamente recuper\u00e1-lo.\n zh_Hans: \u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002\n label:\n en_US: Parent Mode\n ja_JP: Parent Mode\n pt_BR: Modo Pai\n zh_Hans: \u7236\u5757\u6a21\u5f0f\n llm_description: Split text into paragraphs based on separator and maximum\n chunk length, using split text as parent block or entire document as parent\n block and directly retrieve.\n max: null\n min: null\n name: parent_mode\n options:\n - label:\n en_US: Paragraph\n ja_JP: Paragraph\n pt_BR: Par\u00e1grafo\n zh_Hans: \u6bb5\u843d\n value: paragraph\n - label:\n en_US: Full Document\n ja_JP: Full Document\n pt_BR: Documento Completo\n zh_Hans: \u5168\u6587\n value: full_doc\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: '\n\n\n '\n form: llm\n human_description:\n en_US: Separator used for chunking\n ja_JP: Separator used for chunking\n pt_BR: Separador usado para divis\u00e3o\n zh_Hans: \u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Parent Delimiter\n ja_JP: Parent Delimiter\n pt_BR: Separador de Pai\n zh_Hans: \u7236\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split chunks\n max: null\n min: null\n name: separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 1024\n form: llm\n human_description:\n en_US: Maximum length for chunking\n ja_JP: Maximum length for chunking\n pt_BR: Comprimento m\u00e1ximo para divis\u00e3o\n zh_Hans: \u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Parent Chunk Length\n ja_JP: Maximum Parent Chunk Length\n pt_BR: Comprimento M\u00e1ximo do Bloco Pai\n zh_Hans: \u6700\u5927\u7236\u5757\u957f\u5ea6\n llm_description: Maximum length allowed per chunk\n max: null\n min: null\n name: max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: '. '\n form: llm\n human_description:\n en_US: Separator used for subchunking\n ja_JP: Separator used for subchunking\n pt_BR: Separador usado para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Child Delimiter\n ja_JP: Child Delimiter\n pt_BR: Separador de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split subchunks\n max: null\n min: null\n name: subchunk_separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 512\n form: llm\n human_description:\n en_US: Maximum length for subchunking\n ja_JP: Maximum length for subchunking\n pt_BR: Comprimento m\u00e1ximo para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Child Chunk Length\n ja_JP: Maximum Child Chunk Length\n pt_BR: Comprimento M\u00e1ximo de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6\n llm_description: Maximum length allowed per subchunk\n max: null\n min: null\n name: subchunk_max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove consecutive spaces, newlines and tabs\n ja_JP: Whether to remove consecutive spaces, newlines and tabs\n pt_BR: Se deve remover espa\u00e7os extras no texto\n zh_Hans: \u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Substituir espa\u00e7os consecutivos, novas linhas e guias\n zh_Hans: \u66ff\u6362\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n llm_description: Whether to remove consecutive spaces, newlines and tabs\n max: null\n min: null\n name: remove_extra_spaces\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove URLs and emails in the text\n ja_JP: Whether to remove URLs and emails in the text\n pt_BR: Se deve remover URLs e e-mails no texto\n zh_Hans: \u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Remover todas as URLs e e-mails\n zh_Hans: \u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n llm_description: Whether to remove URLs and emails in the text\n max: null\n min: null\n name: remove_urls_emails\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n input_text: ''\n max_length: ''\n parent_mode: ''\n remove_extra_spaces: ''\n remove_urls_emails: ''\n separator: ''\n subchunk_max_length: ''\n subchunk_separator: ''\n provider_id: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_name: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_type: builtin\n selected: false\n title: Parent-child Chunker\n tool_configurations: {}\n tool_description: Process documents into parent-child chunk structures\n tool_label: Parent-child Chunker\n tool_name: parentchild_chunker\n tool_node_version: '2'\n tool_parameters:\n input_text:\n type: mixed\n value: '{{#1758002850987.text#}}'\n max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Parent_Length\n parent_mode:\n type: variable\n value:\n - rag\n - shared\n - Parent_Mode\n remove_extra_spaces:\n type: variable\n value:\n - rag\n - shared\n - clean_1\n remove_urls_emails:\n type: variable\n value:\n - rag\n - shared\n - clean_2\n separator:\n type: mixed\n value: '{{#rag.shared.Parent_Delimiter#}}'\n subchunk_max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Child_Length\n subchunk_separator:\n type: mixed\n value: '{{#rag.shared.Child_Delimiter#}}'\n type: tool\n height: 52\n id: '1751336942081'\n position:\n x: 144.55897745117755\n y: 282\n positionAbsolute:\n x: 144.55897745117755\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 446\n selected: true\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"In\n this step, the LLM is responsible for enriching and reorganizing content,\n along with images and tables. The goal is to maintain the integrity of image\n URLs and tables while providing contextual descriptions and summaries to\n enhance understanding. The content should be structured into well-organized\n paragraphs, using double newlines to separate them. The LLM should enrich\n the document by adding relevant descriptions for images and extracting key\n insights from tables, ensuring the content remains easy to retrieve within\n a Retrieval-Augmented Generation (RAG) system. The final output should preserve\n the original structure, making it more accessible for knowledge retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 446\n id: '1753967810859'\n position:\n x: -176.67459682201036\n y: 405.2790698865377\n positionAbsolute:\n x: -176.67459682201036\n y: 405.2790698865377\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - pdf\n - doc\n - docx\n - pptx\n - ppt\n - jpg\n - png\n - jpeg\n plugin_id: langgenius\/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1756915693835'\n position:\n x: -893.2836123260277\n y: 282\n positionAbsolute:\n x: -893.2836123260277\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n context:\n enabled: false\n variable_selector: []\n model:\n completion_params:\n temperature: 0.7\n mode: chat\n name: claude-3-5-sonnet-20240620\n provider: langgenius\/anthropic\/anthropic\n prompt_template:\n - id: beb97761-d30d-4549-9b67-de1b8292e43d\n role: system\n text: \"You are an AI document assistant. \\nYour tasks are:\\nEnrich the content\\\n \\ contextually:\\nAdd meaningful descriptions for each image.\\nSummarize\\\n \\ key information from each table.\\nOutput the enriched content\u00a0with clear\\\n \\ annotations showing the\u00a0corresponding image and table positions, so\\\n \\ the text can later be aligned back into the original document. Preserve\\\n \\ any ![image] URLs from the input text.\\nYou will receive two inputs:\\n\\\n The file and text\u00a0(may contain images url and tables).\\nThe final output\\\n \\ should be a\u00a0single, enriched version of the original document with ![image]\\\n \\ url preserved.\\nGenerate output directly without saying words like:\\\n \\ Here's the enriched version of the original text with the image description\\\n \\ inserted.\"\n - id: f92ef0cd-03a7-48a7-80e8-bcdc965fb399\n role: user\n text: The file is {{#1756915693835.file#}} and the text are\u00a0{{#1758027159239.text#}}.\n selected: false\n title: LLM\n type: llm\n vision:\n configs:\n detail: high\n variable_selector:\n - '1756915693835'\n - file\n enabled: true\n height: 88\n id: '1758002850987'\n position:\n x: -176.67459682201036\n y: 282\n positionAbsolute:\n x: -176.67459682201036\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: \u89e3\u6790\u3059\u308b\u30d5\u30a1\u30a4\u30eb(pdf\u3001ppt\u3001pptx\u3001doc\u3001docx\u3001png\u3001jpg\u3001jpeg\u3092\u30b5\u30dd\u30fc\u30c8)\n pt_BR: The file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n zh_Hans: \u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: The file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: (For local deployment v1 and v2) Parsing method, can be auto, ocr,\n or txt. Default is auto. If results are not satisfactory, try ocr\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v1\u3068v2\u7528\uff09\u89e3\u6790\u65b9\u6cd5\u306f\u3001auto\u3001ocr\u3001\u307e\u305f\u306ftxt\u306e\u3044\u305a\u308c\u304b\u3067\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fauto\u3067\u3059\u3002\u7d50\u679c\u304c\u6e80\u8db3\u3067\u304d\u306a\u3044\u5834\u5408\u306f\u3001ocr\u3092\u8a66\u3057\u3066\u304f\u3060\u3055\u3044\n pt_BR: (For local deployment v1 and v2) Parsing method, can be auto, ocr,\n or txt. Default is auto. If results are not satisfactory, try ocr\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v1\u548cv2\u7248\u672c\uff09\u89e3\u6790\u65b9\u6cd5\uff0c\u53ef\u4ee5\u662fauto, ocr, \u6216 txt\u3002\u9ed8\u8ba4\u662fauto\u3002\u5982\u679c\u7ed3\u679c\u4e0d\u7406\u60f3\uff0c\u8bf7\u5c1d\u8bd5ocr\n label:\n en_US: parse method\n ja_JP: \u89e3\u6790\u65b9\u6cd5\n pt_BR: parse method\n zh_Hans: \u89e3\u6790\u65b9\u6cd5\n llm_description: (For local deployment v1 and v2) Parsing method, can be\n auto, ocr, or txt. Default is auto. If results are not satisfactory, try\n ocr\n max: null\n min: null\n name: parse_method\n options:\n - icon: ''\n label:\n en_US: auto\n ja_JP: auto\n pt_BR: auto\n zh_Hans: auto\n value: auto\n - icon: ''\n label:\n en_US: ocr\n ja_JP: ocr\n pt_BR: ocr\n zh_Hans: ocr\n value: ocr\n - icon: ''\n label:\n en_US: txt\n ja_JP: txt\n pt_BR: txt\n zh_Hans: txt\n value: txt\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API and local deployment v2) Whether to enable formula\n recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API and local deployment v2) Whether to enable formula\n recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u662f\u5426\u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n label:\n en_US: Enable formula recognition\n ja_JP: \u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable formula recognition\n zh_Hans: \u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n llm_description: (For official API and local deployment v2) Whether to enable\n formula recognition\n max: null\n min: null\n name: enable_formula\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API and local deployment v2) Whether to enable table\n recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API and local deployment v2) Whether to enable table\n recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u662f\u5426\u5f00\u542f\u8868\u683c\u8bc6\u522b\n label:\n en_US: Enable table recognition\n ja_JP: \u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable table recognition\n zh_Hans: \u5f00\u542f\u8868\u683c\u8bc6\u522b\n llm_description: (For official API and local deployment v2) Whether to enable\n table recognition\n max: null\n min: null\n name: enable_table\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: '(For official API and local deployment v2) Specify document language,\n default ch, can be set to auto(local deployment need to specify the\n language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3001auto\u306b\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002auto\u306e\u5834\u5408\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8\u3067\u306f\u8a00\u8a9e\u3092\u6307\u5b9a\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3059\uff09\u3001\u30e2\u30c7\u30eb\u306f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u81ea\u52d5\u7684\u306b\u8b58\u5225\u3057\u307e\u3059\u3002\u4ed6\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\u30ea\u30b9\u30c8\u306b\u3064\u3044\u3066\u306f\u3001\u6b21\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5\n pt_BR: '(For official API and local deployment v2) Specify document language,\n default ch, can be set to auto(local deployment need to specify the\n language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5'\n zh_Hans: \uff08\u4ec5\u9650\u5b98\u65b9api\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u6307\u5b9a\u6587\u6863\u8bed\u8a00\uff0c\u9ed8\u8ba4 ch\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u4e3aauto\uff0c\u5f53\u4e3aauto\u65f6\u6a21\u578b\u4f1a\u81ea\u52a8\u8bc6\u522b\u6587\u6863\u8bed\u8a00\uff08\u672c\u5730\u90e8\u7f72\u9700\u8981\u6307\u5b9a\u660e\u786e\u7684\u8bed\u8a00\uff0c\u9ed8\u8ba4ch\uff09\uff0c\u5176\u4ed6\u53ef\u9009\u503c\u5217\u8868\u8be6\u89c1\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5\n label:\n en_US: Document language\n ja_JP: \u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\n pt_BR: Document language\n zh_Hans: \u6587\u6863\u8bed\u8a00\n llm_description: '(For official API and local deployment v2) Specify document\n language, default ch, can be set to auto(local deployment need to specify\n the language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5'\n max: null\n min: null\n name: language\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 0\n form: form\n human_description:\n en_US: (For official API) Whether to enable OCR recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable OCR recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542fOCR\u8bc6\u522b\n label:\n en_US: Enable OCR recognition\n ja_JP: OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable OCR recognition\n zh_Hans: \u5f00\u542fOCR\u8bc6\u522b\n llm_description: (For official API) Whether to enable OCR recognition\n max: null\n min: null\n name: enable_ocr\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: '[]'\n form: form\n human_description:\n en_US: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u4f8b\uff1a[\"docx\",\"html\"]\u3001markdown\u3001json\u306f\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\u3067\u3042\u308a\u3001\u8a2d\u5b9a\u3059\u308b\u5fc5\u8981\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u3053\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u306f\u3001docx\u3001html\u3001latex\u306e3\u3064\u306e\u5f62\u5f0f\u306e\u3044\u305a\u308c\u304b\u307e\u305f\u306f\u8907\u6570\u306e\u307f\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u307e\u3059\n pt_BR: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u793a\u4f8b\uff1a[\"docx\",\"html\"],markdown\u3001json\u4e3a\u9ed8\u8ba4\u5bfc\u51fa\u683c\u5f0f\uff0c\u65e0\u987b\u8bbe\u7f6e\uff0c\u8be5\u53c2\u6570\u4ec5\u652f\u6301docx\u3001html\u3001latex\u4e09\u79cd\u683c\u5f0f\u4e2d\u7684\u4e00\u4e2a\u6216\u591a\u4e2a\n label:\n en_US: Extra export formats\n ja_JP: \u8ffd\u52a0\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\n pt_BR: Extra export formats\n zh_Hans: \u989d\u5916\u5bfc\u51fa\u683c\u5f0f\n llm_description: '(For official API) Example: [\"docx\",\"html\"], markdown,\n json are the default export formats, no need to set, this parameter only\n supports one or more of docx, html, latex'\n max: null\n min: null\n name: extra_formats\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: pipeline\n form: form\n human_description:\n en_US: '(For local deployment v2) Example: pipeline, vlm-transformers,\n vlm-sglang-engine, vlm-sglang-client, default is pipeline'\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u4f8b\uff1apipeline\u3001vlm-transformers\u3001vlm-sglang-engine\u3001vlm-sglang-client\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u306fpipeline\n pt_BR: '(For local deployment v2) Example: pipeline, vlm-transformers,\n vlm-sglang-engine, vlm-sglang-client, default is pipeline'\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u793a\u4f8b\uff1apipeline\u3001vlm-transformers\u3001vlm-sglang-engine\u3001vlm-sglang-client\uff0c\u9ed8\u8ba4\u503c\u4e3apipeline\n label:\n en_US: Backend type\n ja_JP: \u30d0\u30c3\u30af\u30a8\u30f3\u30c9\u30bf\u30a4\u30d7\n pt_BR: Backend type\n zh_Hans: \u89e3\u6790\u540e\u7aef\n llm_description: '(For local deployment v2) Example: pipeline, vlm-transformers,\n vlm-sglang-engine, vlm-sglang-client, default is pipeline'\n max: null\n min: null\n name: backend\n options:\n - icon: ''\n label:\n en_US: pipeline\n ja_JP: pipeline\n pt_BR: pipeline\n zh_Hans: pipeline\n value: pipeline\n - icon: ''\n label:\n en_US: vlm-transformers\n ja_JP: vlm-transformers\n pt_BR: vlm-transformers\n zh_Hans: vlm-transformers\n value: vlm-transformers\n - icon: ''\n label:\n en_US: vlm-sglang-engine\n ja_JP: vlm-sglang-engine\n pt_BR: vlm-sglang-engine\n zh_Hans: vlm-sglang-engine\n value: vlm-sglang-engine\n - icon: ''\n label:\n en_US: vlm-sglang-client\n ja_JP: vlm-sglang-client\n pt_BR: vlm-sglang-client\n zh_Hans: vlm-sglang-client\n value: vlm-sglang-client\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: ''\n form: form\n human_description:\n en_US: '(For local deployment v2 when backend is vlm-sglang-client) Example:\n http:\/\/127.0.0.1:8000, default is empty'\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528 \u89e3\u6790\u5f8c\u7aef\u304cvlm-sglang-client\u306e\u5834\u5408\uff09\u4f8b\uff1ahttp:\/\/127.0.0.1:8000\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u306f\u7a7a\n pt_BR: '(For local deployment v2 when backend is vlm-sglang-client) Example:\n http:\/\/127.0.0.1:8000, default is empty'\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v2\u7248\u672c \u89e3\u6790\u540e\u7aef\u4e3avlm-sglang-client\u65f6\uff09\u793a\u4f8b\uff1ahttp:\/\/127.0.0.1:8000\uff0c\u9ed8\u8ba4\u503c\u4e3a\u7a7a\n label:\n en_US: sglang-server url\n ja_JP: sglang-server\u30a2\u30c9\u30ec\u30b9\n pt_BR: sglang-server url\n zh_Hans: sglang-server\u5730\u5740\n llm_description: '(For local deployment v2 when backend is vlm-sglang-client)\n Example: http:\/\/127.0.0.1:8000, default is empty'\n max: null\n min: null\n name: sglang_server_url\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n params:\n backend: ''\n enable_formula: ''\n enable_ocr: ''\n enable_table: ''\n extra_formats: ''\n file: ''\n language: ''\n parse_method: ''\n sglang_server_url: ''\n provider_id: langgenius\/mineru\/mineru\n provider_name: langgenius\/mineru\/mineru\n provider_type: builtin\n selected: false\n title: Parse File\n tool_configurations:\n backend:\n type: constant\n value: pipeline\n enable_formula:\n type: constant\n value: 1\n enable_ocr:\n type: constant\n value: true\n enable_table:\n type: constant\n value: 1\n extra_formats:\n type: mixed\n value: '[]'\n language:\n type: mixed\n value: auto\n parse_method:\n type: constant\n value: auto\n sglang_server_url:\n type: mixed\n value: ''\n tool_description: a tool for parsing text, tables, and images, supporting\n multiple formats such as pdf, pptx, docx, etc. supporting multiple languages\n such as English, Chinese, etc.\n tool_label: Parse File\n tool_name: parse-file\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1756915693835'\n - file\n type: tool\n height: 270\n id: '1758027159239'\n position:\n x: -544.9739996945534\n y: 282\n positionAbsolute:\n x: -544.9739996945534\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n viewport:\n x: 679.9701291615181\n y: -191.49392257836791\n zoom: 0.8239704766223018\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: paragraph\n label: Parent Mode\n max_length: 48\n options:\n - paragraph\n - full_doc\n placeholder: null\n required: true\n tooltips: 'Parent Mode provides two options: paragraph mode splits text into paragraphs\n as parent chunks for retrieval, while full_doc mode uses the entire document\n as a single parent chunk (text beyond 10,000 tokens will be truncated).'\n type: select\n unit: null\n variable: Parent_Mode\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Parent Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: A delimiter is the character used to separate text. \\n\\n is recommended\n for splitting the original document into large parent chunks. You can also use\n special delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Parent_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Parent Length\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Parent_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\n label: Child Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n is recommended\n for splitting parent chunks into small child chunks. You can also use special\n delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Child_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 256\n label: Maximum Child Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: ''\n type: number\n unit: tokens\n variable: Maximum_Child_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: true\n label: Replace consecutive spaces, newlines and tabs.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: null\n label: Delete all URLs and email addresses.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: ''\n type: checkbox\n unit: null\n variable: clean_2\n", + "graph": { + "edges": [ + { + "data": { + "isInLoop": false, + "sourceType": "tool", + "targetType": "knowledge-index" + }, + "id": "1751336942081-source-1750400198569-target", + "selected": false, + "source": "1751336942081", + "sourceHandle": "source", + "target": "1750400198569", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "llm", + "targetType": "tool" + }, + "id": "1758002850987-source-1751336942081-target", + "source": "1758002850987", + "sourceHandle": "source", + "target": "1751336942081", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInIteration": false, + "isInLoop": false, + "sourceType": "datasource", + "targetType": "tool" + }, + "id": "1756915693835-source-1758027159239-target", + "source": "1756915693835", + "sourceHandle": "source", + "target": "1758027159239", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "tool", + "targetType": "llm" + }, + "id": "1758027159239-source-1758002850987-target", + "source": "1758027159239", + "sourceHandle": "source", + "target": "1758002850987", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + } + ], + "nodes": [ + { + "data": { + "chunk_structure": "hierarchical_model", + "embedding_model": "jina-embeddings-v2-base-en", + "embedding_model_provider": "langgenius\/jina\/jina", + "index_chunk_variable_selector": [ + "1751336942081", + "result" + ], + "indexing_technique": "high_quality", + "keyword_number": 10, + "retrieval_model": { + "reranking_enable": true, + "reranking_mode": "reranking_model", + "reranking_model": { + "reranking_model_name": "jina-reranker-v1-base-en", + "reranking_provider_name": "langgenius\/jina\/jina" + }, + "score_threshold": 0, + "score_threshold_enabled": false, + "search_method": "hybrid_search", + "top_k": 3, + "weights": null + }, + "selected": false, + "title": "Knowledge Base", + "type": "knowledge-index" + }, + "height": 114, + "id": "1750400198569", + "position": { + "x": 474.7618603027596, + "y": 282 + }, + "positionAbsolute": { + "x": 474.7618603027596, + "y": 282 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 458, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 5 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Text Input, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 458, + "id": "1751264451381", + "position": { + "x": -893.2836123260277, + "y": 378.2537898330178 + }, + "positionAbsolute": { + "x": -893.2836123260277, + "y": 378.2537898330178 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 260, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source \u2192 use extractor to extract document content \u2192 split and clean content into structured chunks \u2192 store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 1182 + }, + "height": 260, + "id": "1751266376760", + "position": { + "x": -704.0614991386192, + "y": -73.30453110517956 + }, + "positionAbsolute": { + "x": -704.0614991386192, + "y": -73.30453110517956 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 1182 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 304, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MinerU\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is an advanced open-source document extractor designed specifically to convert complex, unstructured documents\u2014such as PDFs, Word files, and PPTs\u2014into high-quality, machine-readable formats like Markdown and JSON. MinerU addresses challenges in document parsing such as layout detection, formula recognition, and multi-language support, which are critical for generating high-quality training corpora for LLMs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 304, + "id": "1751266402561", + "position": { + "x": -555.2228329530462, + "y": 592.0458661166498 + }, + "positionAbsolute": { + "x": -555.2228329530462, + "y": 592.0458661166498 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 554, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" addresses the dilemma of context and precision by leveraging a two-tier hierarchical approach that effectively balances the trade-off between accurate matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Query Matching with Child Chunks: Small, focused pieces of information, often as concise as a single sentence within a paragraph, are used to match the user's query. These child chunks enable precise and relevant initial retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Contextual Enrichment with Parent Chunks: Larger, encompassing sections\u2014such as a paragraph, a section, or even an entire document\u2014that include the matched child chunks are then retrieved. These parent chunks provide comprehensive context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 554, + "id": "1751266447821", + "position": { + "x": 153.2996965006646, + "y": 378.2537898330178 + }, + "positionAbsolute": { + "x": 153.2996965006646, + "y": 378.2537898330178 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 411, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods:\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0only support the\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 411, + "id": "1751266580099", + "position": { + "x": 482.3389174180554, + "y": 437.9839361130071 + }, + "positionAbsolute": { + "x": 482.3389174180554, + "y": 437.9839361130071 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "is_team_authorization": true, + "output_schema": { + "properties": { + "result": { + "description": "Parent child chunks result", + "items": { + "type": "object" + }, + "type": "array" + } + }, + "type": "object" + }, + "paramSchemas": [ + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "", + "ja_JP": "", + "pt_BR": "", + "zh_Hans": "" + }, + "label": { + "en_US": "Input Content", + "ja_JP": "Input Content", + "pt_BR": "Conte\u00fado de Entrada", + "zh_Hans": "\u8f93\u5165\u6587\u672c" + }, + "llm_description": "The text you want to chunk.", + "max": null, + "min": null, + "name": "input_text", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": "paragraph", + "form": "llm", + "human_description": { + "en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", + "ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", + "pt_BR": "Dividir texto em par\u00e1grafos com base no separador e no comprimento m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuper\u00e1-lo.", + "zh_Hans": "\u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002" + }, + "label": { + "en_US": "Parent Mode", + "ja_JP": "Parent Mode", + "pt_BR": "Modo Pai", + "zh_Hans": "\u7236\u5757\u6a21\u5f0f" + }, + "llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", + "max": null, + "min": null, + "name": "parent_mode", + "options": [ + { + "label": { + "en_US": "Paragraph", + "ja_JP": "Paragraph", + "pt_BR": "Par\u00e1grafo", + "zh_Hans": "\u6bb5\u843d" + }, + "value": "paragraph" + }, + { + "label": { + "en_US": "Full Document", + "ja_JP": "Full Document", + "pt_BR": "Documento Completo", + "zh_Hans": "\u5168\u6587" + }, + "value": "full_doc" + } + ], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "select" + }, + { + "auto_generate": null, + "default": "\n\n", + "form": "llm", + "human_description": { + "en_US": "Separator used for chunking", + "ja_JP": "Separator used for chunking", + "pt_BR": "Separador usado para divis\u00e3o", + "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26" + }, + "label": { + "en_US": "Parent Delimiter", + "ja_JP": "Parent Delimiter", + "pt_BR": "Separador de Pai", + "zh_Hans": "\u7236\u5757\u5206\u9694\u7b26" + }, + "llm_description": "The separator used to split chunks", + "max": null, + "min": null, + "name": "separator", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": 1024, + "form": "llm", + "human_description": { + "en_US": "Maximum length for chunking", + "ja_JP": "Maximum length for chunking", + "pt_BR": "Comprimento m\u00e1ximo para divis\u00e3o", + "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6" + }, + "label": { + "en_US": "Maximum Parent Chunk Length", + "ja_JP": "Maximum Parent Chunk Length", + "pt_BR": "Comprimento M\u00e1ximo do Bloco Pai", + "zh_Hans": "\u6700\u5927\u7236\u5757\u957f\u5ea6" + }, + "llm_description": "Maximum length allowed per chunk", + "max": null, + "min": null, + "name": "max_length", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "number" + }, + { + "auto_generate": null, + "default": ". ", + "form": "llm", + "human_description": { + "en_US": "Separator used for subchunking", + "ja_JP": "Separator used for subchunking", + "pt_BR": "Separador usado para subdivis\u00e3o", + "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26" + }, + "label": { + "en_US": "Child Delimiter", + "ja_JP": "Child Delimiter", + "pt_BR": "Separador de Subdivis\u00e3o", + "zh_Hans": "\u5b50\u5206\u5757\u5206\u9694\u7b26" + }, + "llm_description": "The separator used to split subchunks", + "max": null, + "min": null, + "name": "subchunk_separator", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": 512, + "form": "llm", + "human_description": { + "en_US": "Maximum length for subchunking", + "ja_JP": "Maximum length for subchunking", + "pt_BR": "Comprimento m\u00e1ximo para subdivis\u00e3o", + "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6" + }, + "label": { + "en_US": "Maximum Child Chunk Length", + "ja_JP": "Maximum Child Chunk Length", + "pt_BR": "Comprimento M\u00e1ximo de Subdivis\u00e3o", + "zh_Hans": "\u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6" + }, + "llm_description": "Maximum length allowed per subchunk", + "max": null, + "min": null, + "name": "subchunk_max_length", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "number" + }, + { + "auto_generate": null, + "default": 0, + "form": "llm", + "human_description": { + "en_US": "Whether to remove consecutive spaces, newlines and tabs", + "ja_JP": "Whether to remove consecutive spaces, newlines and tabs", + "pt_BR": "Se deve remover espa\u00e7os extras no texto", + "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26" + }, + "label": { + "en_US": "Replace consecutive spaces, newlines and tabs", + "ja_JP": "Replace consecutive spaces, newlines and tabs", + "pt_BR": "Substituir espa\u00e7os consecutivos, novas linhas e guias", + "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26" + }, + "llm_description": "Whether to remove consecutive spaces, newlines and tabs", + "max": null, + "min": null, + "name": "remove_extra_spaces", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + }, + { + "auto_generate": null, + "default": 0, + "form": "llm", + "human_description": { + "en_US": "Whether to remove URLs and emails in the text", + "ja_JP": "Whether to remove URLs and emails in the text", + "pt_BR": "Se deve remover URLs e e-mails no texto", + "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740" + }, + "label": { + "en_US": "Delete all URLs and email addresses", + "ja_JP": "Delete all URLs and email addresses", + "pt_BR": "Remover todas as URLs e e-mails", + "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740" + }, + "llm_description": "Whether to remove URLs and emails in the text", + "max": null, + "min": null, + "name": "remove_urls_emails", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + } + ], + "params": { + "input_text": "", + "max_length": "", + "parent_mode": "", + "remove_extra_spaces": "", + "remove_urls_emails": "", + "separator": "", + "subchunk_max_length": "", + "subchunk_separator": "" + }, + "provider_id": "langgenius\/parentchild_chunker\/parentchild_chunker", + "provider_name": "langgenius\/parentchild_chunker\/parentchild_chunker", + "provider_type": "builtin", + "selected": false, + "title": "Parent-child Chunker", + "tool_configurations": {}, + "tool_description": "Process documents into parent-child chunk structures", + "tool_label": "Parent-child Chunker", + "tool_name": "parentchild_chunker", + "tool_node_version": "2", + "tool_parameters": { + "input_text": { + "type": "mixed", + "value": "{{#1758002850987.text#}}" + }, + "max_length": { + "type": "variable", + "value": [ + "rag", + "shared", + "Maximum_Parent_Length" + ] + }, + "parent_mode": { + "type": "variable", + "value": [ + "rag", + "shared", + "Parent_Mode" + ] + }, + "remove_extra_spaces": { + "type": "variable", + "value": [ + "rag", + "shared", + "clean_1" + ] + }, + "remove_urls_emails": { + "type": "variable", + "value": [ + "rag", + "shared", + "clean_2" + ] + }, + "separator": { + "type": "mixed", + "value": "{{#rag.shared.Parent_Delimiter#}}" + }, + "subchunk_max_length": { + "type": "variable", + "value": [ + "rag", + "shared", + "Maximum_Child_Length" + ] + }, + "subchunk_separator": { + "type": "mixed", + "value": "{{#rag.shared.Child_Delimiter#}}" + } + }, + "type": "tool" + }, + "height": 52, + "id": "1751336942081", + "position": { + "x": 144.55897745117755, + "y": 282 + }, + "positionAbsolute": { + "x": 144.55897745117755, + "y": 282 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 446, + "selected": true, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"In this step, the LLM is responsible for enriching and reorganizing content, along with images and tables. The goal is to maintain the integrity of image URLs and tables while providing contextual descriptions and summaries to enhance understanding. The content should be structured into well-organized paragraphs, using double newlines to separate them. The LLM should enrich the document by adding relevant descriptions for images and extracting key insights from tables, ensuring the content remains easy to retrieve within a Retrieval-Augmented Generation (RAG) system. The final output should preserve the original structure, making it more accessible for knowledge retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 446, + "id": "1753967810859", + "position": { + "x": -176.67459682201036, + "y": 405.2790698865377 + }, + "positionAbsolute": { + "x": -176.67459682201036, + "y": 405.2790698865377 + }, + "selected": true, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "File", + "datasource_name": "upload-file", + "datasource_parameters": {}, + "fileExtensions": [ + "pdf", + "doc", + "docx", + "pptx", + "ppt", + "jpg", + "png", + "jpeg" + ], + "plugin_id": "langgenius\/file", + "provider_name": "file", + "provider_type": "local_file", + "selected": false, + "title": "File", + "type": "datasource" + }, + "height": 52, + "id": "1756915693835", + "position": { + "x": -893.2836123260277, + "y": 282 + }, + "positionAbsolute": { + "x": -893.2836123260277, + "y": 282 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "context": { + "enabled": false, + "variable_selector": [] + }, + "model": { + "completion_params": { + "temperature": 0.7 + }, + "mode": "chat", + "name": "claude-3-5-sonnet-20240620", + "provider": "langgenius\/anthropic\/anthropic" + }, + "prompt_template": [ + { + "id": "beb97761-d30d-4549-9b67-de1b8292e43d", + "role": "system", + "text": "You are an AI document assistant. \nYour tasks are:\nEnrich the content contextually:\nAdd meaningful descriptions for each image.\nSummarize key information from each table.\nOutput the enriched content\u00a0with clear annotations showing the\u00a0corresponding image and table positions, so the text can later be aligned back into the original document. Preserve any ![image] URLs from the input text.\nYou will receive two inputs:\nThe file and text\u00a0(may contain images url and tables).\nThe final output should be a\u00a0single, enriched version of the original document with ![image] url preserved.\nGenerate output directly without saying words like: Here's the enriched version of the original text with the image description inserted." + }, + { + "id": "f92ef0cd-03a7-48a7-80e8-bcdc965fb399", + "role": "user", + "text": "The file is {{#1756915693835.file#}} and the text are\u00a0{{#1758027159239.text#}}." + } + ], + "selected": false, + "title": "LLM", + "type": "llm", + "vision": { + "configs": { + "detail": "high", + "variable_selector": [ + "1756915693835", + "file" + ] + }, + "enabled": true + } + }, + "height": 88, + "id": "1758002850987", + "position": { + "x": -176.67459682201036, + "y": 282 + }, + "positionAbsolute": { + "x": -176.67459682201036, + "y": 282 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "is_team_authorization": true, + "paramSchemas": [ + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "The file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "ja_JP": "\u89e3\u6790\u3059\u308b\u30d5\u30a1\u30a4\u30eb(pdf\u3001ppt\u3001pptx\u3001doc\u3001docx\u3001png\u3001jpg\u3001jpeg\u3092\u30b5\u30dd\u30fc\u30c8)", + "pt_BR": "The file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "zh_Hans": "\u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)" + }, + "label": { + "en_US": "file", + "ja_JP": "file", + "pt_BR": "file", + "zh_Hans": "file" + }, + "llm_description": "The file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "max": null, + "min": null, + "name": "file", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "file" + }, + { + "auto_generate": null, + "default": "auto", + "form": "form", + "human_description": { + "en_US": "(For local deployment v1 and v2) Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr", + "ja_JP": "\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v1\u3068v2\u7528\uff09\u89e3\u6790\u65b9\u6cd5\u306f\u3001auto\u3001ocr\u3001\u307e\u305f\u306ftxt\u306e\u3044\u305a\u308c\u304b\u3067\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fauto\u3067\u3059\u3002\u7d50\u679c\u304c\u6e80\u8db3\u3067\u304d\u306a\u3044\u5834\u5408\u306f\u3001ocr\u3092\u8a66\u3057\u3066\u304f\u3060\u3055\u3044", + "pt_BR": "(For local deployment v1 and v2) Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr", + "zh_Hans": "\uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v1\u548cv2\u7248\u672c\uff09\u89e3\u6790\u65b9\u6cd5\uff0c\u53ef\u4ee5\u662fauto, ocr, \u6216 txt\u3002\u9ed8\u8ba4\u662fauto\u3002\u5982\u679c\u7ed3\u679c\u4e0d\u7406\u60f3\uff0c\u8bf7\u5c1d\u8bd5ocr" + }, + "label": { + "en_US": "parse method", + "ja_JP": "\u89e3\u6790\u65b9\u6cd5", + "pt_BR": "parse method", + "zh_Hans": "\u89e3\u6790\u65b9\u6cd5" + }, + "llm_description": "(For local deployment v1 and v2) Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr", + "max": null, + "min": null, + "name": "parse_method", + "options": [ + { + "icon": "", + "label": { + "en_US": "auto", + "ja_JP": "auto", + "pt_BR": "auto", + "zh_Hans": "auto" + }, + "value": "auto" + }, + { + "icon": "", + "label": { + "en_US": "ocr", + "ja_JP": "ocr", + "pt_BR": "ocr", + "zh_Hans": "ocr" + }, + "value": "ocr" + }, + { + "icon": "", + "label": { + "en_US": "txt", + "ja_JP": "txt", + "pt_BR": "txt", + "zh_Hans": "txt" + }, + "value": "txt" + } + ], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "select" + }, + { + "auto_generate": null, + "default": 1, + "form": "form", + "human_description": { + "en_US": "(For official API and local deployment v2) Whether to enable formula recognition", + "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b", + "pt_BR": "(For official API and local deployment v2) Whether to enable formula recognition", + "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u662f\u5426\u5f00\u542f\u516c\u5f0f\u8bc6\u522b" + }, + "label": { + "en_US": "Enable formula recognition", + "ja_JP": "\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b", + "pt_BR": "Enable formula recognition", + "zh_Hans": "\u5f00\u542f\u516c\u5f0f\u8bc6\u522b" + }, + "llm_description": "(For official API and local deployment v2) Whether to enable formula recognition", + "max": null, + "min": null, + "name": "enable_formula", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + }, + { + "auto_generate": null, + "default": 1, + "form": "form", + "human_description": { + "en_US": "(For official API and local deployment v2) Whether to enable table recognition", + "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b", + "pt_BR": "(For official API and local deployment v2) Whether to enable table recognition", + "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u662f\u5426\u5f00\u542f\u8868\u683c\u8bc6\u522b" + }, + "label": { + "en_US": "Enable table recognition", + "ja_JP": "\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b", + "pt_BR": "Enable table recognition", + "zh_Hans": "\u5f00\u542f\u8868\u683c\u8bc6\u522b" + }, + "llm_description": "(For official API and local deployment v2) Whether to enable table recognition", + "max": null, + "min": null, + "name": "enable_table", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + }, + { + "auto_generate": null, + "default": "auto", + "form": "form", + "human_description": { + "en_US": "(For official API and local deployment v2) Specify document language, default ch, can be set to auto(local deployment need to specify the language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5", + "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3001auto\u306b\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002auto\u306e\u5834\u5408\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8\u3067\u306f\u8a00\u8a9e\u3092\u6307\u5b9a\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3059\uff09\u3001\u30e2\u30c7\u30eb\u306f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u81ea\u52d5\u7684\u306b\u8b58\u5225\u3057\u307e\u3059\u3002\u4ed6\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\u30ea\u30b9\u30c8\u306b\u3064\u3044\u3066\u306f\u3001\u6b21\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5", + "pt_BR": "(For official API and local deployment v2) Specify document language, default ch, can be set to auto(local deployment need to specify the language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5", + "zh_Hans": "\uff08\u4ec5\u9650\u5b98\u65b9api\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u6307\u5b9a\u6587\u6863\u8bed\u8a00\uff0c\u9ed8\u8ba4 ch\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u4e3aauto\uff0c\u5f53\u4e3aauto\u65f6\u6a21\u578b\u4f1a\u81ea\u52a8\u8bc6\u522b\u6587\u6863\u8bed\u8a00\uff08\u672c\u5730\u90e8\u7f72\u9700\u8981\u6307\u5b9a\u660e\u786e\u7684\u8bed\u8a00\uff0c\u9ed8\u8ba4ch\uff09\uff0c\u5176\u4ed6\u53ef\u9009\u503c\u5217\u8868\u8be6\u89c1\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5" + }, + "label": { + "en_US": "Document language", + "ja_JP": "\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e", + "pt_BR": "Document language", + "zh_Hans": "\u6587\u6863\u8bed\u8a00" + }, + "llm_description": "(For official API and local deployment v2) Specify document language, default ch, can be set to auto(local deployment need to specify the language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5", + "max": null, + "min": null, + "name": "language", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": 0, + "form": "form", + "human_description": { + "en_US": "(For official API) Whether to enable OCR recognition", + "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b", + "pt_BR": "(For official API) Whether to enable OCR recognition", + "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542fOCR\u8bc6\u522b" + }, + "label": { + "en_US": "Enable OCR recognition", + "ja_JP": "OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b", + "pt_BR": "Enable OCR recognition", + "zh_Hans": "\u5f00\u542fOCR\u8bc6\u522b" + }, + "llm_description": "(For official API) Whether to enable OCR recognition", + "max": null, + "min": null, + "name": "enable_ocr", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + }, + { + "auto_generate": null, + "default": "[]", + "form": "form", + "human_description": { + "en_US": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex", + "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u4f8b\uff1a[\"docx\",\"html\"]\u3001markdown\u3001json\u306f\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\u3067\u3042\u308a\u3001\u8a2d\u5b9a\u3059\u308b\u5fc5\u8981\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u3053\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u306f\u3001docx\u3001html\u3001latex\u306e3\u3064\u306e\u5f62\u5f0f\u306e\u3044\u305a\u308c\u304b\u307e\u305f\u306f\u8907\u6570\u306e\u307f\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u307e\u3059", + "pt_BR": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex", + "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u793a\u4f8b\uff1a[\"docx\",\"html\"],markdown\u3001json\u4e3a\u9ed8\u8ba4\u5bfc\u51fa\u683c\u5f0f\uff0c\u65e0\u987b\u8bbe\u7f6e\uff0c\u8be5\u53c2\u6570\u4ec5\u652f\u6301docx\u3001html\u3001latex\u4e09\u79cd\u683c\u5f0f\u4e2d\u7684\u4e00\u4e2a\u6216\u591a\u4e2a" + }, + "label": { + "en_US": "Extra export formats", + "ja_JP": "\u8ffd\u52a0\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f", + "pt_BR": "Extra export formats", + "zh_Hans": "\u989d\u5916\u5bfc\u51fa\u683c\u5f0f" + }, + "llm_description": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex", + "max": null, + "min": null, + "name": "extra_formats", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": "pipeline", + "form": "form", + "human_description": { + "en_US": "(For local deployment v2) Example: pipeline, vlm-transformers, vlm-sglang-engine, vlm-sglang-client, default is pipeline", + "ja_JP": "\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u4f8b\uff1apipeline\u3001vlm-transformers\u3001vlm-sglang-engine\u3001vlm-sglang-client\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u306fpipeline", + "pt_BR": "(For local deployment v2) Example: pipeline, vlm-transformers, vlm-sglang-engine, vlm-sglang-client, default is pipeline", + "zh_Hans": "\uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u793a\u4f8b\uff1apipeline\u3001vlm-transformers\u3001vlm-sglang-engine\u3001vlm-sglang-client\uff0c\u9ed8\u8ba4\u503c\u4e3apipeline" + }, + "label": { + "en_US": "Backend type", + "ja_JP": "\u30d0\u30c3\u30af\u30a8\u30f3\u30c9\u30bf\u30a4\u30d7", + "pt_BR": "Backend type", + "zh_Hans": "\u89e3\u6790\u540e\u7aef" + }, + "llm_description": "(For local deployment v2) Example: pipeline, vlm-transformers, vlm-sglang-engine, vlm-sglang-client, default is pipeline", + "max": null, + "min": null, + "name": "backend", + "options": [ + { + "icon": "", + "label": { + "en_US": "pipeline", + "ja_JP": "pipeline", + "pt_BR": "pipeline", + "zh_Hans": "pipeline" + }, + "value": "pipeline" + }, + { + "icon": "", + "label": { + "en_US": "vlm-transformers", + "ja_JP": "vlm-transformers", + "pt_BR": "vlm-transformers", + "zh_Hans": "vlm-transformers" + }, + "value": "vlm-transformers" + }, + { + "icon": "", + "label": { + "en_US": "vlm-sglang-engine", + "ja_JP": "vlm-sglang-engine", + "pt_BR": "vlm-sglang-engine", + "zh_Hans": "vlm-sglang-engine" + }, + "value": "vlm-sglang-engine" + }, + { + "icon": "", + "label": { + "en_US": "vlm-sglang-client", + "ja_JP": "vlm-sglang-client", + "pt_BR": "vlm-sglang-client", + "zh_Hans": "vlm-sglang-client" + }, + "value": "vlm-sglang-client" + } + ], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "select" + }, + { + "auto_generate": null, + "default": "", + "form": "form", + "human_description": { + "en_US": "(For local deployment v2 when backend is vlm-sglang-client) Example: http:\/\/127.0.0.1:8000, default is empty", + "ja_JP": "\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528 \u89e3\u6790\u5f8c\u7aef\u304cvlm-sglang-client\u306e\u5834\u5408\uff09\u4f8b\uff1ahttp:\/\/127.0.0.1:8000\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u306f\u7a7a", + "pt_BR": "(For local deployment v2 when backend is vlm-sglang-client) Example: http:\/\/127.0.0.1:8000, default is empty", + "zh_Hans": "\uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v2\u7248\u672c \u89e3\u6790\u540e\u7aef\u4e3avlm-sglang-client\u65f6\uff09\u793a\u4f8b\uff1ahttp:\/\/127.0.0.1:8000\uff0c\u9ed8\u8ba4\u503c\u4e3a\u7a7a" + }, + "label": { + "en_US": "sglang-server url", + "ja_JP": "sglang-server\u30a2\u30c9\u30ec\u30b9", + "pt_BR": "sglang-server url", + "zh_Hans": "sglang-server\u5730\u5740" + }, + "llm_description": "(For local deployment v2 when backend is vlm-sglang-client) Example: http:\/\/127.0.0.1:8000, default is empty", + "max": null, + "min": null, + "name": "sglang_server_url", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "string" + } + ], + "params": { + "backend": "", + "enable_formula": "", + "enable_ocr": "", + "enable_table": "", + "extra_formats": "", + "file": "", + "language": "", + "parse_method": "", + "sglang_server_url": "" + }, + "provider_id": "langgenius\/mineru\/mineru", + "provider_name": "langgenius\/mineru\/mineru", + "provider_type": "builtin", + "selected": false, + "title": "Parse File", + "tool_configurations": { + "backend": { + "type": "constant", + "value": "pipeline" + }, + "enable_formula": { + "type": "constant", + "value": 1 + }, + "enable_ocr": { + "type": "constant", + "value": true + }, + "enable_table": { + "type": "constant", + "value": 1 + }, + "extra_formats": { + "type": "mixed", + "value": "[]" + }, + "language": { + "type": "mixed", + "value": "auto" + }, + "parse_method": { + "type": "constant", + "value": "auto" + }, + "sglang_server_url": { + "type": "mixed", + "value": "" + } + }, + "tool_description": "a tool for parsing text, tables, and images, supporting multiple formats such as pdf, pptx, docx, etc. supporting multiple languages such as English, Chinese, etc.", + "tool_label": "Parse File", + "tool_name": "parse-file", + "tool_node_version": "2", + "tool_parameters": { + "file": { + "type": "variable", + "value": [ + "1756915693835", + "file" + ] + } + }, + "type": "tool" + }, + "height": 270, + "id": "1758027159239", + "position": { + "x": -544.9739996945534, + "y": 282 + }, + "positionAbsolute": { + "x": -544.9739996945534, + "y": 282 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + } + ], + "viewport": { + "x": 679.9701291615181, + "y": -191.49392257836791, + "zoom": 0.8239704766223018 + } + }, + "icon_info": { + "icon": "e642577f-da15-4c03-81b9-c9dec9189a3c", + "icon_background": null, + "icon_type": "image", + "icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAP9UlEQVR4Ae2dTXPbxhnHdwFRr5ZN2b1kJraouk57i\/IJrJx6jDPT9Fpnkrvj3DOOv0DsXDvJxLk2nUnSW09hPkGc6aWdOBEtpZNLE9Gy3iiSQJ\/\/gg8DQnyFFiAAPjtDLbAA9uWPn5595VKrjLjtn\/YqrZaq+L6quL5X9pQqO1qtI3u+0mXy8MFJxfihP1qrss\/XQ+FFPtRK1UmreriMJkz\/GqaVX8N1z1dPHdyvnZpP1+fmVG3jhTVzDden6SjP6brt7b1y21VbWnk3CawKAbWp9Fmo0s3VbKamffWYgKz5vv+t1s5jt62qGxtrPVAnrUwqAH63u7dF\/4E3qaBbVCB8zjjHcZRDJs91XaXJpOGDMDgSx5zj2HWDMByz4\/v5fBZ80lLhE3Y498jcsfO8Nt1DlYbvmXs9L\/DbbY\/uozqmjwOUSvvVtuN8+tKLa4\/73GI1KDEAYek8x7vta\/0a5XiLcw1Y5uZcAxpgK5VKXeD4HvHTUaDdbivA2Go1yW+rZrPVkzDBUSOk7\/\/u2m8e9VyweGIdQAPenLpD\/3LvcLsM0C0szBNs8wY+nIvLpgKA8PS0YWBkKwkQyUo8un517b7tXFsl4cnO\/25p33lA7YoKMloqzanFxSXj2864xJe8Ao3GaRdGpAYQbVtEKwCS1au0Xf8TyuMWMirgQYXiOFjFw8PDcLvxC7ek79roSZ8bwO3dvTue77+P6hZV69LSElm9heKoLyXpKgCLeHx8zCBSb9m7e972YWwATVvPVfeoL\/YOcjg\/X1IrKyvd3mo313JQKAXQLgSEgBGO3v\/DG9eu3I1byFgAosr1HP9zauttitWLK32+nzs5aRgQMfSDoRtnXr8ep0qeGMAOfF+ho4FxuosXV7vjdfmWVHI\/qQKwhvv7z02VTCDVnJJ+dVIIJwIwDB\/G8FZXLwh8k761gt0PCJ8\/PzDjiHEgHBvAKHywfDKeVzCaYhYH1TAsIQazJ4VwLAAFvphvZoYeiwvh2YnVPqJ1OhwVVLti+foIJEGmNgQbYISG5Creqf85Ga7yKGlGAvj9zh5mNjbR4UCbT6rdUZLO7nWwwf0CMNNyvXuj1BhaBdPU2m2lnE8Q8aVLF6XDMUpNuW4UQMfk2bN9swKHqua7N9avPBwkzUAATbvP9b\/BDMfy8rLMbgxSUML7KoBxwqOjI1yr07TdK4OGZwZWwTS3+wDwYRWLTK311VgChygAZjA7Rq7cbpp1An3v7gtgUPWqW2j3YW5XnCgQR4HQ1OzWk529W\/3i6AsgLakyjUfAx6uS+z0sYaLAMAXQd2ADRt9PedCvV3wGwO939+7xNBuqX3GiwHkUQFWM5XnUnKu0HM8sXAnHdwZA+grVbdwA8ylOFLChABYlw5FFvBO1gj0Aou0H6wdi8REnCthQIMRTmazg7XCcPQBy229+XhaUhkWS4\/MrELKC+JJa13UB3P5xb1Pafl1d5MCyArCC6JSQ28LXdDn6LoD09bzbCJSql6UR37YC3U6t521x3F0AtaNvIlCqX5ZGfNsK4Gu5cGQJDWs4NgCiZ0JLujYRIBYQKohLQgFsSMDVMPeGDYBtt72FBAW+JGSXOFkBwAcI4bA\/EHwDoO9rY\/0cJ7iIC+JEgSQUwHpB4\/ygHWgAJDJfRiD2aREnCiSpAANodkajhDoAqgoS7bfzFMLFiQK2FGAjR7WxMXqdKjjogDCdthKTeESBqAKdTgiCK\/jjUG8kOOjsxYdAcaJAUgoAQF5hhV1xndacVL9JiS3x9leArSC2ZHa03y7jNg7s\/4iEigL2FOChGGIPAOoKosY2uOJEgTQUYGNHw39lB7vRI1HszyxOFEhDAQaQ0io7fqc3EgpMIw+SxgwrwJ0QRzvr3XpXAJxhIqZYdKp59TrSl2m4Kb6FGUuajR3trLvWtYAzpoEUd4oKcIeXhgQvCYBTfBGStFJzm\/\/EWkDqiiw1qR6W1TC7r11JlIurX\/6caPy5iJx+uUkd7SOrFYfgM8MwNBKYi7xLJoulgFTBxXqfuSuNAJi7V1asDM99+8fLpvYtly91VykUq4jDSzPtNpntNme0PLbjH67meFexf2C9Hmx8QMOAwVQcj82MF4XcJQrEVyDEmpmKk9Uw8bWUJ2Mo0ANgjOflEVHAmgLSCbEmpUQURwEBMI5q8ow1BQRAa1JKRHEUyAWAPx7Rj+I1afpGXOEUyAWAn+2cqI9\/aBROfCkQLT\/Iugiwfp\/tNtRH3x+LFcz6y4qRv8wDCOu3a6pgX6xgjBec9UcyDSBbPxZRrCArURw\/0wCy9WO595tiBVmLoviZBTBq\/VhwsYKsRDH8zAIYtX4st1hBVqIYfiYBHGT9WHKxgqxE\/v1MAjjI+rHcYgVZifz7mfo5pACsE\/XRDycjlYUVhPvT1QV1dTmT\/0cjyyA30LfisiBCFzwz2Ezf0BvD4ZkP\/n2k\/kbjhH++tiggjqFZFm+ZKoBxwIuKiPaigBhVJT\/n+snOL8bkXL68llqubYA3KLMvUnU8iUVM+zsU0fQGlaPw4Yd1U8RULWCS4PELE4vISuTDT7X1DgCxC8OlUvLJ\/pqWfOE+yyimagFRPb77h2VTRaLz8PfdU1po0Laqz8WSVm\/9dlG9fX1J4VhcthVIFUCWIgkQ8wqe7e\/tRtuYtuPnd3he\/5dfglpwKgBy5m2AmFfwWINZ96cKIIsfBfFjGohGG26YE\/CGqZOfa5kAkOViENFy++A\/wUwHX4v6b1Eb793fL0WD5TxnCiTfHY0hCOAa1oF4cdlVb9AUnLj8K3AuAD\/baSh8bDvA9zb1ZAe5N67J\/O8gbfIWHrsKBnjvfnPQLS+gsOlgBbEoIdoWFOtnU+XpxxXLAkbhA4i2LeEgKyjWb\/rQ2MzBxABG4ePMJAFhtC0o1o\/VLo4\/EYCD4GM5bEMYtYJi\/Vjp4vhjAzgKPpbENoRsBcX6scLF8sfqhIwLH0sDCOFsdEzYCvq0lausfGaFi+OPBHBS+FgamxDCCj4bMTPC6YqfLwWGAhgXPpbAFoSwgviIK54CA9uA54WPpbLdJuR4xS+GAn0BtAUfSyQQshLiRxU4A6Bt+DhBgZCVED+sQA+AScHHCQqErIT4rEAXwKTh4wQFQlZCfChgesH\/+G9DvfdDenswA0I4G+OEJiL5k1sFHAPfvw5TL4BYwtQlz2SCzntTgI+VEAhZidn1u23AaUkgEE5L+WykO3UAIYNAmA0YppGLTAAoEE7j1WcjzcwAKBBmA4i0c5EpAAXCtF\/\/9NPLHIAC4fShSDMHmQRQIEwTgemmlVkABcLpgpFW6pkGUCBMC4PppZN5AAXC6cGRRsq5AFAgTAOF6aSRGwAFwukAknSquQJQIEwah\/Tjzx2AAmH6kCSZYi4BFAiTRCLduHMLoECYLihJpUYA6uAna+j3O\/LoZClX\/t4afium4+oEoJ9rAFEQgZDfZz78MIB65a9PtinbFbV0USkn1zWyFfWT\/l2N6O94WMl03iLx6QtwR\/vIdU2Iy9vLK1h+BcCCvdC8FUcAzNsbK0J+u50QXcfvBX9FZdpaXV1VpdLQ3dqKUHQpQwYUaDZb6vnz58hJVSxgBl7ILGcBAJphmFDXeJb1kLKnrIDj+f4zpOmjayxOFEhBAc8LfiNaKy3DMCnoLUlEFOj2QSjcoZ2Xa7jueWIBoYO45BXg2tbzvaeY+zBtQM\/rzs8lnwNJYaYVCPU36k5bd+aClQA401SkWHiubbV2ao7Wbg1pt1pBwzDFfEhSM6oAW0Bfq7oz1wragBw4o5pIsVNUoN0O+htzc7QYYWNjrYa0YRYFwhTfwgwnxVXwxgtrnWEYX6zgDPOQatG5qad99RgJB1NxOjhpNpupZkYSmz0FeBCaKuGnKH0AoO+bE6Zz9mSREqelQKvV6iTlhy2gX0Uo09m5QzxRwLoC7XZnGk47vwLott0qUoIFlI6Idc0lwpACWIoF57ZVFb6pgqknjNmQKuCTahiyiEtCAYYPHZAOc502IKVG8H2NRE9PT5NIW+IUBYithlHBVwFrOAk6IebIqcITAKGCuCQUYAvoec4jjr8L4I2ra1UKNNUw38g3iS8KnFeBRqNhJjuw+uqljTXTAUGcXQBxon3\/S\/gnJ8fwxIkC1hTgmtVX+n440h4AHTKNRGgdFlCsYFgmOT6PAswTrN\/vrq09CsfVAyB6JrRE\/0PcIFYwLJMcn0eBw8Pg11iJrU+j8RCUvW57e6\/sOf43tFSmsry8pBYXF3tvkDNRYAIF0PY7PDxSsH7Xr13eiD7aYwFxEVbQ1\/oujo+PT2RgGkKIi6UAll2BIbho248jPAMgLlA9\/QV5pkd8cJD+j1lz5sTPtwJoxnWWXn0RbftxyfoCiItuW79JZpM6JE1qDwYU80PiiwKjFDg5aahG4xRVb90tBTVqv2cGAkhVcU35QZcZZpRXsfaLRMJEgbACQdUbDOVR1XsXC0\/D18PHAwHETdfX1x5SI\/BDzBFjLw+BMCydHPdTAIyAFbOohdgZVPXys2Qhh7tOr\/gr6hVvuq6rLl5cVVqPfGx4pHK1kAoAuv19GKo2TWqox9fXL78yqqBDLSAeRq\/Y8fTrFGENESMBQ\/eomOX6TCnQAx8NuTjz+vVxBBjblJElrND4ICxhRSzhONLOzj1n4CvpV4e1+8LKjA0gHopCeOHCBeW6I41oOD05LpgCaPMdHBwE1S4s3wTwQYqJAMQDYQgd2tgDG1sKhFBm9hx3ODDWRyBNDB8UmxhAPNSB8HN0TNAhWVpalCk7CDNDDuN8x8fHpj+ADgfafONWu2GZYgHIETx5+vND6hLfwfnCwjxBuCTWkMUpqI\/2HhYXnJ52vsJLQy2u57yPzmqcIp8LQCT4ZGfvtlb+A9raqIwqGdZwYWEhTl7kmYwr0GP1aIaDVrfcv7F+5eF5sn1uAJE4quS2qx7QlPMtnAPElZUV2fQcYhTAYT0f5nVDa0SrNL32ZpwqNyqHFQA5UmMNff8ehmoQhl335+fnxSKyQDnzo+ARLDVMrXUWq1gpjVUAOUffPf35fUfpvzCIsIgBjAtiFVmkDPpo3+Fruc3mqVlIgHM4gsQsVJ7znIdx23qDipsIgJxY1CJyOGDEYPYc7c\/lOPBdviR+SgoALnyw2gkzXPj02Zigqn39peOpR7bB42ImCiAnsv3j3iaNGVFnRd\/E0A2Hh31YSYwnYlgHx\/D5A0jZBdd7s8338T2z4DNA0bJibA4O+zCzBeOt93DOkPEWadHn6bxK931NL6Ha+aZkn1vsBfW+SXvxDoyJOixl6rBskUAYQ3yZxpAqg6AcGIlcsKMAtuXDzmjYnEo7VWyXkZSlG5Th1AEclJHtn\/YqtHFShYAsA0pPeWXawn8d91PDt0KecbiOIR8+h0\/G8kxY+HoRj+nF1cmg1c+UTQd7PVJ4nYbHzHXaf\/6po5x6m7bEJa1q2JnURg\/2TNoxAv4PoGedQHqhulIAAAAASUVORK5CYII=" + }, + "id": "103825d3-7018-43ae-bcf0-f3c001f3eb69", + "name": "Contextual Enrichment Using LLM" +}, +{ + "chunk_structure": "hierarchical_model", + "description": "This Knowledge Pipeline extracts images and tables from complex PDF documents for downstream processing.", + "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/parentchild_chunker:0.0.7@ee9c253e7942436b4de0318200af97d98d094262f3c1a56edbe29dcb01fbc158\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/mineru:0.5.0@ca04f2dceb4107e3adf24839756954b7c5bcb7045d035dbab5821595541c093d\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 87426868-91d6-4774-a535-5fd4595a77b3\n icon_background: null\n icon_type: image\n icon_url: data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAARwElEQVR4Ae1dvXPcxhVfLMAP0RR1pL7MGVu8G7sXXdszotNYne1x6kgpktZSiiRNIrtMilgqnNZSb4\/lzm4i5i8w1TvDE+UZyZIlnihKOvIAbN5v7\/aIw93xPvBBHPDezBHYBbC7+O2Pb9++\/YAlMiIPHjwoO65btpQqK6VKVKySsqwV9fQpSliy6IcTubhYxrFTrJJqXe+Mz2+I8KgJoeh3IIRBTW1vt+MoXLWWlgRheo\/uqlmWVSVMa67jVJeXl6sHTx7dGb1HurK9uVnybHtNKXFBWAKEW1XCKvcrhb+tCdi+LBeX2ud80o3AaHipDUGkFErdJXJu2J63vliptAncnXr8MakQ8PH9+2tU9Av0omtCCZx3iZSSsLCE49j6iHPE+U+fCEnnCEOmTp\/uehbXzPWuizmNoFaC4CQdFxCE3V9\/bcd4vk8txpLwW\/f6FPZ9RT8c\/fZ9nSdESmGtK1veOvPGG3SerCRGQGg6V8rLxIwPg6QDUWzb1kTDcXrKaROu16v6T550RMuTJzvCHOhEYBS8PM8TIGmj4QrX9ejndiRG5Kj6lvj8zLlzNzsuxBiInYCaeI7zqeWrK8YuA+lmZqbF9PSUcIh0o2irUQCNEZeJTSoqXg0i4d7evial0ZIgopLWzdNvvvl53MDESsBfNrc+sqX6wth0juOIublZMUXHcSUqoOPmO6nPxYkXiFinn9GMIGLcGjEWApLWK7u2\/ZVpauMgniFAnICaNPN8TAIvaMXd3ZcHdqMlbjve1NXFSvSetIxaGU\/u3\/\/Uk\/aPIB+a1rm5Y+LEwnwkrRe1TPx8vAigBVssLYj51+Z0x5Dq+iNXNn58tLV1OWpOYxMQtt7jra0vqFd1HbYe7DsU8tjsTNQy8fMZRQB2PJQLjiQlS4mvwIEoxR2rCdZNrpTfUnd9FVrv2LHZxIiXRJMSBbCsP5sWXvX6nnj1qq5dPOQQ33D86Y\/HaZJH1oAgnyflHZAPfrrSieOJkS\/rlV3k8s1SS3eC6h4cABc82bizvfmgPComIxHQkA+9XPjwoI6bBRg1W74\/Dwig7sEBuNbIDCPFNDoJhyYgky8PlIn\/HUDChQgkHIqAvcg3ijM5\/tfmFLOEALgwLgmHIiANqX0bbHaZfFmq\/myUJUxCV+5\/S4qrNKh0AwnY7GY3OxwLx18baRhtUOZ8PV8IgITHiSOmY0KDE9cGveGhBHy0SY5GJa4gYe5wDIKSrwMB0zHBDCZw5+G9e1cOQ6YvAWH3kX2pnYzw8zVZfVhSfI0RaCIAroAzEJp6cu0w90xfApL6pEkFogSvN49uNIHlv8MjAD8hRsdISq7d+Krfkz0J2Gp6PwKT51pM7pcAxzMC\/RDQY8fNpnjtV5op1eu+ngSUUmnjEeTjprcXbBw3DALoO5imWJA516tX3EVAmt1yDS4XEK816DxMXnwPI9ATATTFmJ5H5lx5X8quDkkXAZXvX0ZK8\/NzPRPkSEZgVAQwKRlCq34+DWvBDgLC9oP2w\/yvKLOYdW78hxFoIQAuQQuSNNcJBZDpIKCx\/bjpDSDEp7EgYLQgjWR8GEywTcBHmz\/r9bls+wXh4fO4EIAWbDmn1x5v3l8z6bYJKKV3GZFTtEyShRFIAoHp5kxq4Ut\/zaTfJqAS8gIiufk10PAxbgRajmloQs01pK+n5KNn4kp7GxEnlwZOYMBtqUl4inlqGeckoywt5MfODbXajp7G7\/jeIrYB0RoQe7UAb+755oR1GX0NOKYlzZ6GGM5pAhIzVxFp074sLIxAkghg7x8I7VezhmPTBrSs8wiwBgQKLEkigLVEEIyM4Njs8iqLAtQNsdt9ElzLhGTJhskEIBNeCGxG9YLegaZpaaXXYlyzCcbqJhZGIEkEYAdCjAaUD2jiKSJ41gtQYEkaAd0RoYkuEOyKK2mMroyA3YrEOQsjkCQCRgs6dbcsaYtc7fizZFM1Jpkxp80IAAHTE7ZsVZbkgikjkptgoMCSBgJGAxL3SmiMmxqwZRymUQDOo9gIGAKCe9L0RgKRxUaH3z5xBExrS5xbaTv+9FSZxLPmDBiBTgSId9YKorLohO4sKofygoBRdp5Si20NmJeX4\/fIPgLG40JEPMEEzH595bqEtF7Ool4wLUWa0F7wr+\/\/JlMVdOrOfzrKY8p3\/C9\/FjMXL3ZcK2rADHrQHtPkiBa+dsOYdrmooCT93s\/\/8U+x9\/33SWczcelzE5xilYGEjY2NFHPMflZMwJTraOdvfxfuTz+lnGt2s3O8bb0URPheA+NxsZeU5\/N1Qqp2d8Wzq38SJ774l3DefrvzYgZDSazJ0V\/r3Hmu3xZTEHgoLuWKNyT0Hj5MOedsZBfo8OqhOCbgEdQLSLhDmrCIJOwg4BFgz1m2EAD5ikpCQwIHX9SGyJjWAydhM5jC5vFoSLhANqH9+uuZf8W4bHppNZd\/xN\/ryDyE2SugIWERm2MmYEb4aEgI27BIwgTMUG2DhDXqmBSJhEzADBEQRfHISV0kEjIBM0ZAQ0KMmBRBmIAZrWWMGWPsOO\/CBMxwDWP2TN5JyATMMAFRNJBw98t\/Z7yU4xePCTg+dqk9Wf\/6a\/Hy1q3U8kszIyZgmmhHyOvlzVu5JCETMAIp0n40jyRkAqbNooj55Y2ETMCIhDiKx0HCV19\/cxRZx54nEzB2SNNJ8MWXX+ZikRMTMB2+JJJLHnyE\/FmkRKhxkGh4nfDBFT4DAqwBmQdHigAT8Ejh58yZgMyBI0WAbcCY4Td7wcScbN\/kJt3GZA3Yt2r5QhoIMAHTQJnz6IsAE7AvNHwhDQSYgGmgzHn0RYAJ2BcavpAGAkzANFDmPPoiwATsCw1fSAOBifcDTrofLI1KznIerAGzXDsFKBsTsACVnOVXZAJmuXYKUDYmYAEqOcuvyATMcu0UoGxMwAJUcpZfkQmY5dopQNkmzg846nw7m77Fge9xzH7wgZhaPT+wSodN35qf1+kibef8eTHz3rsD0+51w7D59Xq2V9yk+UUnjoC9QD8sDhs+4odNfqZWV8U8fTQwjs3AsYsptlDTn96ivVt2iZDT770n5i79Lpb0D3unPF0rVBMMstT+8MdEPpUFQoLkSD8vi8bTIHqhCAhAQRR8KiupHemRPhaN53lLtTiJOfFN8CCbp7FxV9RJM+398EMbN5Bkl3YfxffaBkm\/9P2Hv2gSI2337t0uQmNLNeSD7wSPIv3yGyWNSbp34gk4CGx0PPCD3RfcY8\/Yb7ALxxH5+lmBn+nY7H3\/g04\/qFnRJDtvvSWO\/faTcbIoxDOFaYLnLl\/SnZBgrYI0ccnMxQ9Er68doTnmz7P2R7kwBAQE6KEGpUFNZ5wCLdubhPndYjcqfoUiYPj7vMHmMiqQ5nmQEK6eoKC5hz3I0o1AoQgI53EaArsybFvWY2zu03iHtPIoFAHRIw5KWCMGr0U9n363c2QEznCWbgQKRcB6wBUDKOTZs92IxBRjescmubjtTZPupB9z74YxFQQXDNwiQZm9eDEYjPU8PNznD2kDjjo2POl+w1wTEIa\/+9P\/tH9Oj9kGKAaCTI85gSCQTN\/TsL3JnZDeUE08AUfVGIAB5IC7hOXoESiUDQi4QT4MwYWbyLirIqzxwhox7vwmNb2J14CjAB\/ndKxB+aLpD8qwhJ90my74zsOc556Akmy9GXKJYK5euGc6DEDj3hMefkuyxz1uGbPw3MQTMKsao\/5N54dkZugfgKUbgcLZgN0QxB+DSQ7hYT5niOUA8Zck+yk6\/vZTXUpfedkv7QSUEMQLTvtCkWdoPcqwNmDWX9F\/8iSWIvq1Zzod1oCxwNlMBOTb6THbGlPBWHoj4FhC1JQQJaWUsCwKsYyFwCuy+fARwbD7Ze7Spdxov7GA6fEQuNaSmkOnNQowAQ0kQx4xJb9BEwwwHR\/T8sPEQzJoeln7dQPaQUB7cVGQ7hOytCCk5BY5DNc4Iy2GfMf\/+pdwchMXlidPxl9m3xfSniLWCTHxbpj40YmWIkY80OzyOpDhcGQCDofTwLtAvGOffKKJx8NuA+Fq38AEbEMx2glIBtfKFG3LgVEW5+239DjzaKkU826\/1QlRQtWsx1tbd8gIXFtYmBdTDvOxmJRI960brit2dmiNjCXWudeRLvacWwgBEBBuGKH8tm8mdAsHGYHkEJDkk9FjIgHfTHK5ccqMACHgeb7GgdwwVW6CmRLpI3AwEiIkWIgSeOQcZGEE0kCg3QtW6t6BDRhgZRqF4DyKi0DA3KtJy7eanRAmYHEZkfKb+8YGtKyqVI5VRf6uy\/MBU66HwmbXboI9qyZd160CiYBaLCww\/OLpIOC3+hvurFOVy5VKFdkikn2B6VRA0XMxBFxeXm66YSyhqgCFxuaKjg2\/f8IIuJ4x9dQGstKDv8qyaAM7UW40XDEzM51wEUZLPq41CKPlmp+7E5nPFwEe0wEhp989JKMd0Rb5YxA4YCdCLIxA\/AhgIgKEiKc1YHMkxLLWEelxTxgwsCSIgPG20PqjAwLanreOPKEBuSOSIPqcNLn7mhrQcE7bgIuVSo3mBa6TK2bN9T0xJbM7LzBrNk3WOJVlm9k0v9Td3QDngF2zCcaZUv\/FYX+\/gQMLIxA7Anv1fZ0m+Vo01xA4IKAv1xGxt9e8CecsjECcCLQ1oO\/fNOm2CXi68uY6pkhjRKR9o7mLj4xARASg2PRgB82+OlOp6A4IkmwTUKev1Hc4vnpZ10H+wwjEhUDdtKyW+DyYZgcBnaZqrEEDshYMwsTnURAAl9D7JduveubcuZvBtDoI2OyZqBu4gbVgECY+j4LA7u5L\/Ti5+G6F0+kgIC6SFrxOY8JVsLZe3wvfz2FGYCQEgrbf2crKZ+GHuwgILSh96ypufPmqzo7pMGIcHhoBLPMAh7SEbD+TSBcBceFU5dxt0yPefdFUn+YBPjICwyIAM05PvbLE7bDtZ9LoSUBcpGG539Ohtt9ocFNs0OLj0AjAfNvb1z7lmutN6Ra118N9CagnqvpKd5mhRnnVXC\/4OK4XAsGmV1ni6nJludrrPsT1JSAunq6sXKfJqjfgnMZeHkxCoMJyGALgCLgCzlCv90a\/ptekcSgBcZPt+59h8Bht+fPnL7hTYpDjYxcCIB040hzxUBtnKitXum4KRQwkIHrFru9\/DNeMR9O1nj0ndvM+MiEYOQjyPUMriSl95HD2\/OmPh0FlIAGRCOxBUq3vMwmHgbR493STb+r9w+y+IEJDERAP9CIh24RBKIt5Dg50ar7hyQfEhiYgbg6TkDsmQKW4YjocB83uaOQDciMREA8YEpqOybNnz9lPCGAKJvDzoe5Nh8PzRycfIBuZgHgIJDy9svKOcdG8ePlKYMCZm2Sgk28xPV3UOc7hanlB\/YNhbb4wOmMR0CRyamXlivKFHjGB1xtNMs+oNujk7witt13bERgdI6kJX12Fq6XSWt8xzhtHIiAyPFM5d5MWMr1DY8e3oY4xdoxC8nzCcaojm8+gLqFcjNbDPAHXn3oHAxVRS2xFTSD4\/KPNrctCqmuWsMqIx6772Gkhym4L4VVevCoOyPaXOPEC8TChwCgT+Peoxbt6FpNVYpJYCWjK9Hjz3mdKikuGiPgEmCbj7PTIn4KIE1BTvjwfo+AFmw5rw7EyEqYUwi1Bc3tjV\/jXozS3JrHgMRECmgzCGtHEg4y2Y2sySlsKx7bNpa5jFEC7EitAxLB46Q4EEWyf9gOCGwW7YuiNCQ5Ip7\/jQSz8bpeWasRNPFMViRLQZPJo8+dV2vjjsiXFBXorOu8WaEmbfvhkLEipj3SOD2oj3oh96hRtbN1ZbNyLX5HEECj8zo3Hj3UUrmMjSLl0sukqoXPEYWsMfY3s9Z5C9p3wsEZcruuVkj1vii8y9Vrb3NwsHRf2mpJqlVhzntAo9yMlXtN80d28slxcMqd87IHAKHhhWz7sjKY8bBZurT8X3npSmq5HUXVU6gTsV5AHmw\/KjnDLBEqJyFmm+0oEzop6+pQ6XQJhLdbiYonCJRPGkT43i3BHXPB6Ts9rhFUt\/G7+9nYVcWS94VrNWloSrd3PatgPnLCqusKpjuu3Q9pxyv8BVb3XBNS3Vn0AAAAASUVORK5CYII=\n name: Complex PDF with Images & Tables\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: tool\n id: 1750400203722-source-1751281136356-target\n selected: false\n source: '1750400203722'\n sourceHandle: source\n target: '1751281136356'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1751338398711-source-1750400198569-target\n selected: false\n source: '1751338398711'\n sourceHandle: source\n target: '1750400198569'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: tool\n id: 1751281136356-source-1751338398711-target\n selected: false\n source: '1751281136356'\n sourceHandle: source\n target: '1751338398711'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: hierarchical_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius\/jina\/jina\n index_chunk_variable_selector:\n - '1751338398711'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n reranking_enable: true\n reranking_mode: reranking_model\n reranking_model:\n reranking_model_name: jina-reranker-v1-base-en\n reranking_provider_name: langgenius\/jina\/jina\n score_threshold: 0\n score_threshold_enabled: false\n search_method: hybrid_search\n top_k: 3\n weights: null\n selected: true\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750400198569'\n position:\n x: 355.92518399555183\n y: 282\n positionAbsolute:\n x: 355.92518399555183\n y: 282\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - txt\n - markdown\n - mdx\n - pdf\n - html\n - xlsx\n - xls\n - vtt\n - properties\n - doc\n - docx\n - csv\n - eml\n - msg\n - pptx\n - xml\n - epub\n - ppt\n - md\n plugin_id: langgenius\/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File Upload\n type: datasource\n height: 52\n id: '1750400203722'\n position:\n x: -579\n y: 282\n positionAbsolute:\n x: -579\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 337\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 358\n height: 337\n id: '1751264451381'\n position:\n x: -990.8091030156684\n y: 282\n positionAbsolute:\n x: -990.8091030156684\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 358\n - data:\n author: TenTen\n desc: ''\n height: 260\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n \u2192 use extractor to extract document content \u2192 split and clean content into\n structured chunks \u2192 store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\/knowledge-pipeline\/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\/knowledge-pipeline\/knowledge-pipeline-orchestration\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1182\n height: 260\n id: '1751266376760'\n position:\n x: -579\n y: -22.64803881585007\n positionAbsolute:\n x: -579\n y: -22.64803881585007\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1182\n - data:\n author: TenTen\n desc: ''\n height: 541\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor for large language models (LLMs) like MinerU is a tool\n that preprocesses and converts diverse document types into structured, clean,\n and machine-readable data. This structured data can then be used to train\n or augment LLMs and retrieval-augmented generation (RAG) systems by providing\n them with accurate, well-organized content from varied sources. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MinerU\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n is an advanced open-source document extractor designed specifically to convert\n complex, unstructured documents\u2014such as PDFs, Word files, and PPTs\u2014into\n high-quality, machine-readable formats like Markdown and JSON. MinerU addresses\n challenges in document parsing such as layout detection, formula recognition,\n and multi-language support, which are critical for generating high-quality\n training corpora for LLMs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 541\n id: '1751266402561'\n position:\n x: -263.7680017647218\n y: 558.328085421591\n positionAbsolute:\n x: -263.7680017647218\n y: 558.328085421591\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 554\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n addresses the dilemma of context and precision by leveraging a two-tier\n hierarchical approach that effectively balances the trade-off between accurate\n matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here\n is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Query Matching with Child Chunks: Small, focused pieces of information,\n often as concise as a single sentence within a paragraph, are used to match\n the user''s query. These child chunks enable precise and relevant initial\n retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Contextual Enrichment with Parent Chunks: Larger, encompassing sections\u2014such\n as a paragraph, a section, or even an entire document\u2014that include the matched\n child chunks are then retrieved. These parent chunks provide comprehensive\n context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 554\n id: '1751266447821'\n position:\n x: 42.95253988413964\n y: 366.1915342509804\n positionAbsolute:\n x: 42.95253988413964\n y: 366.1915342509804\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 411\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods:\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0only\n support the\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 411\n id: '1751266580099'\n position:\n x: 355.92518399555183\n y: 434.6494699299023\n positionAbsolute:\n x: 355.92518399555183\n y: 434.6494699299023\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n credential_id: fd1cbc33-1481-47ee-9af2-954b53d350e0\n is_team_authorization: false\n output_schema:\n properties:\n full_zip_url:\n description: The zip URL of the complete parsed result\n type: string\n images:\n description: The images extracted from the file\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: \u89e3\u6790\u3059\u308b\u30d5\u30a1\u30a4\u30eb(pdf\u3001ppt\u3001pptx\u3001doc\u3001docx\u3001png\u3001jpg\u3001jpeg\u3092\u30b5\u30dd\u30fc\u30c8)\n pt_BR: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n zh_Hans: \u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: (For local deployment service)Parsing method, can be auto, ocr,\n or txt. Default is auto. If results are not satisfactory, try ocr\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8\u30b5\u30fc\u30d3\u30b9\u7528\uff09\u89e3\u6790\u65b9\u6cd5\u306f\u3001auto\u3001ocr\u3001\u307e\u305f\u306ftxt\u306e\u3044\u305a\u308c\u304b\u3067\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fauto\u3067\u3059\u3002\u7d50\u679c\u304c\u6e80\u8db3\u3067\u304d\u306a\u3044\u5834\u5408\u306f\u3001ocr\u3092\u8a66\u3057\u3066\u304f\u3060\u3055\u3044\n pt_BR: (For local deployment service)Parsing method, can be auto, ocr,\n or txt. Default is auto. If results are not satisfactory, try ocr\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72\u670d\u52a1\uff09\u89e3\u6790\u65b9\u6cd5\uff0c\u53ef\u4ee5\u662fauto, ocr, \u6216 txt\u3002\u9ed8\u8ba4\u662fauto\u3002\u5982\u679c\u7ed3\u679c\u4e0d\u7406\u60f3\uff0c\u8bf7\u5c1d\u8bd5ocr\n label:\n en_US: parse method\n ja_JP: \u89e3\u6790\u65b9\u6cd5\n pt_BR: parse method\n zh_Hans: \u89e3\u6790\u65b9\u6cd5\n llm_description: Parsing method, can be auto, ocr, or txt. Default is auto.\n If results are not satisfactory, try ocr\n max: null\n min: null\n name: parse_method\n options:\n - label:\n en_US: auto\n ja_JP: auto\n pt_BR: auto\n zh_Hans: auto\n value: auto\n - label:\n en_US: ocr\n ja_JP: ocr\n pt_BR: ocr\n zh_Hans: ocr\n value: ocr\n - label:\n en_US: txt\n ja_JP: txt\n pt_BR: txt\n zh_Hans: txt\n value: txt\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API) Whether to enable formula recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable formula recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n label:\n en_US: Enable formula recognition\n ja_JP: \u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable formula recognition\n zh_Hans: \u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n llm_description: (For official API) Whether to enable formula recognition\n max: null\n min: null\n name: enable_formula\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API) Whether to enable table recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable table recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542f\u8868\u683c\u8bc6\u522b\n label:\n en_US: Enable table recognition\n ja_JP: \u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable table recognition\n zh_Hans: \u5f00\u542f\u8868\u683c\u8bc6\u522b\n llm_description: (For official API) Whether to enable table recognition\n max: null\n min: null\n name: enable_table\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: doclayout_yolo\n form: form\n human_description:\n en_US: '(For official API) Optional values: doclayout_yolo, layoutlmv3,\n default value is doclayout_yolo. doclayout_yolo is a self-developed\n model with better effect'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\uff1adoclayout_yolo\u3001layoutlmv3\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u5024\u306f doclayout_yolo\u3002doclayout_yolo\n \u306f\u81ea\u5df1\u958b\u767a\u30e2\u30c7\u30eb\u3067\u3001\u52b9\u679c\u304c\u3088\u308a\u826f\u3044\n pt_BR: '(For official API) Optional values: doclayout_yolo, layoutlmv3,\n default value is doclayout_yolo. doclayout_yolo is a self-developed\n model with better effect'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u53ef\u9009\u503c\uff1adoclayout_yolo\u3001layoutlmv3\uff0c\u9ed8\u8ba4\u503c\u4e3a doclayout_yolo\u3002doclayout_yolo\n \u4e3a\u81ea\u7814\u6a21\u578b\uff0c\u6548\u679c\u66f4\u597d\n label:\n en_US: Layout model\n ja_JP: \u30ec\u30a4\u30a2\u30a6\u30c8\u691c\u51fa\u30e2\u30c7\u30eb\n pt_BR: Layout model\n zh_Hans: \u5e03\u5c40\u68c0\u6d4b\u6a21\u578b\n llm_description: '(For official API) Optional values: doclayout_yolo, layoutlmv3,\n default value is doclayout_yolo. doclayout_yolo is a self-developed model\n withbetter effect'\n max: null\n min: null\n name: layout_model\n options:\n - label:\n en_US: doclayout_yolo\n ja_JP: doclayout_yolo\n pt_BR: doclayout_yolo\n zh_Hans: doclayout_yolo\n value: doclayout_yolo\n - label:\n en_US: layoutlmv3\n ja_JP: layoutlmv3\n pt_BR: layoutlmv3\n zh_Hans: layoutlmv3\n value: layoutlmv3\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: '(For official API) Specify document language, default ch, can\n be set to auto, when auto, the model will automatically identify document\n language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3001auto\u306b\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002auto\u306e\u5834\u5408\u3001\u30e2\u30c7\u30eb\u306f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u81ea\u52d5\u7684\u306b\u8b58\u5225\u3057\u307e\u3059\u3002\u4ed6\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\u30ea\u30b9\u30c8\u306b\u3064\u3044\u3066\u306f\u3001\u6b21\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5\n pt_BR: '(For official API) Specify document language, default ch, can\n be set to auto, when auto, the model will automatically identify document\n language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u6307\u5b9a\u6587\u6863\u8bed\u8a00\uff0c\u9ed8\u8ba4 ch\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u4e3aauto\uff0c\u5f53\u4e3aauto\u65f6\u6a21\u578b\u4f1a\u81ea\u52a8\u8bc6\u522b\u6587\u6863\u8bed\u8a00\uff0c\u5176\u4ed6\u53ef\u9009\u503c\u5217\u8868\u8be6\u89c1\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5\n label:\n en_US: Document language\n ja_JP: \u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\n pt_BR: Document language\n zh_Hans: \u6587\u6863\u8bed\u8a00\n llm_description: '(For official API) Specify document language, default\n ch, can be set to auto, when auto, the model will automatically identify\n document language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5'\n max: null\n min: null\n name: language\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 0\n form: form\n human_description:\n en_US: (For official API) Whether to enable OCR recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable OCR recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542fOCR\u8bc6\u522b\n label:\n en_US: Enable OCR recognition\n ja_JP: OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable OCR recognition\n zh_Hans: \u5f00\u542fOCR\u8bc6\u522b\n llm_description: (For official API) Whether to enable OCR recognition\n max: null\n min: null\n name: enable_ocr\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: '[]'\n form: form\n human_description:\n en_US: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u4f8b\uff1a[\"docx\",\"html\"]\u3001markdown\u3001json\u306f\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\u3067\u3042\u308a\u3001\u8a2d\u5b9a\u3059\u308b\u5fc5\u8981\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u3053\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u306f\u3001docx\u3001html\u3001latex\u306e3\u3064\u306e\u5f62\u5f0f\u306e\u3044\u305a\u308c\u304b\u307e\u305f\u306f\u8907\u6570\u306e\u307f\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u307e\u3059\n pt_BR: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u793a\u4f8b\uff1a[\"docx\",\"html\"],markdown\u3001json\u4e3a\u9ed8\u8ba4\u5bfc\u51fa\u683c\u5f0f\uff0c\u65e0\u987b\u8bbe\u7f6e\uff0c\u8be5\u53c2\u6570\u4ec5\u652f\u6301docx\u3001html\u3001latex\u4e09\u79cd\u683c\u5f0f\u4e2d\u7684\u4e00\u4e2a\u6216\u591a\u4e2a\n label:\n en_US: Extra export formats\n ja_JP: \u8ffd\u52a0\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\n pt_BR: Extra export formats\n zh_Hans: \u989d\u5916\u5bfc\u51fa\u683c\u5f0f\n llm_description: '(For official API) Example: [\"docx\",\"html\"], markdown,\n json are the default export formats, no need to set, this parameter only\n supports one or more of docx, html, latex'\n max: null\n min: null\n name: extra_formats\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n params:\n enable_formula: ''\n enable_ocr: ''\n enable_table: ''\n extra_formats: ''\n file: ''\n language: ''\n layout_model: ''\n parse_method: ''\n provider_id: langgenius\/mineru\/mineru\n provider_name: langgenius\/mineru\/mineru\n provider_type: builtin\n selected: false\n title: MinerU\n tool_configurations:\n enable_formula:\n type: constant\n value: 1\n enable_ocr:\n type: constant\n value: 0\n enable_table:\n type: constant\n value: 1\n extra_formats:\n type: constant\n value: '[]'\n language:\n type: constant\n value: auto\n layout_model:\n type: constant\n value: doclayout_yolo\n parse_method:\n type: constant\n value: auto\n tool_description: a tool for parsing text, tables, and images, supporting\n multiple formats such as pdf, pptx, docx, etc. supporting multiple languages\n such as English, Chinese, etc.\n tool_label: Parse File\n tool_name: parse-file\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1750400203722'\n - file\n type: tool\n height: 244\n id: '1751281136356'\n position:\n x: -263.7680017647218\n y: 282\n positionAbsolute:\n x: -263.7680017647218\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: Parent child chunks result\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: ''\n ja_JP: ''\n pt_BR: ''\n zh_Hans: ''\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Conte\u00fado de Entrada\n zh_Hans: \u8f93\u5165\u6587\u672c\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: paragraph\n form: llm\n human_description:\n en_US: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n ja_JP: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n pt_BR: Dividir texto em par\u00e1grafos com base no separador e no comprimento\n m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento\n completo como bloco pai e diretamente recuper\u00e1-lo.\n zh_Hans: \u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002\n label:\n en_US: Parent Mode\n ja_JP: Parent Mode\n pt_BR: Modo Pai\n zh_Hans: \u7236\u5757\u6a21\u5f0f\n llm_description: Split text into paragraphs based on separator and maximum\n chunk length, using split text as parent block or entire document as parent\n block and directly retrieve.\n max: null\n min: null\n name: parent_mode\n options:\n - label:\n en_US: Paragraph\n ja_JP: Paragraph\n pt_BR: Par\u00e1grafo\n zh_Hans: \u6bb5\u843d\n value: paragraph\n - label:\n en_US: Full Document\n ja_JP: Full Document\n pt_BR: Documento Completo\n zh_Hans: \u5168\u6587\n value: full_doc\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: '\n\n\n '\n form: llm\n human_description:\n en_US: Separator used for chunking\n ja_JP: Separator used for chunking\n pt_BR: Separador usado para divis\u00e3o\n zh_Hans: \u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Parent Delimiter\n ja_JP: Parent Delimiter\n pt_BR: Separador de Pai\n zh_Hans: \u7236\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split chunks\n max: null\n min: null\n name: separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 1024\n form: llm\n human_description:\n en_US: Maximum length for chunking\n ja_JP: Maximum length for chunking\n pt_BR: Comprimento m\u00e1ximo para divis\u00e3o\n zh_Hans: \u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Parent Chunk Length\n ja_JP: Maximum Parent Chunk Length\n pt_BR: Comprimento M\u00e1ximo do Bloco Pai\n zh_Hans: \u6700\u5927\u7236\u5757\u957f\u5ea6\n llm_description: Maximum length allowed per chunk\n max: null\n min: null\n name: max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: '. '\n form: llm\n human_description:\n en_US: Separator used for subchunking\n ja_JP: Separator used for subchunking\n pt_BR: Separador usado para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Child Delimiter\n ja_JP: Child Delimiter\n pt_BR: Separador de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split subchunks\n max: null\n min: null\n name: subchunk_separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 512\n form: llm\n human_description:\n en_US: Maximum length for subchunking\n ja_JP: Maximum length for subchunking\n pt_BR: Comprimento m\u00e1ximo para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Child Chunk Length\n ja_JP: Maximum Child Chunk Length\n pt_BR: Comprimento M\u00e1ximo de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6\n llm_description: Maximum length allowed per subchunk\n max: null\n min: null\n name: subchunk_max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove consecutive spaces, newlines and tabs\n ja_JP: Whether to remove consecutive spaces, newlines and tabs\n pt_BR: Se deve remover espa\u00e7os extras no texto\n zh_Hans: \u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Substituir espa\u00e7os consecutivos, novas linhas e guias\n zh_Hans: \u66ff\u6362\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n llm_description: Whether to remove consecutive spaces, newlines and tabs\n max: null\n min: null\n name: remove_extra_spaces\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove URLs and emails in the text\n ja_JP: Whether to remove URLs and emails in the text\n pt_BR: Se deve remover URLs e e-mails no texto\n zh_Hans: \u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Remover todas as URLs e e-mails\n zh_Hans: \u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n llm_description: Whether to remove URLs and emails in the text\n max: null\n min: null\n name: remove_urls_emails\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n input_text: ''\n max_length: ''\n parent_mode: ''\n remove_extra_spaces: ''\n remove_urls_emails: ''\n separator: ''\n subchunk_max_length: ''\n subchunk_separator: ''\n provider_id: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_name: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_type: builtin\n selected: false\n title: Parent-child Chunker\n tool_configurations: {}\n tool_description: Process documents into parent-child chunk structures\n tool_label: Parent-child Chunker\n tool_name: parentchild_chunker\n tool_node_version: '2'\n tool_parameters:\n input_text:\n type: mixed\n value: '{{#1751281136356.text#}}'\n max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Parent_Length\n parent_mode:\n type: variable\n value:\n - rag\n - shared\n - Parent_Mode\n remove_extra_spaces:\n type: variable\n value:\n - rag\n - shared\n - clean_1\n remove_urls_emails:\n type: variable\n value:\n - rag\n - shared\n - clean_2\n separator:\n type: mixed\n value: '{{#rag.shared.Parent_Delimiter#}}'\n subchunk_max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Child_Length\n subchunk_separator:\n type: mixed\n value: '{{#rag.shared.Child_Delimiter#}}'\n type: tool\n height: 52\n id: '1751338398711'\n position:\n x: 42.95253988413964\n y: 282\n positionAbsolute:\n x: 42.95253988413964\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n viewport:\n x: 628.3302331655243\n y: 120.08894361588159\n zoom: 0.7027501395646496\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: paragraph\n label: Parent Mode\n max_length: 48\n options:\n - paragraph\n - full_doc\n placeholder: null\n required: true\n tooltips: 'Parent Mode provides two options: paragraph mode splits text into paragraphs\n as parent chunks for retrieval, while full_doc mode uses the entire document\n as a single parent chunk (text beyond 10,000 tokens will be truncated).'\n type: select\n unit: null\n variable: Parent_Mode\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Parent Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: A delimiter is the character used to separate text. \\n\\n is recommended\n for splitting the original document into large parent chunks. You can also use\n special delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Parent_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Parent Length\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Parent_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\n label: Child Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n is recommended\n for splitting parent chunks into small child chunks. You can also use special\n delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Child_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 256\n label: Maximum Child Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Child_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: true\n label: Replace consecutive spaces, newlines and tabs.\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: null\n label: Delete all URLs and email addresses.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_2\n", + "graph": { + "edges": [ + { + "data": { + "isInLoop": false, + "sourceType": "datasource", + "targetType": "tool" + }, + "id": "1750400203722-source-1751281136356-target", + "selected": false, + "source": "1750400203722", + "sourceHandle": "source", + "target": "1751281136356", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "tool", + "targetType": "knowledge-index" + }, + "id": "1751338398711-source-1750400198569-target", + "selected": false, + "source": "1751338398711", + "sourceHandle": "source", + "target": "1750400198569", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + }, + { + "data": { + "isInLoop": false, + "sourceType": "tool", + "targetType": "tool" + }, + "id": "1751281136356-source-1751338398711-target", + "selected": false, + "source": "1751281136356", + "sourceHandle": "source", + "target": "1751338398711", + "targetHandle": "target", + "type": "custom", + "zIndex": 0 + } + ], + "nodes": [ + { + "data": { + "chunk_structure": "hierarchical_model", + "embedding_model": "jina-embeddings-v2-base-en", + "embedding_model_provider": "langgenius\/jina\/jina", + "index_chunk_variable_selector": [ + "1751338398711", + "result" + ], + "indexing_technique": "high_quality", + "keyword_number": 10, + "retrieval_model": { + "reranking_enable": true, + "reranking_mode": "reranking_model", + "reranking_model": { + "reranking_model_name": "jina-reranker-v1-base-en", + "reranking_provider_name": "langgenius\/jina\/jina" + }, + "score_threshold": 0, + "score_threshold_enabled": false, + "search_method": "hybrid_search", + "top_k": 3, + "weights": null + }, + "selected": true, + "title": "Knowledge Base", + "type": "knowledge-index" + }, + "height": 114, + "id": "1750400198569", + "position": { + "x": 355.92518399555183, + "y": 282 + }, + "positionAbsolute": { + "x": 355.92518399555183, + "y": 282 + }, + "selected": true, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "datasource_configurations": {}, + "datasource_label": "File", + "datasource_name": "upload-file", + "datasource_parameters": {}, + "fileExtensions": [ + "txt", + "markdown", + "mdx", + "pdf", + "html", + "xlsx", + "xls", + "vtt", + "properties", + "doc", + "docx", + "csv", + "eml", + "msg", + "pptx", + "xml", + "epub", + "ppt", + "md" + ], + "plugin_id": "langgenius\/file", + "provider_name": "file", + "provider_type": "local_file", + "selected": false, + "title": "File Upload", + "type": "datasource" + }, + "height": 52, + "id": "1750400203722", + "position": { + "x": -579, + "y": 282 + }, + "positionAbsolute": { + "x": -579, + "y": 282 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 337, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 358 + }, + "height": 337, + "id": "1751264451381", + "position": { + "x": -990.8091030156684, + "y": 282 + }, + "positionAbsolute": { + "x": -990.8091030156684, + "y": 282 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 358 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 260, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source \u2192 use extractor to extract document content \u2192 split and clean content into structured chunks \u2192 store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\/knowledge-pipeline\/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\/knowledge-pipeline\/knowledge-pipeline-orchestration\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 1182 + }, + "height": 260, + "id": "1751266376760", + "position": { + "x": -579, + "y": -22.64803881585007 + }, + "positionAbsolute": { + "x": -579, + "y": -22.64803881585007 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 1182 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 541, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A document extractor for large language models (LLMs) like MinerU is a tool that preprocesses and converts diverse document types into structured, clean, and machine-readable data. This structured data can then be used to train or augment LLMs and retrieval-augmented generation (RAG) systems by providing them with accurate, well-organized content from varied sources. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MinerU\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is an advanced open-source document extractor designed specifically to convert complex, unstructured documents\u2014such as PDFs, Word files, and PPTs\u2014into high-quality, machine-readable formats like Markdown and JSON. MinerU addresses challenges in document parsing such as layout detection, formula recognition, and multi-language support, which are critical for generating high-quality training corpora for LLMs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 541, + "id": "1751266402561", + "position": { + "x": -263.7680017647218, + "y": 558.328085421591 + }, + "positionAbsolute": { + "x": -263.7680017647218, + "y": 558.328085421591 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 554, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" addresses the dilemma of context and precision by leveraging a two-tier hierarchical approach that effectively balances the trade-off between accurate matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Query Matching with Child Chunks: Small, focused pieces of information, often as concise as a single sentence within a paragraph, are used to match the user's query. These child chunks enable precise and relevant initial retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Contextual Enrichment with Parent Chunks: Larger, encompassing sections\u2014such as a paragraph, a section, or even an entire document\u2014that include the matched child chunks are then retrieved. These parent chunks provide comprehensive context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 554, + "id": "1751266447821", + "position": { + "x": 42.95253988413964, + "y": 366.1915342509804 + }, + "positionAbsolute": { + "x": 42.95253988413964, + "y": 366.1915342509804 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "author": "TenTen", + "desc": "", + "height": 411, + "selected": false, + "showAuthor": true, + "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods:\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0only support the\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}", + "theme": "blue", + "title": "", + "type": "", + "width": 240 + }, + "height": 411, + "id": "1751266580099", + "position": { + "x": 355.92518399555183, + "y": 434.6494699299023 + }, + "positionAbsolute": { + "x": 355.92518399555183, + "y": 434.6494699299023 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom-note", + "width": 240 + }, + { + "data": { + "credential_id": "fd1cbc33-1481-47ee-9af2-954b53d350e0", + "is_team_authorization": false, + "output_schema": { + "properties": { + "full_zip_url": { + "description": "The zip URL of the complete parsed result", + "type": "string" + }, + "images": { + "description": "The images extracted from the file", + "items": { + "type": "object" + }, + "type": "array" + } + }, + "type": "object" + }, + "paramSchemas": [ + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "ja_JP": "\u89e3\u6790\u3059\u308b\u30d5\u30a1\u30a4\u30eb(pdf\u3001ppt\u3001pptx\u3001doc\u3001docx\u3001png\u3001jpg\u3001jpeg\u3092\u30b5\u30dd\u30fc\u30c8)", + "pt_BR": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "zh_Hans": "\u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)" + }, + "label": { + "en_US": "file", + "ja_JP": "file", + "pt_BR": "file", + "zh_Hans": "file" + }, + "llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", + "max": null, + "min": null, + "name": "file", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "file" + }, + { + "auto_generate": null, + "default": "auto", + "form": "form", + "human_description": { + "en_US": "(For local deployment service)Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr", + "ja_JP": "\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8\u30b5\u30fc\u30d3\u30b9\u7528\uff09\u89e3\u6790\u65b9\u6cd5\u306f\u3001auto\u3001ocr\u3001\u307e\u305f\u306ftxt\u306e\u3044\u305a\u308c\u304b\u3067\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fauto\u3067\u3059\u3002\u7d50\u679c\u304c\u6e80\u8db3\u3067\u304d\u306a\u3044\u5834\u5408\u306f\u3001ocr\u3092\u8a66\u3057\u3066\u304f\u3060\u3055\u3044", + "pt_BR": "(For local deployment service)Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr", + "zh_Hans": "\uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72\u670d\u52a1\uff09\u89e3\u6790\u65b9\u6cd5\uff0c\u53ef\u4ee5\u662fauto, ocr, \u6216 txt\u3002\u9ed8\u8ba4\u662fauto\u3002\u5982\u679c\u7ed3\u679c\u4e0d\u7406\u60f3\uff0c\u8bf7\u5c1d\u8bd5ocr" + }, + "label": { + "en_US": "parse method", + "ja_JP": "\u89e3\u6790\u65b9\u6cd5", + "pt_BR": "parse method", + "zh_Hans": "\u89e3\u6790\u65b9\u6cd5" + }, + "llm_description": "Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr", + "max": null, + "min": null, + "name": "parse_method", + "options": [ + { + "label": { + "en_US": "auto", + "ja_JP": "auto", + "pt_BR": "auto", + "zh_Hans": "auto" + }, + "value": "auto" + }, + { + "label": { + "en_US": "ocr", + "ja_JP": "ocr", + "pt_BR": "ocr", + "zh_Hans": "ocr" + }, + "value": "ocr" + }, + { + "label": { + "en_US": "txt", + "ja_JP": "txt", + "pt_BR": "txt", + "zh_Hans": "txt" + }, + "value": "txt" + } + ], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "select" + }, + { + "auto_generate": null, + "default": 1, + "form": "form", + "human_description": { + "en_US": "(For official API) Whether to enable formula recognition", + "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b", + "pt_BR": "(For official API) Whether to enable formula recognition", + "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542f\u516c\u5f0f\u8bc6\u522b" + }, + "label": { + "en_US": "Enable formula recognition", + "ja_JP": "\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b", + "pt_BR": "Enable formula recognition", + "zh_Hans": "\u5f00\u542f\u516c\u5f0f\u8bc6\u522b" + }, + "llm_description": "(For official API) Whether to enable formula recognition", + "max": null, + "min": null, + "name": "enable_formula", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + }, + { + "auto_generate": null, + "default": 1, + "form": "form", + "human_description": { + "en_US": "(For official API) Whether to enable table recognition", + "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b", + "pt_BR": "(For official API) Whether to enable table recognition", + "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542f\u8868\u683c\u8bc6\u522b" + }, + "label": { + "en_US": "Enable table recognition", + "ja_JP": "\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b", + "pt_BR": "Enable table recognition", + "zh_Hans": "\u5f00\u542f\u8868\u683c\u8bc6\u522b" + }, + "llm_description": "(For official API) Whether to enable table recognition", + "max": null, + "min": null, + "name": "enable_table", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + }, + { + "auto_generate": null, + "default": "doclayout_yolo", + "form": "form", + "human_description": { + "en_US": "(For official API) Optional values: doclayout_yolo, layoutlmv3, default value is doclayout_yolo. doclayout_yolo is a self-developed model with better effect", + "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\uff1adoclayout_yolo\u3001layoutlmv3\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u5024\u306f doclayout_yolo\u3002doclayout_yolo \u306f\u81ea\u5df1\u958b\u767a\u30e2\u30c7\u30eb\u3067\u3001\u52b9\u679c\u304c\u3088\u308a\u826f\u3044", + "pt_BR": "(For official API) Optional values: doclayout_yolo, layoutlmv3, default value is doclayout_yolo. doclayout_yolo is a self-developed model with better effect", + "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u53ef\u9009\u503c\uff1adoclayout_yolo\u3001layoutlmv3\uff0c\u9ed8\u8ba4\u503c\u4e3a doclayout_yolo\u3002doclayout_yolo \u4e3a\u81ea\u7814\u6a21\u578b\uff0c\u6548\u679c\u66f4\u597d" + }, + "label": { + "en_US": "Layout model", + "ja_JP": "\u30ec\u30a4\u30a2\u30a6\u30c8\u691c\u51fa\u30e2\u30c7\u30eb", + "pt_BR": "Layout model", + "zh_Hans": "\u5e03\u5c40\u68c0\u6d4b\u6a21\u578b" + }, + "llm_description": "(For official API) Optional values: doclayout_yolo, layoutlmv3, default value is doclayout_yolo. doclayout_yolo is a self-developed model withbetter effect", + "max": null, + "min": null, + "name": "layout_model", + "options": [ + { + "label": { + "en_US": "doclayout_yolo", + "ja_JP": "doclayout_yolo", + "pt_BR": "doclayout_yolo", + "zh_Hans": "doclayout_yolo" + }, + "value": "doclayout_yolo" + }, + { + "label": { + "en_US": "layoutlmv3", + "ja_JP": "layoutlmv3", + "pt_BR": "layoutlmv3", + "zh_Hans": "layoutlmv3" + }, + "value": "layoutlmv3" + } + ], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "select" + }, + { + "auto_generate": null, + "default": "auto", + "form": "form", + "human_description": { + "en_US": "(For official API) Specify document language, default ch, can be set to auto, when auto, the model will automatically identify document language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5", + "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3001auto\u306b\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002auto\u306e\u5834\u5408\u3001\u30e2\u30c7\u30eb\u306f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u81ea\u52d5\u7684\u306b\u8b58\u5225\u3057\u307e\u3059\u3002\u4ed6\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\u30ea\u30b9\u30c8\u306b\u3064\u3044\u3066\u306f\u3001\u6b21\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5", + "pt_BR": "(For official API) Specify document language, default ch, can be set to auto, when auto, the model will automatically identify document language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5", + "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u6307\u5b9a\u6587\u6863\u8bed\u8a00\uff0c\u9ed8\u8ba4 ch\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u4e3aauto\uff0c\u5f53\u4e3aauto\u65f6\u6a21\u578b\u4f1a\u81ea\u52a8\u8bc6\u522b\u6587\u6863\u8bed\u8a00\uff0c\u5176\u4ed6\u53ef\u9009\u503c\u5217\u8868\u8be6\u89c1\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5" + }, + "label": { + "en_US": "Document language", + "ja_JP": "\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e", + "pt_BR": "Document language", + "zh_Hans": "\u6587\u6863\u8bed\u8a00" + }, + "llm_description": "(For official API) Specify document language, default ch, can be set to auto, when auto, the model will automatically identify document language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5", + "max": null, + "min": null, + "name": "language", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": 0, + "form": "form", + "human_description": { + "en_US": "(For official API) Whether to enable OCR recognition", + "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b", + "pt_BR": "(For official API) Whether to enable OCR recognition", + "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542fOCR\u8bc6\u522b" + }, + "label": { + "en_US": "Enable OCR recognition", + "ja_JP": "OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b", + "pt_BR": "Enable OCR recognition", + "zh_Hans": "\u5f00\u542fOCR\u8bc6\u522b" + }, + "llm_description": "(For official API) Whether to enable OCR recognition", + "max": null, + "min": null, + "name": "enable_ocr", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + }, + { + "auto_generate": null, + "default": "[]", + "form": "form", + "human_description": { + "en_US": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex", + "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u4f8b\uff1a[\"docx\",\"html\"]\u3001markdown\u3001json\u306f\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\u3067\u3042\u308a\u3001\u8a2d\u5b9a\u3059\u308b\u5fc5\u8981\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u3053\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u306f\u3001docx\u3001html\u3001latex\u306e3\u3064\u306e\u5f62\u5f0f\u306e\u3044\u305a\u308c\u304b\u307e\u305f\u306f\u8907\u6570\u306e\u307f\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u307e\u3059", + "pt_BR": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex", + "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u793a\u4f8b\uff1a[\"docx\",\"html\"],markdown\u3001json\u4e3a\u9ed8\u8ba4\u5bfc\u51fa\u683c\u5f0f\uff0c\u65e0\u987b\u8bbe\u7f6e\uff0c\u8be5\u53c2\u6570\u4ec5\u652f\u6301docx\u3001html\u3001latex\u4e09\u79cd\u683c\u5f0f\u4e2d\u7684\u4e00\u4e2a\u6216\u591a\u4e2a" + }, + "label": { + "en_US": "Extra export formats", + "ja_JP": "\u8ffd\u52a0\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f", + "pt_BR": "Extra export formats", + "zh_Hans": "\u989d\u5916\u5bfc\u51fa\u683c\u5f0f" + }, + "llm_description": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex", + "max": null, + "min": null, + "name": "extra_formats", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "string" + } + ], + "params": { + "enable_formula": "", + "enable_ocr": "", + "enable_table": "", + "extra_formats": "", + "file": "", + "language": "", + "layout_model": "", + "parse_method": "" + }, + "provider_id": "langgenius\/mineru\/mineru", + "provider_name": "langgenius\/mineru\/mineru", + "provider_type": "builtin", + "selected": false, + "title": "MinerU", + "tool_configurations": { + "enable_formula": { + "type": "constant", + "value": 1 + }, + "enable_ocr": { + "type": "constant", + "value": 0 + }, + "enable_table": { + "type": "constant", + "value": 1 + }, + "extra_formats": { + "type": "constant", + "value": "[]" + }, + "language": { + "type": "constant", + "value": "auto" + }, + "layout_model": { + "type": "constant", + "value": "doclayout_yolo" + }, + "parse_method": { + "type": "constant", + "value": "auto" + } + }, + "tool_description": "a tool for parsing text, tables, and images, supporting multiple formats such as pdf, pptx, docx, etc. supporting multiple languages such as English, Chinese, etc.", + "tool_label": "Parse File", + "tool_name": "parse-file", + "tool_node_version": "2", + "tool_parameters": { + "file": { + "type": "variable", + "value": [ + "1750400203722", + "file" + ] + } + }, + "type": "tool" + }, + "height": 244, + "id": "1751281136356", + "position": { + "x": -263.7680017647218, + "y": 282 + }, + "positionAbsolute": { + "x": -263.7680017647218, + "y": 282 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + }, + { + "data": { + "is_team_authorization": true, + "output_schema": { + "properties": { + "result": { + "description": "Parent child chunks result", + "items": { + "type": "object" + }, + "type": "array" + } + }, + "type": "object" + }, + "paramSchemas": [ + { + "auto_generate": null, + "default": null, + "form": "llm", + "human_description": { + "en_US": "", + "ja_JP": "", + "pt_BR": "", + "zh_Hans": "" + }, + "label": { + "en_US": "Input Content", + "ja_JP": "Input Content", + "pt_BR": "Conte\u00fado de Entrada", + "zh_Hans": "\u8f93\u5165\u6587\u672c" + }, + "llm_description": "The text you want to chunk.", + "max": null, + "min": null, + "name": "input_text", + "options": [], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": "paragraph", + "form": "llm", + "human_description": { + "en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", + "ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", + "pt_BR": "Dividir texto em par\u00e1grafos com base no separador e no comprimento m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuper\u00e1-lo.", + "zh_Hans": "\u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002" + }, + "label": { + "en_US": "Parent Mode", + "ja_JP": "Parent Mode", + "pt_BR": "Modo Pai", + "zh_Hans": "\u7236\u5757\u6a21\u5f0f" + }, + "llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", + "max": null, + "min": null, + "name": "parent_mode", + "options": [ + { + "label": { + "en_US": "Paragraph", + "ja_JP": "Paragraph", + "pt_BR": "Par\u00e1grafo", + "zh_Hans": "\u6bb5\u843d" + }, + "value": "paragraph" + }, + { + "label": { + "en_US": "Full Document", + "ja_JP": "Full Document", + "pt_BR": "Documento Completo", + "zh_Hans": "\u5168\u6587" + }, + "value": "full_doc" + } + ], + "placeholder": null, + "precision": null, + "required": true, + "scope": null, + "template": null, + "type": "select" + }, + { + "auto_generate": null, + "default": "\n\n", + "form": "llm", + "human_description": { + "en_US": "Separator used for chunking", + "ja_JP": "Separator used for chunking", + "pt_BR": "Separador usado para divis\u00e3o", + "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26" + }, + "label": { + "en_US": "Parent Delimiter", + "ja_JP": "Parent Delimiter", + "pt_BR": "Separador de Pai", + "zh_Hans": "\u7236\u5757\u5206\u9694\u7b26" + }, + "llm_description": "The separator used to split chunks", + "max": null, + "min": null, + "name": "separator", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": 1024, + "form": "llm", + "human_description": { + "en_US": "Maximum length for chunking", + "ja_JP": "Maximum length for chunking", + "pt_BR": "Comprimento m\u00e1ximo para divis\u00e3o", + "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6" + }, + "label": { + "en_US": "Maximum Parent Chunk Length", + "ja_JP": "Maximum Parent Chunk Length", + "pt_BR": "Comprimento M\u00e1ximo do Bloco Pai", + "zh_Hans": "\u6700\u5927\u7236\u5757\u957f\u5ea6" + }, + "llm_description": "Maximum length allowed per chunk", + "max": null, + "min": null, + "name": "max_length", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "number" + }, + { + "auto_generate": null, + "default": ". ", + "form": "llm", + "human_description": { + "en_US": "Separator used for subchunking", + "ja_JP": "Separator used for subchunking", + "pt_BR": "Separador usado para subdivis\u00e3o", + "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26" + }, + "label": { + "en_US": "Child Delimiter", + "ja_JP": "Child Delimiter", + "pt_BR": "Separador de Subdivis\u00e3o", + "zh_Hans": "\u5b50\u5206\u5757\u5206\u9694\u7b26" + }, + "llm_description": "The separator used to split subchunks", + "max": null, + "min": null, + "name": "subchunk_separator", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "string" + }, + { + "auto_generate": null, + "default": 512, + "form": "llm", + "human_description": { + "en_US": "Maximum length for subchunking", + "ja_JP": "Maximum length for subchunking", + "pt_BR": "Comprimento m\u00e1ximo para subdivis\u00e3o", + "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6" + }, + "label": { + "en_US": "Maximum Child Chunk Length", + "ja_JP": "Maximum Child Chunk Length", + "pt_BR": "Comprimento M\u00e1ximo de Subdivis\u00e3o", + "zh_Hans": "\u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6" + }, + "llm_description": "Maximum length allowed per subchunk", + "max": null, + "min": null, + "name": "subchunk_max_length", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "number" + }, + { + "auto_generate": null, + "default": 0, + "form": "llm", + "human_description": { + "en_US": "Whether to remove consecutive spaces, newlines and tabs", + "ja_JP": "Whether to remove consecutive spaces, newlines and tabs", + "pt_BR": "Se deve remover espa\u00e7os extras no texto", + "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26" + }, + "label": { + "en_US": "Replace consecutive spaces, newlines and tabs", + "ja_JP": "Replace consecutive spaces, newlines and tabs", + "pt_BR": "Substituir espa\u00e7os consecutivos, novas linhas e guias", + "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26" + }, + "llm_description": "Whether to remove consecutive spaces, newlines and tabs", + "max": null, + "min": null, + "name": "remove_extra_spaces", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + }, + { + "auto_generate": null, + "default": 0, + "form": "llm", + "human_description": { + "en_US": "Whether to remove URLs and emails in the text", + "ja_JP": "Whether to remove URLs and emails in the text", + "pt_BR": "Se deve remover URLs e e-mails no texto", + "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740" + }, + "label": { + "en_US": "Delete all URLs and email addresses", + "ja_JP": "Delete all URLs and email addresses", + "pt_BR": "Remover todas as URLs e e-mails", + "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740" + }, + "llm_description": "Whether to remove URLs and emails in the text", + "max": null, + "min": null, + "name": "remove_urls_emails", + "options": [], + "placeholder": null, + "precision": null, + "required": false, + "scope": null, + "template": null, + "type": "boolean" + } + ], + "params": { + "input_text": "", + "max_length": "", + "parent_mode": "", + "remove_extra_spaces": "", + "remove_urls_emails": "", + "separator": "", + "subchunk_max_length": "", + "subchunk_separator": "" + }, + "provider_id": "langgenius\/parentchild_chunker\/parentchild_chunker", + "provider_name": "langgenius\/parentchild_chunker\/parentchild_chunker", + "provider_type": "builtin", + "selected": false, + "title": "Parent-child Chunker", + "tool_configurations": {}, + "tool_description": "Process documents into parent-child chunk structures", + "tool_label": "Parent-child Chunker", + "tool_name": "parentchild_chunker", + "tool_node_version": "2", + "tool_parameters": { + "input_text": { + "type": "mixed", + "value": "{{#1751281136356.text#}}" + }, + "max_length": { + "type": "variable", + "value": [ + "rag", + "shared", + "Maximum_Parent_Length" + ] + }, + "parent_mode": { + "type": "variable", + "value": [ + "rag", + "shared", + "Parent_Mode" + ] + }, + "remove_extra_spaces": { + "type": "variable", + "value": [ + "rag", + "shared", + "clean_1" + ] + }, + "remove_urls_emails": { + "type": "variable", + "value": [ + "rag", + "shared", + "clean_2" + ] + }, + "separator": { + "type": "mixed", + "value": "{{#rag.shared.Parent_Delimiter#}}" + }, + "subchunk_max_length": { + "type": "variable", + "value": [ + "rag", + "shared", + "Maximum_Child_Length" + ] + }, + "subchunk_separator": { + "type": "mixed", + "value": "{{#rag.shared.Child_Delimiter#}}" + } + }, + "type": "tool" + }, + "height": 52, + "id": "1751338398711", + "position": { + "x": 42.95253988413964, + "y": 282 + }, + "positionAbsolute": { + "x": 42.95253988413964, + "y": 282 + }, + "selected": false, + "sourcePosition": "right", + "targetPosition": "left", + "type": "custom", + "width": 242 + } + ], + "viewport": { + "x": 628.3302331655243, + "y": 120.08894361588159, + "zoom": 0.7027501395646496 + } + }, + "icon_info": { + "icon": "87426868-91d6-4774-a535-5fd4595a77b3", + "icon_background": null, + "icon_type": "image", + "icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAARwElEQVR4Ae1dvXPcxhVfLMAP0RR1pL7MGVu8G7sXXdszotNYne1x6kgpktZSiiRNIrtMilgqnNZSb4\/lzm4i5i8w1TvDE+UZyZIlnihKOvIAbN5v7\/aIw93xPvBBHPDezBHYBbC7+O2Pb9++\/YAlMiIPHjwoO65btpQqK6VKVKySsqwV9fQpSliy6IcTubhYxrFTrJJqXe+Mz2+I8KgJoeh3IIRBTW1vt+MoXLWWlgRheo\/uqlmWVSVMa67jVJeXl6sHTx7dGb1HurK9uVnybHtNKXFBWAKEW1XCKvcrhb+tCdi+LBeX2ud80o3AaHipDUGkFErdJXJu2J63vliptAncnXr8MakQ8PH9+2tU9Av0omtCCZx3iZSSsLCE49j6iHPE+U+fCEnnCEOmTp\/uehbXzPWuizmNoFaC4CQdFxCE3V9\/bcd4vk8txpLwW\/f6FPZ9RT8c\/fZ9nSdESmGtK1veOvPGG3SerCRGQGg6V8rLxIwPg6QDUWzb1kTDcXrKaROu16v6T550RMuTJzvCHOhEYBS8PM8TIGmj4QrX9ejndiRG5Kj6lvj8zLlzNzsuxBiInYCaeI7zqeWrK8YuA+lmZqbF9PSUcIh0o2irUQCNEZeJTSoqXg0i4d7evial0ZIgopLWzdNvvvl53MDESsBfNrc+sqX6wth0juOIublZMUXHcSUqoOPmO6nPxYkXiFinn9GMIGLcGjEWApLWK7u2\/ZVpauMgniFAnICaNPN8TAIvaMXd3ZcHdqMlbjve1NXFSvSetIxaGU\/u3\/\/Uk\/aPIB+a1rm5Y+LEwnwkrRe1TPx8vAigBVssLYj51+Z0x5Dq+iNXNn58tLV1OWpOYxMQtt7jra0vqFd1HbYe7DsU8tjsTNQy8fMZRQB2PJQLjiQlS4mvwIEoxR2rCdZNrpTfUnd9FVrv2LHZxIiXRJMSBbCsP5sWXvX6nnj1qq5dPOQQ33D86Y\/HaZJH1oAgnyflHZAPfrrSieOJkS\/rlV3k8s1SS3eC6h4cABc82bizvfmgPComIxHQkA+9XPjwoI6bBRg1W74\/Dwig7sEBuNbIDCPFNDoJhyYgky8PlIn\/HUDChQgkHIqAvcg3ijM5\/tfmFLOEALgwLgmHIiANqX0bbHaZfFmq\/myUJUxCV+5\/S4qrNKh0AwnY7GY3OxwLx18baRhtUOZ8PV8IgITHiSOmY0KDE9cGveGhBHy0SY5GJa4gYe5wDIKSrwMB0zHBDCZw5+G9e1cOQ6YvAWH3kX2pnYzw8zVZfVhSfI0RaCIAroAzEJp6cu0w90xfApL6pEkFogSvN49uNIHlv8MjAD8hRsdISq7d+Krfkz0J2Gp6PwKT51pM7pcAxzMC\/RDQY8fNpnjtV5op1eu+ngSUUmnjEeTjprcXbBw3DALoO5imWJA516tX3EVAmt1yDS4XEK816DxMXnwPI9ATATTFmJ5H5lx5X8quDkkXAZXvX0ZK8\/NzPRPkSEZgVAQwKRlCq34+DWvBDgLC9oP2w\/yvKLOYdW78hxFoIQAuQQuSNNcJBZDpIKCx\/bjpDSDEp7EgYLQgjWR8GEywTcBHmz\/r9bls+wXh4fO4EIAWbDmn1x5v3l8z6bYJKKV3GZFTtEyShRFIAoHp5kxq4Ut\/zaTfJqAS8gIiufk10PAxbgRajmloQs01pK+n5KNn4kp7GxEnlwZOYMBtqUl4inlqGeckoywt5MfODbXajp7G7\/jeIrYB0RoQe7UAb+755oR1GX0NOKYlzZ6GGM5pAhIzVxFp074sLIxAkghg7x8I7VezhmPTBrSs8wiwBgQKLEkigLVEEIyM4Njs8iqLAtQNsdt9ElzLhGTJhskEIBNeCGxG9YLegaZpaaXXYlyzCcbqJhZGIEkEYAdCjAaUD2jiKSJ41gtQYEkaAd0RoYkuEOyKK2mMroyA3YrEOQsjkCQCRgs6dbcsaYtc7fizZFM1Jpkxp80IAAHTE7ZsVZbkgikjkptgoMCSBgJGAxL3SmiMmxqwZRymUQDOo9gIGAKCe9L0RgKRxUaH3z5xBExrS5xbaTv+9FSZxLPmDBiBTgSId9YKorLohO4sKofygoBRdp5Si20NmJeX4\/fIPgLG40JEPMEEzH595bqEtF7Ool4wLUWa0F7wr+\/\/JlMVdOrOfzrKY8p3\/C9\/FjMXL3ZcK2rADHrQHtPkiBa+dsOYdrmooCT93s\/\/8U+x9\/33SWczcelzE5xilYGEjY2NFHPMflZMwJTraOdvfxfuTz+lnGt2s3O8bb0URPheA+NxsZeU5\/N1Qqp2d8Wzq38SJ774l3DefrvzYgZDSazJ0V\/r3Hmu3xZTEHgoLuWKNyT0Hj5MOedsZBfo8OqhOCbgEdQLSLhDmrCIJOwg4BFgz1m2EAD5ikpCQwIHX9SGyJjWAydhM5jC5vFoSLhANqH9+uuZf8W4bHppNZd\/xN\/ryDyE2SugIWERm2MmYEb4aEgI27BIwgTMUG2DhDXqmBSJhEzADBEQRfHISV0kEjIBM0ZAQ0KMmBRBmIAZrWWMGWPsOO\/CBMxwDWP2TN5JyATMMAFRNJBw98t\/Z7yU4xePCTg+dqk9Wf\/6a\/Hy1q3U8kszIyZgmmhHyOvlzVu5JCETMAIp0n40jyRkAqbNooj55Y2ETMCIhDiKx0HCV19\/cxRZx54nEzB2SNNJ8MWXX+ZikRMTMB2+JJJLHnyE\/FmkRKhxkGh4nfDBFT4DAqwBmQdHigAT8Ejh58yZgMyBI0WAbcCY4Td7wcScbN\/kJt3GZA3Yt2r5QhoIMAHTQJnz6IsAE7AvNHwhDQSYgGmgzHn0RYAJ2BcavpAGAkzANFDmPPoiwATsCw1fSAOBifcDTrofLI1KznIerAGzXDsFKBsTsACVnOVXZAJmuXYKUDYmYAEqOcuvyATMcu0UoGxMwAJUcpZfkQmY5dopQNkmzg846nw7m77Fge9xzH7wgZhaPT+wSodN35qf1+kibef8eTHz3rsD0+51w7D59Xq2V9yk+UUnjoC9QD8sDhs+4odNfqZWV8U8fTQwjs3AsYsptlDTn96ivVt2iZDT770n5i79Lpb0D3unPF0rVBMMstT+8MdEPpUFQoLkSD8vi8bTIHqhCAhAQRR8KiupHemRPhaN53lLtTiJOfFN8CCbp7FxV9RJM+398EMbN5Bkl3YfxffaBkm\/9P2Hv2gSI2337t0uQmNLNeSD7wSPIv3yGyWNSbp34gk4CGx0PPCD3RfcY8\/Yb7ALxxH5+lmBn+nY7H3\/g04\/qFnRJDtvvSWO\/faTcbIoxDOFaYLnLl\/SnZBgrYI0ccnMxQ9Er68doTnmz7P2R7kwBAQE6KEGpUFNZ5wCLdubhPndYjcqfoUiYPj7vMHmMiqQ5nmQEK6eoKC5hz3I0o1AoQgI53EaArsybFvWY2zu03iHtPIoFAHRIw5KWCMGr0U9n363c2QEznCWbgQKRcB6wBUDKOTZs92IxBRjescmubjtTZPupB9z74YxFQQXDNwiQZm9eDEYjPU8PNznD2kDjjo2POl+w1wTEIa\/+9P\/tH9Oj9kGKAaCTI85gSCQTN\/TsL3JnZDeUE08AUfVGIAB5IC7hOXoESiUDQi4QT4MwYWbyLirIqzxwhox7vwmNb2J14CjAB\/ndKxB+aLpD8qwhJ90my74zsOc556Akmy9GXKJYK5euGc6DEDj3hMefkuyxz1uGbPw3MQTMKsao\/5N54dkZugfgKUbgcLZgN0QxB+DSQ7hYT5niOUA8Zck+yk6\/vZTXUpfedkv7QSUEMQLTvtCkWdoPcqwNmDWX9F\/8iSWIvq1Zzod1oCxwNlMBOTb6THbGlPBWHoj4FhC1JQQJaWUsCwKsYyFwCuy+fARwbD7Ze7Spdxov7GA6fEQuNaSmkOnNQowAQ0kQx4xJb9BEwwwHR\/T8sPEQzJoeln7dQPaQUB7cVGQ7hOytCCk5BY5DNc4Iy2GfMf\/+pdwchMXlidPxl9m3xfSniLWCTHxbpj40YmWIkY80OzyOpDhcGQCDofTwLtAvGOffKKJx8NuA+Fq38AEbEMx2glIBtfKFG3LgVEW5+239DjzaKkU826\/1QlRQtWsx1tbd8gIXFtYmBdTDvOxmJRI960brit2dmiNjCXWudeRLvacWwgBEBBuGKH8tm8mdAsHGYHkEJDkk9FjIgHfTHK5ccqMACHgeb7GgdwwVW6CmRLpI3AwEiIkWIgSeOQcZGEE0kCg3QtW6t6BDRhgZRqF4DyKi0DA3KtJy7eanRAmYHEZkfKb+8YGtKyqVI5VRf6uy\/MBU66HwmbXboI9qyZd160CiYBaLCww\/OLpIOC3+hvurFOVy5VKFdkikn2B6VRA0XMxBFxeXm66YSyhqgCFxuaKjg2\/f8IIuJ4x9dQGstKDv8qyaAM7UW40XDEzM51wEUZLPq41CKPlmp+7E5nPFwEe0wEhp989JKMd0Rb5YxA4YCdCLIxA\/AhgIgKEiKc1YHMkxLLWEelxTxgwsCSIgPG20PqjAwLanreOPKEBuSOSIPqcNLn7mhrQcE7bgIuVSo3mBa6TK2bN9T0xJbM7LzBrNk3WOJVlm9k0v9Td3QDngF2zCcaZUv\/FYX+\/gQMLIxA7Anv1fZ0m+Vo01xA4IKAv1xGxt9e8CecsjECcCLQ1oO\/fNOm2CXi68uY6pkhjRKR9o7mLj4xARASg2PRgB82+OlOp6A4IkmwTUKev1Hc4vnpZ10H+wwjEhUDdtKyW+DyYZgcBnaZqrEEDshYMwsTnURAAl9D7JduveubcuZvBtDoI2OyZqBu4gbVgECY+j4LA7u5L\/Ti5+G6F0+kgIC6SFrxOY8JVsLZe3wvfz2FGYCQEgrbf2crKZ+GHuwgILSh96ypufPmqzo7pMGIcHhoBLPMAh7SEbD+TSBcBceFU5dxt0yPefdFUn+YBPjICwyIAM05PvbLE7bDtZ9LoSUBcpGG539Ohtt9ocFNs0OLj0AjAfNvb1z7lmutN6Ra118N9CagnqvpKd5mhRnnVXC\/4OK4XAsGmV1ni6nJludrrPsT1JSAunq6sXKfJqjfgnMZeHkxCoMJyGALgCLgCzlCv90a\/ptekcSgBcZPt+59h8Bht+fPnL7hTYpDjYxcCIB040hzxUBtnKitXum4KRQwkIHrFru9\/DNeMR9O1nj0ndvM+MiEYOQjyPUMriSl95HD2\/OmPh0FlIAGRCOxBUq3vMwmHgbR493STb+r9w+y+IEJDERAP9CIh24RBKIt5Dg50ar7hyQfEhiYgbg6TkDsmQKW4YjocB83uaOQDciMREA8YEpqOybNnz9lPCGAKJvDzoe5Nh8PzRycfIBuZgHgIJDy9svKOcdG8ePlKYMCZm2Sgk28xPV3UOc7hanlB\/YNhbb4wOmMR0CRyamXlivKFHjGB1xtNMs+oNujk7witt13bERgdI6kJX12Fq6XSWt8xzhtHIiAyPFM5d5MWMr1DY8e3oY4xdoxC8nzCcaojm8+gLqFcjNbDPAHXn3oHAxVRS2xFTSD4\/KPNrctCqmuWsMqIx6772Gkhym4L4VVevCoOyPaXOPEC8TChwCgT+Peoxbt6FpNVYpJYCWjK9Hjz3mdKikuGiPgEmCbj7PTIn4KIE1BTvjwfo+AFmw5rw7EyEqYUwi1Bc3tjV\/jXozS3JrHgMRECmgzCGtHEg4y2Y2sySlsKx7bNpa5jFEC7EitAxLB46Q4EEWyf9gOCGwW7YuiNCQ5Ip7\/jQSz8bpeWasRNPFMViRLQZPJo8+dV2vjjsiXFBXorOu8WaEmbfvhkLEipj3SOD2oj3oh96hRtbN1ZbNyLX5HEECj8zo3Hj3UUrmMjSLl0sukqoXPEYWsMfY3s9Z5C9p3wsEZcruuVkj1vii8y9Vrb3NwsHRf2mpJqlVhzntAo9yMlXtN80d28slxcMqd87IHAKHhhWz7sjKY8bBZurT8X3npSmq5HUXVU6gTsV5AHmw\/KjnDLBEqJyFmm+0oEzop6+pQ6XQJhLdbiYonCJRPGkT43i3BHXPB6Ts9rhFUt\/G7+9nYVcWS94VrNWloSrd3PatgPnLCqusKpjuu3Q9pxyv8BVb3XBNS3Vn0AAAAASUVORK5CYII=" + }, + "id": "629cb5b8-490a-48bc-808b-ffc13085cb4f", + "name": "Complex PDF with Images & Tables" +} + } +} \ No newline at end of file diff --git a/api/controllers/common/errors.py b/api/controllers/common/errors.py index 6e2ea952fc..252cf3549a 100644 --- a/api/controllers/common/errors.py +++ b/api/controllers/common/errors.py @@ -25,6 +25,12 @@ class UnsupportedFileTypeError(BaseHTTPException): code = 415 +class BlockedFileExtensionError(BaseHTTPException): + error_code = "file_extension_blocked" + description = "The file extension is blocked for security reasons." + code = 400 + + class TooManyFilesError(BaseHTTPException): error_code = "too_many_files" description = "Only one file is allowed." diff --git a/api/controllers/console/apikey.py b/api/controllers/console/apikey.py index 4f04af7932..bd5862cbd0 100644 --- a/api/controllers/console/apikey.py +++ b/api/controllers/console/apikey.py @@ -104,14 +104,11 @@ class BaseApiKeyResource(Resource): resource_model: type | None = None resource_id_field: str | None = None - def delete(self, resource_id, api_key_id): + def delete(self, resource_id: str, api_key_id: str): assert self.resource_id_field is not None, "resource_id_field must be set" - resource_id = str(resource_id) - api_key_id = str(api_key_id) current_user, current_tenant_id = current_account_with_tenant() _get_resource(resource_id, current_tenant_id, self.resource_model) - # The role of the current user in the ta table must be admin or owner if not current_user.is_admin_or_owner: raise Forbidden() diff --git a/api/controllers/console/app/advanced_prompt_template.py b/api/controllers/console/app/advanced_prompt_template.py index 5885d7b447..075345d860 100644 --- a/api/controllers/console/app/advanced_prompt_template.py +++ b/api/controllers/console/app/advanced_prompt_template.py @@ -5,18 +5,20 @@ from controllers.console.wraps import account_initialization_required, setup_req from libs.login import login_required from services.advanced_prompt_template_service import AdvancedPromptTemplateService +parser = ( + reqparse.RequestParser() + .add_argument("app_mode", type=str, required=True, location="args", help="Application mode") + .add_argument("model_mode", type=str, required=True, location="args", help="Model mode") + .add_argument("has_context", type=str, required=False, default="true", location="args", help="Whether has context") + .add_argument("model_name", type=str, required=True, location="args", help="Model name") +) + @console_ns.route("/app/prompt-templates") class AdvancedPromptTemplateList(Resource): @api.doc("get_advanced_prompt_templates") @api.doc(description="Get advanced prompt templates based on app mode and model configuration") - @api.expect( - api.parser() - .add_argument("app_mode", type=str, required=True, location="args", help="Application mode") - .add_argument("model_mode", type=str, required=True, location="args", help="Model mode") - .add_argument("has_context", type=str, default="true", location="args", help="Whether has context") - .add_argument("model_name", type=str, required=True, location="args", help="Model name") - ) + @api.expect(parser) @api.response( 200, "Prompt templates retrieved successfully", fields.List(fields.Raw(description="Prompt template data")) ) @@ -25,13 +27,6 @@ class AdvancedPromptTemplateList(Resource): @login_required @account_initialization_required def get(self): - parser = ( - reqparse.RequestParser() - .add_argument("app_mode", type=str, required=True, location="args") - .add_argument("model_mode", type=str, required=True, location="args") - .add_argument("has_context", type=str, required=False, default="true", location="args") - .add_argument("model_name", type=str, required=True, location="args") - ) args = parser.parse_args() return AdvancedPromptTemplateService.get_prompt(args) diff --git a/api/controllers/console/app/agent.py b/api/controllers/console/app/agent.py index 717263a74d..fde28fdb98 100644 --- a/api/controllers/console/app/agent.py +++ b/api/controllers/console/app/agent.py @@ -8,17 +8,19 @@ from libs.login import login_required from models.model import AppMode from services.agent_service import AgentService +parser = ( + reqparse.RequestParser() + .add_argument("message_id", type=uuid_value, required=True, location="args", help="Message UUID") + .add_argument("conversation_id", type=uuid_value, required=True, location="args", help="Conversation UUID") +) + @console_ns.route("/apps//agent/logs") class AgentLogApi(Resource): @api.doc("get_agent_logs") @api.doc(description="Get agent execution logs for an application") @api.doc(params={"app_id": "Application ID"}) - @api.expect( - api.parser() - .add_argument("message_id", type=str, required=True, location="args", help="Message UUID") - .add_argument("conversation_id", type=str, required=True, location="args", help="Conversation UUID") - ) + @api.expect(parser) @api.response(200, "Agent logs retrieved successfully", fields.List(fields.Raw(description="Agent log entries"))) @api.response(400, "Invalid request parameters") @setup_required @@ -27,12 +29,6 @@ class AgentLogApi(Resource): @get_app_model(mode=[AppMode.AGENT_CHAT]) def get(self, app_model): """Get agent logs""" - parser = ( - reqparse.RequestParser() - .add_argument("message_id", type=uuid_value, required=True, location="args") - .add_argument("conversation_id", type=uuid_value, required=True, location="args") - ) - args = parser.parse_args() return AgentService.get_agent_logs(app_model, args["conversation_id"], args["message_id"]) diff --git a/api/controllers/console/app/annotation.py b/api/controllers/console/app/annotation.py index 932214058a..bc4113b5c7 100644 --- a/api/controllers/console/app/annotation.py +++ b/api/controllers/console/app/annotation.py @@ -16,6 +16,7 @@ from fields.annotation_fields import ( annotation_fields, annotation_hit_history_fields, ) +from libs.helper import uuid_value from libs.login import login_required from services.annotation_service import AppAnnotationService @@ -175,8 +176,10 @@ class AnnotationApi(Resource): api.model( "CreateAnnotationRequest", { - "question": fields.String(required=True, description="Question text"), - "answer": fields.String(required=True, description="Answer text"), + "message_id": fields.String(description="Message ID (optional)"), + "question": fields.String(description="Question text (required when message_id not provided)"), + "answer": fields.String(description="Answer text (use 'answer' or 'content')"), + "content": fields.String(description="Content text (use 'answer' or 'content')"), "annotation_reply": fields.Raw(description="Annotation reply data"), }, ) @@ -193,11 +196,14 @@ class AnnotationApi(Resource): app_id = str(app_id) parser = ( reqparse.RequestParser() - .add_argument("question", required=True, type=str, location="json") - .add_argument("answer", required=True, type=str, location="json") + .add_argument("message_id", required=False, type=uuid_value, location="json") + .add_argument("question", required=False, type=str, location="json") + .add_argument("answer", required=False, type=str, location="json") + .add_argument("content", required=False, type=str, location="json") + .add_argument("annotation_reply", required=False, type=dict, location="json") ) args = parser.parse_args() - annotation = AppAnnotationService.insert_app_annotation_directly(args, app_id) + annotation = AppAnnotationService.up_insert_app_annotation_from_message(args, app_id) return annotation @setup_required @@ -245,6 +251,13 @@ class AnnotationExportApi(Resource): return response, 200 +parser = ( + reqparse.RequestParser() + .add_argument("question", required=True, type=str, location="json") + .add_argument("answer", required=True, type=str, location="json") +) + + @console_ns.route("/apps//annotations/") class AnnotationUpdateDeleteApi(Resource): @api.doc("update_delete_annotation") @@ -253,6 +266,7 @@ class AnnotationUpdateDeleteApi(Resource): @api.response(200, "Annotation updated successfully", annotation_fields) @api.response(204, "Annotation deleted successfully") @api.response(403, "Insufficient permissions") + @api.expect(parser) @setup_required @login_required @account_initialization_required @@ -262,11 +276,6 @@ class AnnotationUpdateDeleteApi(Resource): def post(self, app_id, annotation_id): app_id = str(app_id) annotation_id = str(annotation_id) - parser = ( - reqparse.RequestParser() - .add_argument("question", required=True, type=str, location="json") - .add_argument("answer", required=True, type=str, location="json") - ) args = parser.parse_args() annotation = AppAnnotationService.update_app_annotation_directly(args, app_id, annotation_id) return annotation diff --git a/api/controllers/console/app/app.py b/api/controllers/console/app/app.py index 17505d69b2..a487512961 100644 --- a/api/controllers/console/app/app.py +++ b/api/controllers/console/app/app.py @@ -3,7 +3,7 @@ import uuid from flask_restx import Resource, fields, inputs, marshal, marshal_with, reqparse from sqlalchemy import select from sqlalchemy.orm import Session -from werkzeug.exceptions import BadRequest, Forbidden, abort +from werkzeug.exceptions import BadRequest, abort from controllers.console import api, console_ns from controllers.console.app.wraps import get_app_model @@ -12,14 +12,16 @@ from controllers.console.wraps import ( cloud_edition_billing_resource_check, edit_permission_required, enterprise_license_required, + is_admin_or_owner_required, setup_required, ) from core.ops.ops_trace_manager import OpsTraceManager +from core.workflow.enums import NodeType from extensions.ext_database import db from fields.app_fields import app_detail_fields, app_detail_fields_with_site, app_pagination_fields from libs.login import current_account_with_tenant, login_required from libs.validators import validate_description_length -from models import App +from models import App, Workflow from services.app_dsl_service import AppDslService, ImportMode from services.app_service import AppService from services.enterprise.enterprise_service import EnterpriseService @@ -106,6 +108,35 @@ class AppListApi(Resource): if str(app.id) in res: app.access_mode = res[str(app.id)].access_mode + workflow_capable_app_ids = [ + str(app.id) for app in app_pagination.items if app.mode in {"workflow", "advanced-chat"} + ] + draft_trigger_app_ids: set[str] = set() + if workflow_capable_app_ids: + draft_workflows = ( + db.session.execute( + select(Workflow).where( + Workflow.version == Workflow.VERSION_DRAFT, + Workflow.app_id.in_(workflow_capable_app_ids), + ) + ) + .scalars() + .all() + ) + trigger_node_types = { + NodeType.TRIGGER_WEBHOOK, + NodeType.TRIGGER_SCHEDULE, + NodeType.TRIGGER_PLUGIN, + } + for workflow in draft_workflows: + for _, node_data in workflow.walk_nodes(): + if node_data.get("type") in trigger_node_types: + draft_trigger_app_ids.add(str(workflow.app_id)) + break + + for app in app_pagination.items: + app.has_draft_trigger = str(app.id) in draft_trigger_app_ids + return marshal(app_pagination, app_pagination_fields), 200 @api.doc("create_app") @@ -220,10 +251,8 @@ class AppApi(Resource): args = parser.parse_args() app_service = AppService() - # Construct ArgsDict from parsed arguments - from services.app_service import AppService as AppServiceType - args_dict: AppServiceType.ArgsDict = { + args_dict: AppService.ArgsDict = { "name": args["name"], "description": args.get("description", ""), "icon_type": args.get("icon_type", ""), @@ -353,12 +382,15 @@ class AppExportApi(Resource): } +parser = reqparse.RequestParser().add_argument("name", type=str, required=True, location="json", help="Name to check") + + @console_ns.route("/apps//name") class AppNameApi(Resource): @api.doc("check_app_name") @api.doc(description="Check if app name is available") @api.doc(params={"app_id": "Application ID"}) - @api.expect(api.parser().add_argument("name", type=str, required=True, location="args", help="Name to check")) + @api.expect(parser) @api.response(200, "Name availability checked") @setup_required @login_required @@ -367,7 +399,6 @@ class AppNameApi(Resource): @marshal_with(app_detail_fields) @edit_permission_required def post(self, app_model): - parser = reqparse.RequestParser().add_argument("name", type=str, required=True, location="json") args = parser.parse_args() app_service = AppService() @@ -455,15 +486,11 @@ class AppApiStatus(Resource): @api.response(403, "Insufficient permissions") @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required @get_app_model @marshal_with(app_detail_fields) def post(self, app_model): - # The role of the current user in the ta table must be admin or owner - current_user, _ = current_account_with_tenant() - if not current_user.is_admin_or_owner: - raise Forbidden() - parser = reqparse.RequestParser().add_argument("enable_api", type=bool, required=True, location="json") args = parser.parse_args() diff --git a/api/controllers/console/app/app_import.py b/api/controllers/console/app/app_import.py index d902c129ad..02dbd42515 100644 --- a/api/controllers/console/app/app_import.py +++ b/api/controllers/console/app/app_import.py @@ -1,6 +1,7 @@ from flask_restx import Resource, marshal_with, reqparse from sqlalchemy.orm import Session +from controllers.console import api from controllers.console.app.wraps import get_app_model from controllers.console.wraps import ( account_initialization_required, @@ -18,9 +19,23 @@ from services.feature_service import FeatureService from .. import console_ns +parser = ( + reqparse.RequestParser() + .add_argument("mode", type=str, required=True, location="json") + .add_argument("yaml_content", type=str, location="json") + .add_argument("yaml_url", type=str, location="json") + .add_argument("name", type=str, location="json") + .add_argument("description", type=str, location="json") + .add_argument("icon_type", type=str, location="json") + .add_argument("icon", type=str, location="json") + .add_argument("icon_background", type=str, location="json") + .add_argument("app_id", type=str, location="json") +) + @console_ns.route("/apps/imports") class AppImportApi(Resource): + @api.expect(parser) @setup_required @login_required @account_initialization_required @@ -30,18 +45,6 @@ class AppImportApi(Resource): def post(self): # Check user role first current_user, _ = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("mode", type=str, required=True, location="json") - .add_argument("yaml_content", type=str, location="json") - .add_argument("yaml_url", type=str, location="json") - .add_argument("name", type=str, location="json") - .add_argument("description", type=str, location="json") - .add_argument("icon_type", type=str, location="json") - .add_argument("icon", type=str, location="json") - .add_argument("icon_background", type=str, location="json") - .add_argument("app_id", type=str, location="json") - ) args = parser.parse_args() # Create service with session diff --git a/api/controllers/console/app/conversation.py b/api/controllers/console/app/conversation.py index d5fa70d678..57b6c314f3 100644 --- a/api/controllers/console/app/conversation.py +++ b/api/controllers/console/app/conversation.py @@ -1,7 +1,5 @@ -from datetime import datetime - -import pytz import sqlalchemy as sa +from flask import abort from flask_restx import Resource, marshal_with, reqparse from flask_restx.inputs import int_range from sqlalchemy import func, or_ @@ -19,7 +17,7 @@ from fields.conversation_fields import ( conversation_pagination_fields, conversation_with_summary_pagination_fields, ) -from libs.datetime_utils import naive_utc_now +from libs.datetime_utils import naive_utc_now, parse_time_range from libs.helper import DatetimeString from libs.login import current_account_with_tenant, login_required from models import Conversation, EndUser, Message, MessageAnnotation @@ -90,25 +88,17 @@ class CompletionConversationApi(Resource): account = current_user assert account.timezone is not None - timezone = pytz.timezone(account.timezone) - utc_timezone = pytz.utc - if args["start"]: - start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M") - start_datetime = start_datetime.replace(second=0) - - start_datetime_timezone = timezone.localize(start_datetime) - start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone) + try: + start_datetime_utc, end_datetime_utc = parse_time_range(args["start"], args["end"], account.timezone) + except ValueError as e: + abort(400, description=str(e)) + if start_datetime_utc: query = query.where(Conversation.created_at >= start_datetime_utc) - if args["end"]: - end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M") - end_datetime = end_datetime.replace(second=59) - - end_datetime_timezone = timezone.localize(end_datetime) - end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone) - + if end_datetime_utc: + end_datetime_utc = end_datetime_utc.replace(second=59) query = query.where(Conversation.created_at < end_datetime_utc) # FIXME, the type ignore in this file @@ -270,29 +260,21 @@ class ChatConversationApi(Resource): account = current_user assert account.timezone is not None - timezone = pytz.timezone(account.timezone) - utc_timezone = pytz.utc - if args["start"]: - start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M") - start_datetime = start_datetime.replace(second=0) - - start_datetime_timezone = timezone.localize(start_datetime) - start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone) + try: + start_datetime_utc, end_datetime_utc = parse_time_range(args["start"], args["end"], account.timezone) + except ValueError as e: + abort(400, description=str(e)) + if start_datetime_utc: match args["sort_by"]: case "updated_at" | "-updated_at": query = query.where(Conversation.updated_at >= start_datetime_utc) case "created_at" | "-created_at" | _: query = query.where(Conversation.created_at >= start_datetime_utc) - if args["end"]: - end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M") - end_datetime = end_datetime.replace(second=59) - - end_datetime_timezone = timezone.localize(end_datetime) - end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone) - + if end_datetime_utc: + end_datetime_utc = end_datetime_utc.replace(second=59) match args["sort_by"]: case "updated_at" | "-updated_at": query = query.where(Conversation.updated_at <= end_datetime_utc) diff --git a/api/controllers/console/app/message.py b/api/controllers/console/app/message.py index 7e0ae370ef..3f66278940 100644 --- a/api/controllers/console/app/message.py +++ b/api/controllers/console/app/message.py @@ -16,7 +16,6 @@ from controllers.console.app.wraps import get_app_model from controllers.console.explore.error import AppSuggestedQuestionsAfterAnswerDisabledError from controllers.console.wraps import ( account_initialization_required, - cloud_edition_billing_resource_check, edit_permission_required, setup_required, ) @@ -24,12 +23,11 @@ from core.app.entities.app_invoke_entities import InvokeFrom from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError from core.model_runtime.errors.invoke import InvokeError from extensions.ext_database import db -from fields.conversation_fields import annotation_fields, message_detail_fields +from fields.conversation_fields import message_detail_fields from libs.helper import uuid_value from libs.infinite_scroll_pagination import InfiniteScrollPagination from libs.login import current_account_with_tenant, login_required from models.model import AppMode, Conversation, Message, MessageAnnotation, MessageFeedback -from services.annotation_service import AppAnnotationService from services.errors.conversation import ConversationNotExistsError from services.errors.message import MessageNotExistsError, SuggestedQuestionsAfterAnswerDisabledError from services.message_service import MessageService @@ -194,45 +192,6 @@ class MessageFeedbackApi(Resource): return {"result": "success"} -@console_ns.route("/apps//annotations") -class MessageAnnotationApi(Resource): - @api.doc("create_message_annotation") - @api.doc(description="Create message annotation") - @api.doc(params={"app_id": "Application ID"}) - @api.expect( - api.model( - "MessageAnnotationRequest", - { - "message_id": fields.String(description="Message ID"), - "question": fields.String(required=True, description="Question text"), - "answer": fields.String(required=True, description="Answer text"), - "annotation_reply": fields.Raw(description="Annotation reply"), - }, - ) - ) - @api.response(200, "Annotation created successfully", annotation_fields) - @api.response(403, "Insufficient permissions") - @marshal_with(annotation_fields) - @get_app_model - @setup_required - @login_required - @cloud_edition_billing_resource_check("annotation") - @account_initialization_required - @edit_permission_required - def post(self, app_model): - parser = ( - reqparse.RequestParser() - .add_argument("message_id", required=False, type=uuid_value, location="json") - .add_argument("question", required=True, type=str, location="json") - .add_argument("answer", required=True, type=str, location="json") - .add_argument("annotation_reply", required=False, type=dict, location="json") - ) - args = parser.parse_args() - annotation = AppAnnotationService.up_insert_app_annotation_from_message(args, app_model.id) - - return annotation - - @console_ns.route("/apps//annotations/count") class MessageAnnotationCountApi(Resource): @api.doc("get_annotation_count") diff --git a/api/controllers/console/app/model_config.py b/api/controllers/console/app/model_config.py index 72ce8a7ddf..91e2cfd60e 100644 --- a/api/controllers/console/app/model_config.py +++ b/api/controllers/console/app/model_config.py @@ -3,11 +3,10 @@ from typing import cast from flask import request from flask_restx import Resource, fields -from werkzeug.exceptions import Forbidden from controllers.console import api, console_ns from controllers.console.app.wraps import get_app_model -from controllers.console.wraps import account_initialization_required, setup_required +from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required from core.agent.entities import AgentToolEntity from core.tools.tool_manager import ToolManager from core.tools.utils.configuration import ToolParameterConfigurationManager @@ -48,15 +47,12 @@ class ModelConfigResource(Resource): @api.response(404, "App not found") @setup_required @login_required + @edit_permission_required @account_initialization_required @get_app_model(mode=[AppMode.AGENT_CHAT, AppMode.CHAT, AppMode.COMPLETION]) def post(self, app_model): """Modify app model config""" current_user, current_tenant_id = current_account_with_tenant() - - if not current_user.has_edit_permission: - raise Forbidden() - # validate config model_configuration = AppModelConfigService.validate_configuration( tenant_id=current_tenant_id, diff --git a/api/controllers/console/app/site.py b/api/controllers/console/app/site.py index c4d640bf0e..b8edbf77c7 100644 --- a/api/controllers/console/app/site.py +++ b/api/controllers/console/app/site.py @@ -1,10 +1,15 @@ from flask_restx import Resource, fields, marshal_with, reqparse -from werkzeug.exceptions import Forbidden, NotFound +from werkzeug.exceptions import NotFound from constants.languages import supported_language from controllers.console import api, console_ns from controllers.console.app.wraps import get_app_model -from controllers.console.wraps import account_initialization_required, setup_required +from controllers.console.wraps import ( + account_initialization_required, + edit_permission_required, + is_admin_or_owner_required, + setup_required, +) from extensions.ext_database import db from fields.app_fields import app_site_fields from libs.datetime_utils import naive_utc_now @@ -76,17 +81,13 @@ class AppSite(Resource): @api.response(404, "App not found") @setup_required @login_required + @edit_permission_required @account_initialization_required @get_app_model @marshal_with(app_site_fields) def post(self, app_model): args = parse_app_site_args() current_user, _ = current_account_with_tenant() - - # The role of the current user in the ta table must be editor, admin, or owner - if not current_user.has_edit_permission: - raise Forbidden() - site = db.session.query(Site).where(Site.app_id == app_model.id).first() if not site: raise NotFound @@ -130,16 +131,12 @@ class AppSiteAccessTokenReset(Resource): @api.response(404, "App or site not found") @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required @get_app_model @marshal_with(app_site_fields) def post(self, app_model): - # The role of the current user in the ta table must be admin or owner current_user, _ = current_account_with_tenant() - - if not current_user.is_admin_or_owner: - raise Forbidden() - site = db.session.query(Site).where(Site.app_id == app_model.id).first() if not site: diff --git a/api/controllers/console/app/statistic.py b/api/controllers/console/app/statistic.py index 0917a6e53c..b4bd05e891 100644 --- a/api/controllers/console/app/statistic.py +++ b/api/controllers/console/app/statistic.py @@ -1,9 +1,7 @@ -from datetime import datetime from decimal import Decimal -import pytz import sqlalchemy as sa -from flask import jsonify +from flask import abort, jsonify from flask_restx import Resource, fields, reqparse from controllers.console import api, console_ns @@ -11,9 +9,10 @@ from controllers.console.app.wraps import get_app_model from controllers.console.wraps import account_initialization_required, setup_required from core.app.entities.app_invoke_entities import InvokeFrom from extensions.ext_database import db -from libs.helper import DatetimeString +from libs.datetime_utils import parse_time_range +from libs.helper import DatetimeString, convert_datetime_to_date from libs.login import current_account_with_tenant, login_required -from models import AppMode, Message +from models import AppMode @console_ns.route("/apps//statistics/daily-messages") @@ -45,8 +44,9 @@ class DailyMessageStatistic(Resource): ) args = parser.parse_args() - sql_query = """SELECT - DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date, + converted_created_at = convert_datetime_to_date("created_at") + sql_query = f"""SELECT + {converted_created_at} AS date, COUNT(*) AS message_count FROM messages @@ -56,26 +56,16 @@ WHERE arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER} assert account.timezone is not None - timezone = pytz.timezone(account.timezone) - utc_timezone = pytz.utc - - if args["start"]: - start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M") - start_datetime = start_datetime.replace(second=0) - - start_datetime_timezone = timezone.localize(start_datetime) - start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone) + try: + start_datetime_utc, end_datetime_utc = parse_time_range(args["start"], args["end"], account.timezone) + except ValueError as e: + abort(400, description=str(e)) + if start_datetime_utc: sql_query += " AND created_at >= :start" arg_dict["start"] = start_datetime_utc - if args["end"]: - end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M") - end_datetime = end_datetime.replace(second=0) - - end_datetime_timezone = timezone.localize(end_datetime) - end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone) - + if end_datetime_utc: sql_query += " AND created_at < :end" arg_dict["end"] = end_datetime_utc @@ -91,16 +81,19 @@ WHERE return jsonify({"data": response_data}) +parser = ( + reqparse.RequestParser() + .add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args", help="Start date (YYYY-MM-DD HH:MM)") + .add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args", help="End date (YYYY-MM-DD HH:MM)") +) + + @console_ns.route("/apps//statistics/daily-conversations") class DailyConversationStatistic(Resource): @api.doc("get_daily_conversation_statistics") @api.doc(description="Get daily conversation statistics for an application") @api.doc(params={"app_id": "Application ID"}) - @api.expect( - api.parser() - .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)") - .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)") - ) + @api.expect(parser) @api.response( 200, "Daily conversation statistics retrieved successfully", @@ -113,48 +106,40 @@ class DailyConversationStatistic(Resource): def get(self, app_model): account, _ = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args") - .add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args") - ) args = parser.parse_args() + + converted_created_at = convert_datetime_to_date("created_at") + sql_query = f"""SELECT + {converted_created_at} AS date, + COUNT(DISTINCT conversation_id) AS conversation_count +FROM + messages +WHERE + app_id = :app_id + AND invoke_from != :invoke_from""" + arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER} assert account.timezone is not None - timezone = pytz.timezone(account.timezone) - utc_timezone = pytz.utc - stmt = ( - sa.select( - sa.func.date( - sa.func.date_trunc("day", sa.text("created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz")) - ).label("date"), - sa.func.count(sa.distinct(Message.conversation_id)).label("conversation_count"), - ) - .select_from(Message) - .where(Message.app_id == app_model.id, Message.invoke_from != InvokeFrom.DEBUGGER) - ) + try: + start_datetime_utc, end_datetime_utc = parse_time_range(args["start"], args["end"], account.timezone) + except ValueError as e: + abort(400, description=str(e)) - if args["start"]: - start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M") - start_datetime = start_datetime.replace(second=0) - start_datetime_timezone = timezone.localize(start_datetime) - start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone) - stmt = stmt.where(Message.created_at >= start_datetime_utc) + if start_datetime_utc: + sql_query += " AND created_at >= :start" + arg_dict["start"] = start_datetime_utc - if args["end"]: - end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M") - end_datetime = end_datetime.replace(second=0) - end_datetime_timezone = timezone.localize(end_datetime) - end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone) - stmt = stmt.where(Message.created_at < end_datetime_utc) + if end_datetime_utc: + sql_query += " AND created_at < :end" + arg_dict["end"] = end_datetime_utc - stmt = stmt.group_by("date").order_by("date") + sql_query += " GROUP BY date ORDER BY date" response_data = [] with db.engine.begin() as conn: - rs = conn.execute(stmt, {"tz": account.timezone}) - for row in rs: - response_data.append({"date": str(row.date), "conversation_count": row.conversation_count}) + rs = conn.execute(sa.text(sql_query), arg_dict) + for i in rs: + response_data.append({"date": str(i.date), "conversation_count": i.conversation_count}) return jsonify({"data": response_data}) @@ -164,11 +149,7 @@ class DailyTerminalsStatistic(Resource): @api.doc("get_daily_terminals_statistics") @api.doc(description="Get daily terminal/end-user statistics for an application") @api.doc(params={"app_id": "Application ID"}) - @api.expect( - api.parser() - .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)") - .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)") - ) + @api.expect(parser) @api.response( 200, "Daily terminal statistics retrieved successfully", @@ -181,15 +162,11 @@ class DailyTerminalsStatistic(Resource): def get(self, app_model): account, _ = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args") - .add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args") - ) args = parser.parse_args() - sql_query = """SELECT - DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date, + converted_created_at = convert_datetime_to_date("created_at") + sql_query = f"""SELECT + {converted_created_at} AS date, COUNT(DISTINCT messages.from_end_user_id) AS terminal_count FROM messages @@ -198,26 +175,17 @@ WHERE AND invoke_from != :invoke_from""" arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER} assert account.timezone is not None - timezone = pytz.timezone(account.timezone) - utc_timezone = pytz.utc - if args["start"]: - start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M") - start_datetime = start_datetime.replace(second=0) - - start_datetime_timezone = timezone.localize(start_datetime) - start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone) + try: + start_datetime_utc, end_datetime_utc = parse_time_range(args["start"], args["end"], account.timezone) + except ValueError as e: + abort(400, description=str(e)) + if start_datetime_utc: sql_query += " AND created_at >= :start" arg_dict["start"] = start_datetime_utc - if args["end"]: - end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M") - end_datetime = end_datetime.replace(second=0) - - end_datetime_timezone = timezone.localize(end_datetime) - end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone) - + if end_datetime_utc: sql_query += " AND created_at < :end" arg_dict["end"] = end_datetime_utc @@ -238,11 +206,7 @@ class DailyTokenCostStatistic(Resource): @api.doc("get_daily_token_cost_statistics") @api.doc(description="Get daily token cost statistics for an application") @api.doc(params={"app_id": "Application ID"}) - @api.expect( - api.parser() - .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)") - .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)") - ) + @api.expect(parser) @api.response( 200, "Daily token cost statistics retrieved successfully", @@ -255,15 +219,11 @@ class DailyTokenCostStatistic(Resource): def get(self, app_model): account, _ = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args") - .add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args") - ) args = parser.parse_args() - sql_query = """SELECT - DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date, + converted_created_at = convert_datetime_to_date("created_at") + sql_query = f"""SELECT + {converted_created_at} AS date, (SUM(messages.message_tokens) + SUM(messages.answer_tokens)) AS token_count, SUM(total_price) AS total_price FROM @@ -273,26 +233,17 @@ WHERE AND invoke_from != :invoke_from""" arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER} assert account.timezone is not None - timezone = pytz.timezone(account.timezone) - utc_timezone = pytz.utc - if args["start"]: - start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M") - start_datetime = start_datetime.replace(second=0) - - start_datetime_timezone = timezone.localize(start_datetime) - start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone) + try: + start_datetime_utc, end_datetime_utc = parse_time_range(args["start"], args["end"], account.timezone) + except ValueError as e: + abort(400, description=str(e)) + if start_datetime_utc: sql_query += " AND created_at >= :start" arg_dict["start"] = start_datetime_utc - if args["end"]: - end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M") - end_datetime = end_datetime.replace(second=0) - - end_datetime_timezone = timezone.localize(end_datetime) - end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone) - + if end_datetime_utc: sql_query += " AND created_at < :end" arg_dict["end"] = end_datetime_utc @@ -315,11 +266,7 @@ class AverageSessionInteractionStatistic(Resource): @api.doc("get_average_session_interaction_statistics") @api.doc(description="Get average session interaction statistics for an application") @api.doc(params={"app_id": "Application ID"}) - @api.expect( - api.parser() - .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)") - .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)") - ) + @api.expect(parser) @api.response( 200, "Average session interaction statistics retrieved successfully", @@ -332,15 +279,11 @@ class AverageSessionInteractionStatistic(Resource): def get(self, app_model): account, _ = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args") - .add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args") - ) args = parser.parse_args() - sql_query = """SELECT - DATE(DATE_TRUNC('day', c.created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date, + converted_created_at = convert_datetime_to_date("c.created_at") + sql_query = f"""SELECT + {converted_created_at} AS date, AVG(subquery.message_count) AS interactions FROM ( @@ -357,26 +300,17 @@ FROM AND m.invoke_from != :invoke_from""" arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER} assert account.timezone is not None - timezone = pytz.timezone(account.timezone) - utc_timezone = pytz.utc - if args["start"]: - start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M") - start_datetime = start_datetime.replace(second=0) - - start_datetime_timezone = timezone.localize(start_datetime) - start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone) + try: + start_datetime_utc, end_datetime_utc = parse_time_range(args["start"], args["end"], account.timezone) + except ValueError as e: + abort(400, description=str(e)) + if start_datetime_utc: sql_query += " AND c.created_at >= :start" arg_dict["start"] = start_datetime_utc - if args["end"]: - end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M") - end_datetime = end_datetime.replace(second=0) - - end_datetime_timezone = timezone.localize(end_datetime) - end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone) - + if end_datetime_utc: sql_query += " AND c.created_at < :end" arg_dict["end"] = end_datetime_utc @@ -408,11 +342,7 @@ class UserSatisfactionRateStatistic(Resource): @api.doc("get_user_satisfaction_rate_statistics") @api.doc(description="Get user satisfaction rate statistics for an application") @api.doc(params={"app_id": "Application ID"}) - @api.expect( - api.parser() - .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)") - .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)") - ) + @api.expect(parser) @api.response( 200, "User satisfaction rate statistics retrieved successfully", @@ -425,15 +355,11 @@ class UserSatisfactionRateStatistic(Resource): def get(self, app_model): account, _ = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args") - .add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args") - ) args = parser.parse_args() - sql_query = """SELECT - DATE(DATE_TRUNC('day', m.created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date, + converted_created_at = convert_datetime_to_date("m.created_at") + sql_query = f"""SELECT + {converted_created_at} AS date, COUNT(m.id) AS message_count, COUNT(mf.id) AS feedback_count FROM @@ -446,26 +372,17 @@ WHERE AND m.invoke_from != :invoke_from""" arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER} assert account.timezone is not None - timezone = pytz.timezone(account.timezone) - utc_timezone = pytz.utc - if args["start"]: - start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M") - start_datetime = start_datetime.replace(second=0) - - start_datetime_timezone = timezone.localize(start_datetime) - start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone) + try: + start_datetime_utc, end_datetime_utc = parse_time_range(args["start"], args["end"], account.timezone) + except ValueError as e: + abort(400, description=str(e)) + if start_datetime_utc: sql_query += " AND m.created_at >= :start" arg_dict["start"] = start_datetime_utc - if args["end"]: - end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M") - end_datetime = end_datetime.replace(second=0) - - end_datetime_timezone = timezone.localize(end_datetime) - end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone) - + if end_datetime_utc: sql_query += " AND m.created_at < :end" arg_dict["end"] = end_datetime_utc @@ -491,11 +408,7 @@ class AverageResponseTimeStatistic(Resource): @api.doc("get_average_response_time_statistics") @api.doc(description="Get average response time statistics for an application") @api.doc(params={"app_id": "Application ID"}) - @api.expect( - api.parser() - .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)") - .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)") - ) + @api.expect(parser) @api.response( 200, "Average response time statistics retrieved successfully", @@ -508,15 +421,11 @@ class AverageResponseTimeStatistic(Resource): def get(self, app_model): account, _ = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args") - .add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args") - ) args = parser.parse_args() - sql_query = """SELECT - DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date, + converted_created_at = convert_datetime_to_date("created_at") + sql_query = f"""SELECT + {converted_created_at} AS date, AVG(provider_response_latency) AS latency FROM messages @@ -525,26 +434,17 @@ WHERE AND invoke_from != :invoke_from""" arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER} assert account.timezone is not None - timezone = pytz.timezone(account.timezone) - utc_timezone = pytz.utc - if args["start"]: - start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M") - start_datetime = start_datetime.replace(second=0) - - start_datetime_timezone = timezone.localize(start_datetime) - start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone) + try: + start_datetime_utc, end_datetime_utc = parse_time_range(args["start"], args["end"], account.timezone) + except ValueError as e: + abort(400, description=str(e)) + if start_datetime_utc: sql_query += " AND created_at >= :start" arg_dict["start"] = start_datetime_utc - if args["end"]: - end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M") - end_datetime = end_datetime.replace(second=0) - - end_datetime_timezone = timezone.localize(end_datetime) - end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone) - + if end_datetime_utc: sql_query += " AND created_at < :end" arg_dict["end"] = end_datetime_utc @@ -565,11 +465,7 @@ class TokensPerSecondStatistic(Resource): @api.doc("get_tokens_per_second_statistics") @api.doc(description="Get tokens per second statistics for an application") @api.doc(params={"app_id": "Application ID"}) - @api.expect( - api.parser() - .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)") - .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)") - ) + @api.expect(parser) @api.response( 200, "Tokens per second statistics retrieved successfully", @@ -581,16 +477,11 @@ class TokensPerSecondStatistic(Resource): @account_initialization_required def get(self, app_model): account, _ = current_account_with_tenant() - - parser = ( - reqparse.RequestParser() - .add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args") - .add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args") - ) args = parser.parse_args() - sql_query = """SELECT - DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date, + converted_created_at = convert_datetime_to_date("created_at") + sql_query = f"""SELECT + {converted_created_at} AS date, CASE WHEN SUM(provider_response_latency) = 0 THEN 0 ELSE (SUM(answer_tokens) / SUM(provider_response_latency)) @@ -602,26 +493,17 @@ WHERE AND invoke_from != :invoke_from""" arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER} assert account.timezone is not None - timezone = pytz.timezone(account.timezone) - utc_timezone = pytz.utc - if args["start"]: - start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M") - start_datetime = start_datetime.replace(second=0) - - start_datetime_timezone = timezone.localize(start_datetime) - start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone) + try: + start_datetime_utc, end_datetime_utc = parse_time_range(args["start"], args["end"], account.timezone) + except ValueError as e: + abort(400, description=str(e)) + if start_datetime_utc: sql_query += " AND created_at >= :start" arg_dict["start"] = start_datetime_utc - if args["end"]: - end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M") - end_datetime = end_datetime.replace(second=0) - - end_datetime_timezone = timezone.localize(end_datetime) - end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone) - + if end_datetime_utc: sql_query += " AND created_at < :end" arg_dict["end"] = end_datetime_utc diff --git a/api/controllers/console/app/workflow.py b/api/controllers/console/app/workflow.py index 89d6a3aa11..2f6808f11d 100644 --- a/api/controllers/console/app/workflow.py +++ b/api/controllers/console/app/workflow.py @@ -16,6 +16,7 @@ from controllers.console.wraps import account_initialization_required, edit_perm from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError from core.app.app_config.features.file_upload.manager import FileUploadConfigManager from core.app.apps.base_app_queue_manager import AppQueueManager +from core.app.apps.workflow.app_generator import SKIP_PREPARE_USER_INPUTS_KEY from core.app.entities.app_invoke_entities import InvokeFrom from core.file.models import File from core.helper.trace_id_helper import get_external_trace_id @@ -112,7 +113,18 @@ class DraftWorkflowApi(Resource): }, ) ) - @api.response(200, "Draft workflow synced successfully", workflow_fields) + @api.response( + 200, + "Draft workflow synced successfully", + api.model( + "SyncDraftWorkflowResponse", + { + "result": fields.String, + "hash": fields.String, + "updated_at": fields.String, + }, + ), + ) @api.response(400, "Invalid workflow configuration") @api.response(403, "Permission denied") @edit_permission_required @@ -574,6 +586,13 @@ class DraftWorkflowNodeRunApi(Resource): return workflow_node_execution +parser_publish = ( + reqparse.RequestParser() + .add_argument("marked_name", type=str, required=False, default="", location="json") + .add_argument("marked_comment", type=str, required=False, default="", location="json") +) + + @console_ns.route("/apps//workflows/publish") class PublishedWorkflowApi(Resource): @api.doc("get_published_workflow") @@ -598,6 +617,7 @@ class PublishedWorkflowApi(Resource): # return workflow, if not found, return None return workflow + @api.expect(parser_publish) @setup_required @login_required @account_initialization_required @@ -608,12 +628,8 @@ class PublishedWorkflowApi(Resource): Publish workflow """ current_user, _ = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("marked_name", type=str, required=False, default="", location="json") - .add_argument("marked_comment", type=str, required=False, default="", location="json") - ) - args = parser.parse_args() + + args = parser_publish.parse_args() # Validate name and comment length if args.marked_name and len(args.marked_name) > 20: @@ -668,6 +684,9 @@ class DefaultBlockConfigsApi(Resource): return workflow_service.get_default_block_configs() +parser_block = reqparse.RequestParser().add_argument("q", type=str, location="args") + + @console_ns.route("/apps//workflows/default-workflow-block-configs/") class DefaultBlockConfigApi(Resource): @api.doc("get_default_block_config") @@ -675,6 +694,7 @@ class DefaultBlockConfigApi(Resource): @api.doc(params={"app_id": "Application ID", "block_type": "Block type"}) @api.response(200, "Default block configuration retrieved successfully") @api.response(404, "Block type not found") + @api.expect(parser_block) @setup_required @login_required @account_initialization_required @@ -684,8 +704,7 @@ class DefaultBlockConfigApi(Resource): """ Get default block config """ - parser = reqparse.RequestParser().add_argument("q", type=str, location="args") - args = parser.parse_args() + args = parser_block.parse_args() q = args.get("q") @@ -701,8 +720,18 @@ class DefaultBlockConfigApi(Resource): return workflow_service.get_default_block_config(node_type=block_type, filters=filters) +parser_convert = ( + reqparse.RequestParser() + .add_argument("name", type=str, required=False, nullable=True, location="json") + .add_argument("icon_type", type=str, required=False, nullable=True, location="json") + .add_argument("icon", type=str, required=False, nullable=True, location="json") + .add_argument("icon_background", type=str, required=False, nullable=True, location="json") +) + + @console_ns.route("/apps//convert-to-workflow") class ConvertToWorkflowApi(Resource): + @api.expect(parser_convert) @api.doc("convert_to_workflow") @api.doc(description="Convert application to workflow mode") @api.doc(params={"app_id": "Application ID"}) @@ -723,14 +752,7 @@ class ConvertToWorkflowApi(Resource): current_user, _ = current_account_with_tenant() if request.data: - parser = ( - reqparse.RequestParser() - .add_argument("name", type=str, required=False, nullable=True, location="json") - .add_argument("icon_type", type=str, required=False, nullable=True, location="json") - .add_argument("icon", type=str, required=False, nullable=True, location="json") - .add_argument("icon_background", type=str, required=False, nullable=True, location="json") - ) - args = parser.parse_args() + args = parser_convert.parse_args() else: args = {} @@ -744,8 +766,18 @@ class ConvertToWorkflowApi(Resource): } +parser_workflows = ( + reqparse.RequestParser() + .add_argument("page", type=inputs.int_range(1, 99999), required=False, default=1, location="args") + .add_argument("limit", type=inputs.int_range(1, 100), required=False, default=10, location="args") + .add_argument("user_id", type=str, required=False, location="args") + .add_argument("named_only", type=inputs.boolean, required=False, default=False, location="args") +) + + @console_ns.route("/apps//workflows") class PublishedAllWorkflowApi(Resource): + @api.expect(parser_workflows) @api.doc("get_all_published_workflows") @api.doc(description="Get all published workflows for an application") @api.doc(params={"app_id": "Application ID"}) @@ -762,16 +794,9 @@ class PublishedAllWorkflowApi(Resource): """ current_user, _ = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("page", type=inputs.int_range(1, 99999), required=False, default=1, location="args") - .add_argument("limit", type=inputs.int_range(1, 100), required=False, default=20, location="args") - .add_argument("user_id", type=str, required=False, location="args") - .add_argument("named_only", type=inputs.boolean, required=False, default=False, location="args") - ) - args = parser.parse_args() - page = int(args.get("page", 1)) - limit = int(args.get("limit", 10)) + args = parser_workflows.parse_args() + page = args["page"] + limit = args["limit"] user_id = args.get("user_id") named_only = args.get("named_only", False) @@ -958,8 +983,9 @@ class DraftWorkflowTriggerRunApi(Resource): Poll for trigger events and execute full workflow when event arrives """ current_user, _ = current_account_with_tenant() - parser = reqparse.RequestParser() - parser.add_argument("node_id", type=str, required=True, location="json", nullable=False) + parser = reqparse.RequestParser().add_argument( + "node_id", type=str, required=True, location="json", nullable=False + ) args = parser.parse_args() node_id = args["node_id"] workflow_service = WorkflowService() @@ -979,11 +1005,13 @@ class DraftWorkflowTriggerRunApi(Resource): event = poller.poll() if not event: return jsonable_encoder({"status": "waiting", "retry_in": LISTENING_RETRY_IN}) + workflow_args = dict(event.workflow_args) + workflow_args[SKIP_PREPARE_USER_INPUTS_KEY] = True return helper.compact_generate_response( AppGenerateService.generate( app_model=app_model, user=current_user, - args=event.workflow_args, + args=workflow_args, invoke_from=InvokeFrom.DEBUGGER, streaming=True, root_node_id=node_id, @@ -992,7 +1020,7 @@ class DraftWorkflowTriggerRunApi(Resource): except InvokeRateLimitError as ex: raise InvokeRateLimitHttpError(ex.description) except PluginInvokeError as e: - raise ValueError(e.to_user_friendly_error()) + return jsonable_encoder({"status": "error", "error": e.to_user_friendly_error()}), 400 except Exception as e: logger.exception("Error polling trigger debug event") raise e @@ -1050,26 +1078,21 @@ class DraftWorkflowTriggerNodeApi(Resource): ) event = poller.poll() except PluginInvokeError as e: - return jsonable_encoder({"status": "error", "error": e.to_user_friendly_error()}), 500 + return jsonable_encoder({"status": "error", "error": e.to_user_friendly_error()}), 400 except Exception as e: logger.exception("Error polling trigger debug event") raise e if not event: return jsonable_encoder({"status": "waiting", "retry_in": LISTENING_RETRY_IN}) - workflow_args = dict(event.workflow_args or {}) - raw_files = workflow_args.get("files") + raw_files = event.workflow_args.get("files") files = _parse_file(draft_workflow, raw_files if isinstance(raw_files, list) else None) - if node_type == NodeType.TRIGGER_WEBHOOK: - user_inputs = workflow_args.get("inputs") or {} - else: - user_inputs = workflow_args try: node_execution = workflow_service.run_draft_workflow_node( app_model=app_model, draft_workflow=draft_workflow, node_id=node_id, - user_inputs=user_inputs, + user_inputs=event.workflow_args.get("inputs") or {}, account=current_user, query="", files=files, @@ -1077,7 +1100,9 @@ class DraftWorkflowTriggerNodeApi(Resource): return jsonable_encoder(node_execution) except Exception as e: logger.exception("Error running draft workflow trigger node") - return jsonable_encoder({"status": "error", "error": str(e)}), 500 + return jsonable_encoder( + {"status": "error", "error": "An unexpected error occurred while running the node."} + ), 400 @console_ns.route("/apps//workflows/draft/trigger/run-all") @@ -1112,8 +1137,9 @@ class DraftWorkflowTriggerRunAllApi(Resource): """ current_user, _ = current_account_with_tenant() - parser = reqparse.RequestParser() - parser.add_argument("node_ids", type=list, required=True, location="json", nullable=False) + parser = reqparse.RequestParser().add_argument( + "node_ids", type=list, required=True, location="json", nullable=False + ) args = parser.parse_args() node_ids = args["node_ids"] workflow_service = WorkflowService() @@ -1129,7 +1155,7 @@ class DraftWorkflowTriggerRunAllApi(Resource): node_ids=node_ids, ) except PluginInvokeError as e: - raise ValueError(e.to_user_friendly_error()) + return jsonable_encoder({"status": "error", "error": e.to_user_friendly_error()}), 400 except Exception as e: logger.exception("Error polling trigger debug event") raise e @@ -1137,10 +1163,12 @@ class DraftWorkflowTriggerRunAllApi(Resource): return jsonable_encoder({"status": "waiting", "retry_in": LISTENING_RETRY_IN}) try: + workflow_args = dict(trigger_debug_event.workflow_args) + workflow_args[SKIP_PREPARE_USER_INPUTS_KEY] = True response = AppGenerateService.generate( app_model=app_model, user=current_user, - args=trigger_debug_event.workflow_args, + args=workflow_args, invoke_from=InvokeFrom.DEBUGGER, streaming=True, root_node_id=trigger_debug_event.node_id, @@ -1149,9 +1177,9 @@ class DraftWorkflowTriggerRunAllApi(Resource): except InvokeRateLimitError as ex: raise InvokeRateLimitHttpError(ex.description) except Exception: - logger.exception("Error running draft workflow trigger webhook run") + logger.exception("Error running draft workflow trigger run-all") return jsonable_encoder( { "status": "error", } - ), 500 + ), 400 diff --git a/api/controllers/console/app/workflow_app_log.py b/api/controllers/console/app/workflow_app_log.py index cbf4e84ff0..d7ecc7c91b 100644 --- a/api/controllers/console/app/workflow_app_log.py +++ b/api/controllers/console/app/workflow_app_log.py @@ -28,6 +28,7 @@ class WorkflowAppLogApi(Resource): "created_at__after": "Filter logs created after this timestamp", "created_by_end_user_session_id": "Filter by end user session ID", "created_by_account": "Filter by account", + "detail": "Whether to return detailed logs", "page": "Page number (1-99999)", "limit": "Number of items per page (1-100)", } @@ -68,6 +69,7 @@ class WorkflowAppLogApi(Resource): required=False, default=None, ) + .add_argument("detail", type=bool, location="args", required=False, default=False) .add_argument("page", type=int_range(1, 99999), default=1, location="args") .add_argument("limit", type=int_range(1, 100), default=20, location="args") ) @@ -92,6 +94,7 @@ class WorkflowAppLogApi(Resource): created_at_after=args.created_at__after, page=args.page, limit=args.limit, + detail=args.detail, created_by_end_user_session_id=args.created_by_end_user_session_id, created_by_account=args.created_by_account, ) diff --git a/api/controllers/console/app/workflow_draft_variable.py b/api/controllers/console/app/workflow_draft_variable.py index 0722eb40d2..ca97d8520c 100644 --- a/api/controllers/console/app/workflow_draft_variable.py +++ b/api/controllers/console/app/workflow_draft_variable.py @@ -1,17 +1,18 @@ import logging -from typing import NoReturn +from collections.abc import Callable +from functools import wraps +from typing import NoReturn, ParamSpec, TypeVar from flask import Response from flask_restx import Resource, fields, inputs, marshal, marshal_with, reqparse from sqlalchemy.orm import Session -from werkzeug.exceptions import Forbidden from controllers.console import api, console_ns from controllers.console.app.error import ( DraftWorkflowNotExist, ) from controllers.console.app.wraps import get_app_model -from controllers.console.wraps import account_initialization_required, setup_required +from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required from controllers.web.error import InvalidArgumentError, NotFoundError from core.file import helpers as file_helpers from core.variables.segment_group import SegmentGroup @@ -21,8 +22,8 @@ from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID, SYSTEM_VARIAB from extensions.ext_database import db from factories.file_factory import build_from_mapping, build_from_mappings from factories.variable_factory import build_segment_with_type -from libs.login import current_user, login_required -from models import Account, App, AppMode +from libs.login import login_required +from models import App, AppMode from models.workflow import WorkflowDraftVariable from services.workflow_draft_variable_service import WorkflowDraftVariableList, WorkflowDraftVariableService from services.workflow_service import WorkflowService @@ -140,8 +141,11 @@ _WORKFLOW_DRAFT_VARIABLE_LIST_FIELDS = { "items": fields.List(fields.Nested(_WORKFLOW_DRAFT_VARIABLE_FIELDS), attribute=_get_items), } +P = ParamSpec("P") +R = TypeVar("R") -def _api_prerequisite(f): + +def _api_prerequisite(f: Callable[P, R]): """Common prerequisites for all draft workflow variable APIs. It ensures the following conditions are satisfied: @@ -155,11 +159,10 @@ def _api_prerequisite(f): @setup_required @login_required @account_initialization_required + @edit_permission_required @get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW]) - def wrapper(*args, **kwargs): - assert isinstance(current_user, Account) - if not current_user.has_edit_permission: - raise Forbidden() + @wraps(f) + def wrapper(*args: P.args, **kwargs: P.kwargs): return f(*args, **kwargs) return wrapper @@ -167,6 +170,7 @@ def _api_prerequisite(f): @console_ns.route("/apps//workflows/draft/variables") class WorkflowVariableCollectionApi(Resource): + @api.expect(_create_pagination_parser()) @api.doc("get_workflow_variables") @api.doc(description="Get draft workflow variables") @api.doc(params={"app_id": "Application ID"}) diff --git a/api/controllers/console/app/workflow_run.py b/api/controllers/console/app/workflow_run.py index 311aa81279..23c228efbe 100644 --- a/api/controllers/console/app/workflow_run.py +++ b/api/controllers/console/app/workflow_run.py @@ -30,23 +30,25 @@ def _parse_workflow_run_list_args(): Returns: Parsed arguments containing last_id, limit, status, and triggered_from filters """ - parser = reqparse.RequestParser() - parser.add_argument("last_id", type=uuid_value, location="args") - parser.add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args") - parser.add_argument( - "status", - type=str, - choices=WORKFLOW_RUN_STATUS_CHOICES, - location="args", - required=False, - ) - parser.add_argument( - "triggered_from", - type=str, - choices=["debugging", "app-run"], - location="args", - required=False, - help="Filter by trigger source: debugging or app-run", + parser = ( + reqparse.RequestParser() + .add_argument("last_id", type=uuid_value, location="args") + .add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args") + .add_argument( + "status", + type=str, + choices=WORKFLOW_RUN_STATUS_CHOICES, + location="args", + required=False, + ) + .add_argument( + "triggered_from", + type=str, + choices=["debugging", "app-run"], + location="args", + required=False, + help="Filter by trigger source: debugging or app-run", + ) ) return parser.parse_args() @@ -58,28 +60,30 @@ def _parse_workflow_run_count_args(): Returns: Parsed arguments containing status, time_range, and triggered_from filters """ - parser = reqparse.RequestParser() - parser.add_argument( - "status", - type=str, - choices=WORKFLOW_RUN_STATUS_CHOICES, - location="args", - required=False, - ) - parser.add_argument( - "time_range", - type=time_duration, - location="args", - required=False, - help="Time range filter (e.g., 7d, 4h, 30m, 30s)", - ) - parser.add_argument( - "triggered_from", - type=str, - choices=["debugging", "app-run"], - location="args", - required=False, - help="Filter by trigger source: debugging or app-run", + parser = ( + reqparse.RequestParser() + .add_argument( + "status", + type=str, + choices=WORKFLOW_RUN_STATUS_CHOICES, + location="args", + required=False, + ) + .add_argument( + "time_range", + type=time_duration, + location="args", + required=False, + help="Time range filter (e.g., 7d, 4h, 30m, 30s)", + ) + .add_argument( + "triggered_from", + type=str, + choices=["debugging", "app-run"], + location="args", + required=False, + help="Filter by trigger source: debugging or app-run", + ) ) return parser.parse_args() diff --git a/api/controllers/console/app/workflow_statistic.py b/api/controllers/console/app/workflow_statistic.py index bbea04640a..ef5205c1ee 100644 --- a/api/controllers/console/app/workflow_statistic.py +++ b/api/controllers/console/app/workflow_statistic.py @@ -1,23 +1,26 @@ -from datetime import datetime -from decimal import Decimal - -import pytz -import sqlalchemy as sa -from flask import jsonify +from flask import abort, jsonify from flask_restx import Resource, reqparse +from sqlalchemy.orm import sessionmaker from controllers.console import api, console_ns from controllers.console.app.wraps import get_app_model from controllers.console.wraps import account_initialization_required, setup_required from extensions.ext_database import db +from libs.datetime_utils import parse_time_range from libs.helper import DatetimeString from libs.login import current_account_with_tenant, login_required from models.enums import WorkflowRunTriggeredFrom from models.model import AppMode +from repositories.factory import DifyAPIRepositoryFactory @console_ns.route("/apps//workflow/statistics/daily-conversations") class WorkflowDailyRunsStatistic(Resource): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + session_maker = sessionmaker(bind=db.engine, expire_on_commit=False) + self._workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker) + @api.doc("get_workflow_daily_runs_statistic") @api.doc(description="Get workflow daily runs statistics") @api.doc(params={"app_id": "Application ID"}) @@ -37,57 +40,32 @@ class WorkflowDailyRunsStatistic(Resource): ) args = parser.parse_args() - sql_query = """SELECT - DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date, - COUNT(id) AS runs -FROM - workflow_runs -WHERE - app_id = :app_id - AND triggered_from = :triggered_from""" - arg_dict = { - "tz": account.timezone, - "app_id": app_model.id, - "triggered_from": WorkflowRunTriggeredFrom.APP_RUN, - } assert account.timezone is not None - timezone = pytz.timezone(account.timezone) - utc_timezone = pytz.utc - if args["start"]: - start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M") - start_datetime = start_datetime.replace(second=0) + try: + start_date, end_date = parse_time_range(args["start"], args["end"], account.timezone) + except ValueError as e: + abort(400, description=str(e)) - start_datetime_timezone = timezone.localize(start_datetime) - start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone) - - sql_query += " AND created_at >= :start" - arg_dict["start"] = start_datetime_utc - - if args["end"]: - end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M") - end_datetime = end_datetime.replace(second=0) - - end_datetime_timezone = timezone.localize(end_datetime) - end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone) - - sql_query += " AND created_at < :end" - arg_dict["end"] = end_datetime_utc - - sql_query += " GROUP BY date ORDER BY date" - - response_data = [] - - with db.engine.begin() as conn: - rs = conn.execute(sa.text(sql_query), arg_dict) - for i in rs: - response_data.append({"date": str(i.date), "runs": i.runs}) + response_data = self._workflow_run_repo.get_daily_runs_statistics( + tenant_id=app_model.tenant_id, + app_id=app_model.id, + triggered_from=WorkflowRunTriggeredFrom.APP_RUN, + start_date=start_date, + end_date=end_date, + timezone=account.timezone, + ) return jsonify({"data": response_data}) @console_ns.route("/apps//workflow/statistics/daily-terminals") class WorkflowDailyTerminalsStatistic(Resource): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + session_maker = sessionmaker(bind=db.engine, expire_on_commit=False) + self._workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker) + @api.doc("get_workflow_daily_terminals_statistic") @api.doc(description="Get workflow daily terminals statistics") @api.doc(params={"app_id": "Application ID"}) @@ -107,57 +85,32 @@ class WorkflowDailyTerminalsStatistic(Resource): ) args = parser.parse_args() - sql_query = """SELECT - DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date, - COUNT(DISTINCT workflow_runs.created_by) AS terminal_count -FROM - workflow_runs -WHERE - app_id = :app_id - AND triggered_from = :triggered_from""" - arg_dict = { - "tz": account.timezone, - "app_id": app_model.id, - "triggered_from": WorkflowRunTriggeredFrom.APP_RUN, - } assert account.timezone is not None - timezone = pytz.timezone(account.timezone) - utc_timezone = pytz.utc - if args["start"]: - start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M") - start_datetime = start_datetime.replace(second=0) + try: + start_date, end_date = parse_time_range(args["start"], args["end"], account.timezone) + except ValueError as e: + abort(400, description=str(e)) - start_datetime_timezone = timezone.localize(start_datetime) - start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone) - - sql_query += " AND created_at >= :start" - arg_dict["start"] = start_datetime_utc - - if args["end"]: - end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M") - end_datetime = end_datetime.replace(second=0) - - end_datetime_timezone = timezone.localize(end_datetime) - end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone) - - sql_query += " AND created_at < :end" - arg_dict["end"] = end_datetime_utc - - sql_query += " GROUP BY date ORDER BY date" - - response_data = [] - - with db.engine.begin() as conn: - rs = conn.execute(sa.text(sql_query), arg_dict) - for i in rs: - response_data.append({"date": str(i.date), "terminal_count": i.terminal_count}) + response_data = self._workflow_run_repo.get_daily_terminals_statistics( + tenant_id=app_model.tenant_id, + app_id=app_model.id, + triggered_from=WorkflowRunTriggeredFrom.APP_RUN, + start_date=start_date, + end_date=end_date, + timezone=account.timezone, + ) return jsonify({"data": response_data}) @console_ns.route("/apps//workflow/statistics/token-costs") class WorkflowDailyTokenCostStatistic(Resource): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + session_maker = sessionmaker(bind=db.engine, expire_on_commit=False) + self._workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker) + @api.doc("get_workflow_daily_token_cost_statistic") @api.doc(description="Get workflow daily token cost statistics") @api.doc(params={"app_id": "Application ID"}) @@ -177,62 +130,32 @@ class WorkflowDailyTokenCostStatistic(Resource): ) args = parser.parse_args() - sql_query = """SELECT - DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date, - SUM(workflow_runs.total_tokens) AS token_count -FROM - workflow_runs -WHERE - app_id = :app_id - AND triggered_from = :triggered_from""" - arg_dict = { - "tz": account.timezone, - "app_id": app_model.id, - "triggered_from": WorkflowRunTriggeredFrom.APP_RUN, - } assert account.timezone is not None - timezone = pytz.timezone(account.timezone) - utc_timezone = pytz.utc - if args["start"]: - start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M") - start_datetime = start_datetime.replace(second=0) + try: + start_date, end_date = parse_time_range(args["start"], args["end"], account.timezone) + except ValueError as e: + abort(400, description=str(e)) - start_datetime_timezone = timezone.localize(start_datetime) - start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone) - - sql_query += " AND created_at >= :start" - arg_dict["start"] = start_datetime_utc - - if args["end"]: - end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M") - end_datetime = end_datetime.replace(second=0) - - end_datetime_timezone = timezone.localize(end_datetime) - end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone) - - sql_query += " AND created_at < :end" - arg_dict["end"] = end_datetime_utc - - sql_query += " GROUP BY date ORDER BY date" - - response_data = [] - - with db.engine.begin() as conn: - rs = conn.execute(sa.text(sql_query), arg_dict) - for i in rs: - response_data.append( - { - "date": str(i.date), - "token_count": i.token_count, - } - ) + response_data = self._workflow_run_repo.get_daily_token_cost_statistics( + tenant_id=app_model.tenant_id, + app_id=app_model.id, + triggered_from=WorkflowRunTriggeredFrom.APP_RUN, + start_date=start_date, + end_date=end_date, + timezone=account.timezone, + ) return jsonify({"data": response_data}) @console_ns.route("/apps//workflow/statistics/average-app-interactions") class WorkflowAverageAppInteractionStatistic(Resource): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + session_maker = sessionmaker(bind=db.engine, expire_on_commit=False) + self._workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker) + @api.doc("get_workflow_average_app_interaction_statistic") @api.doc(description="Get workflow average app interaction statistics") @api.doc(params={"app_id": "Application ID"}) @@ -252,67 +175,20 @@ class WorkflowAverageAppInteractionStatistic(Resource): ) args = parser.parse_args() - sql_query = """SELECT - AVG(sub.interactions) AS interactions, - sub.date -FROM - ( - SELECT - DATE(DATE_TRUNC('day', c.created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date, - c.created_by, - COUNT(c.id) AS interactions - FROM - workflow_runs c - WHERE - c.app_id = :app_id - AND c.triggered_from = :triggered_from - {{start}} - {{end}} - GROUP BY - date, c.created_by - ) sub -GROUP BY - sub.date""" - arg_dict = { - "tz": account.timezone, - "app_id": app_model.id, - "triggered_from": WorkflowRunTriggeredFrom.APP_RUN, - } assert account.timezone is not None - timezone = pytz.timezone(account.timezone) - utc_timezone = pytz.utc - if args["start"]: - start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M") - start_datetime = start_datetime.replace(second=0) + try: + start_date, end_date = parse_time_range(args["start"], args["end"], account.timezone) + except ValueError as e: + abort(400, description=str(e)) - start_datetime_timezone = timezone.localize(start_datetime) - start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone) - - sql_query = sql_query.replace("{{start}}", " AND c.created_at >= :start") - arg_dict["start"] = start_datetime_utc - else: - sql_query = sql_query.replace("{{start}}", "") - - if args["end"]: - end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M") - end_datetime = end_datetime.replace(second=0) - - end_datetime_timezone = timezone.localize(end_datetime) - end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone) - - sql_query = sql_query.replace("{{end}}", " AND c.created_at < :end") - arg_dict["end"] = end_datetime_utc - else: - sql_query = sql_query.replace("{{end}}", "") - - response_data = [] - - with db.engine.begin() as conn: - rs = conn.execute(sa.text(sql_query), arg_dict) - for i in rs: - response_data.append( - {"date": str(i.date), "interactions": float(i.interactions.quantize(Decimal("0.01")))} - ) + response_data = self._workflow_run_repo.get_average_app_interaction_statistics( + tenant_id=app_model.tenant_id, + app_id=app_model.id, + triggered_from=WorkflowRunTriggeredFrom.APP_RUN, + start_date=start_date, + end_date=end_date, + timezone=account.timezone, + ) return jsonify({"data": response_data}) diff --git a/api/controllers/console/app/workflow_trigger.py b/api/controllers/console/app/workflow_trigger.py index fd64261525..785813c5f0 100644 --- a/api/controllers/console/app/workflow_trigger.py +++ b/api/controllers/console/app/workflow_trigger.py @@ -3,12 +3,12 @@ import logging from flask_restx import Resource, marshal_with, reqparse from sqlalchemy import select from sqlalchemy.orm import Session -from werkzeug.exceptions import Forbidden, NotFound +from werkzeug.exceptions import NotFound from configs import dify_config from controllers.console import api from controllers.console.app.wraps import get_app_model -from controllers.console.wraps import account_initialization_required, setup_required +from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required from extensions.ext_database import db from fields.workflow_trigger_fields import trigger_fields, triggers_list_fields, webhook_trigger_fields from libs.login import current_user, login_required @@ -29,8 +29,7 @@ class WebhookTriggerApi(Resource): @marshal_with(webhook_trigger_fields) def get(self, app_model: App): """Get webhook trigger for a node""" - parser = reqparse.RequestParser() - parser.add_argument("node_id", type=str, required=True, help="Node ID is required") + parser = reqparse.RequestParser().add_argument("node_id", type=str, required=True, help="Node ID is required") args = parser.parse_args() node_id = str(args["node_id"]) @@ -95,19 +94,19 @@ class AppTriggerEnableApi(Resource): @setup_required @login_required @account_initialization_required + @edit_permission_required @get_app_model(mode=AppMode.WORKFLOW) @marshal_with(trigger_fields) def post(self, app_model: App): """Update app trigger (enable/disable)""" - parser = reqparse.RequestParser() - parser.add_argument("trigger_id", type=str, required=True, nullable=False, location="json") - parser.add_argument("enable_trigger", type=bool, required=True, nullable=False, location="json") + parser = ( + reqparse.RequestParser() + .add_argument("trigger_id", type=str, required=True, nullable=False, location="json") + .add_argument("enable_trigger", type=bool, required=True, nullable=False, location="json") + ) args = parser.parse_args() - assert isinstance(current_user, Account) assert current_user.current_tenant_id is not None - if not current_user.has_edit_permission: - raise Forbidden() trigger_id = args["trigger_id"] diff --git a/api/controllers/console/auth/data_source_bearer_auth.py b/api/controllers/console/auth/data_source_bearer_auth.py index a06435267b..9d7fcef183 100644 --- a/api/controllers/console/auth/data_source_bearer_auth.py +++ b/api/controllers/console/auth/data_source_bearer_auth.py @@ -1,8 +1,8 @@ from flask_restx import Resource, reqparse -from werkzeug.exceptions import Forbidden from controllers.console import console_ns from controllers.console.auth.error import ApiKeyAuthFailedError +from controllers.console.wraps import is_admin_or_owner_required from libs.login import current_account_with_tenant, login_required from services.auth.api_key_auth_service import ApiKeyAuthService @@ -39,12 +39,10 @@ class ApiKeyAuthDataSourceBinding(Resource): @setup_required @login_required @account_initialization_required + @is_admin_or_owner_required def post(self): # The role of the current user in the table must be admin or owner - current_user, current_tenant_id = current_account_with_tenant() - - if not current_user.is_admin_or_owner: - raise Forbidden() + _, current_tenant_id = current_account_with_tenant() parser = ( reqparse.RequestParser() .add_argument("category", type=str, required=True, nullable=False, location="json") @@ -65,12 +63,10 @@ class ApiKeyAuthDataSourceBindingDelete(Resource): @setup_required @login_required @account_initialization_required + @is_admin_or_owner_required def delete(self, binding_id): # The role of the current user in the table must be admin or owner - current_user, current_tenant_id = current_account_with_tenant() - - if not current_user.is_admin_or_owner: - raise Forbidden() + _, current_tenant_id = current_account_with_tenant() ApiKeyAuthService.delete_provider_auth(current_tenant_id, binding_id) diff --git a/api/controllers/console/auth/data_source_oauth.py b/api/controllers/console/auth/data_source_oauth.py index 0fd433d718..a27932ccd8 100644 --- a/api/controllers/console/auth/data_source_oauth.py +++ b/api/controllers/console/auth/data_source_oauth.py @@ -3,11 +3,11 @@ import logging import httpx from flask import current_app, redirect, request from flask_restx import Resource, fields -from werkzeug.exceptions import Forbidden from configs import dify_config from controllers.console import api, console_ns -from libs.login import current_account_with_tenant, login_required +from controllers.console.wraps import is_admin_or_owner_required +from libs.login import login_required from libs.oauth_data_source import NotionOAuth from ..wraps import account_initialization_required, setup_required @@ -42,11 +42,9 @@ class OAuthDataSource(Resource): ) @api.response(400, "Invalid provider") @api.response(403, "Admin privileges required") + @is_admin_or_owner_required def get(self, provider: str): # The role of the current user in the table must be admin or owner - current_user, _ = current_account_with_tenant() - if not current_user.is_admin_or_owner: - raise Forbidden() OAUTH_DATASOURCE_PROVIDERS = get_oauth_providers() with current_app.app_context(): oauth_provider = OAUTH_DATASOURCE_PROVIDERS.get(provider) diff --git a/api/controllers/console/billing/billing.py b/api/controllers/console/billing/billing.py index 705f5970dd..6efb4564ca 100644 --- a/api/controllers/console/billing/billing.py +++ b/api/controllers/console/billing/billing.py @@ -1,7 +1,11 @@ -from flask_restx import Resource, reqparse +import base64 -from controllers.console import console_ns +from flask_restx import Resource, fields, reqparse +from werkzeug.exceptions import BadRequest + +from controllers.console import api, console_ns from controllers.console.wraps import account_initialization_required, only_edition_cloud, setup_required +from enums.cloud_plan import CloudPlan from libs.login import current_account_with_tenant, login_required from services.billing_service import BillingService @@ -16,7 +20,13 @@ class Subscription(Resource): current_user, current_tenant_id = current_account_with_tenant() parser = ( reqparse.RequestParser() - .add_argument("plan", type=str, required=True, location="args", choices=["professional", "team"]) + .add_argument( + "plan", + type=str, + required=True, + location="args", + choices=[CloudPlan.PROFESSIONAL, CloudPlan.TEAM], + ) .add_argument("interval", type=str, required=True, location="args", choices=["month", "year"]) ) args = parser.parse_args() @@ -34,3 +44,37 @@ class Invoices(Resource): current_user, current_tenant_id = current_account_with_tenant() BillingService.is_tenant_owner_or_admin(current_user) return BillingService.get_invoices(current_user.email, current_tenant_id) + + +@console_ns.route("/billing/partners//tenants") +class PartnerTenants(Resource): + @api.doc("sync_partner_tenants_bindings") + @api.doc(description="Sync partner tenants bindings") + @api.doc(params={"partner_key": "Partner key"}) + @api.expect( + api.model( + "SyncPartnerTenantsBindingsRequest", + {"click_id": fields.String(required=True, description="Click Id from partner referral link")}, + ) + ) + @api.response(200, "Tenants synced to partner successfully") + @api.response(400, "Invalid partner information") + @setup_required + @login_required + @account_initialization_required + @only_edition_cloud + def put(self, partner_key: str): + current_user, _ = current_account_with_tenant() + parser = reqparse.RequestParser().add_argument("click_id", required=True, type=str, location="json") + args = parser.parse_args() + + try: + click_id = args["click_id"] + decoded_partner_key = base64.b64decode(partner_key).decode("utf-8") + except Exception: + raise BadRequest("Invalid partner_key") + + if not click_id or not decoded_partner_key or not current_user.id: + raise BadRequest("Invalid partner information") + + return BillingService.sync_partner_tenants_bindings(current_user.id, decoded_partner_key, click_id) diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index 50bf48450c..3aac571300 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -15,6 +15,7 @@ from controllers.console.wraps import ( account_initialization_required, cloud_edition_billing_rate_limit_check, enterprise_license_required, + is_admin_or_owner_required, setup_required, ) from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError @@ -753,13 +754,11 @@ class DatasetApiKeyApi(Resource): @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required @marshal_with(api_key_fields) def post(self): - # The role of the current user in the ta table must be admin or owner - current_user, current_tenant_id = current_account_with_tenant() - if not current_user.is_admin_or_owner: - raise Forbidden() + _, current_tenant_id = current_account_with_tenant() current_key_count = ( db.session.query(ApiToken) @@ -794,15 +793,11 @@ class DatasetApiDeleteApi(Resource): @api.response(204, "API key deleted successfully") @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def delete(self, api_key_id): - current_user, current_tenant_id = current_account_with_tenant() + _, current_tenant_id = current_account_with_tenant() api_key_id = str(api_key_id) - - # The role of the current user in the ta table must be admin or owner - if not current_user.is_admin_or_owner: - raise Forbidden() - key = ( db.session.query(ApiToken) .where( diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index 85fd0535c7..92c85b4951 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -162,6 +162,7 @@ class DatasetDocumentListApi(Resource): "keyword": "Search keyword", "sort": "Sort order (default: -created_at)", "fetch": "Fetch full details (default: false)", + "status": "Filter documents by display status", } ) @api.response(200, "Documents retrieved successfully") @@ -175,6 +176,7 @@ class DatasetDocumentListApi(Resource): limit = request.args.get("limit", default=20, type=int) search = request.args.get("keyword", default=None, type=str) sort = request.args.get("sort", default="-created_at", type=str) + status = request.args.get("status", default=None, type=str) # "yes", "true", "t", "y", "1" convert to True, while others convert to False. try: fetch_val = request.args.get("fetch", default="false") @@ -203,6 +205,9 @@ class DatasetDocumentListApi(Resource): query = select(Document).filter_by(dataset_id=str(dataset_id), tenant_id=current_tenant_id) + if status: + query = DocumentService.apply_display_status_filter(query, status) + if search: search = f"%{search}%" query = query.where(Document.name.like(search)) @@ -746,7 +751,7 @@ class DocumentApi(DocumentResource): "name": document.name, "created_from": document.created_from, "created_by": document.created_by, - "created_at": document.created_at.timestamp(), + "created_at": int(document.created_at.timestamp()), "tokens": document.tokens, "indexing_status": document.indexing_status, "completed_at": int(document.completed_at.timestamp()) if document.completed_at else None, @@ -779,7 +784,7 @@ class DocumentApi(DocumentResource): "name": document.name, "created_from": document.created_from, "created_by": document.created_by, - "created_at": document.created_at.timestamp(), + "created_at": int(document.created_at.timestamp()), "tokens": document.tokens, "indexing_status": document.indexing_status, "completed_at": int(document.completed_at.timestamp()) if document.completed_at else None, diff --git a/api/controllers/console/datasets/external.py b/api/controllers/console/datasets/external.py index 4f738db0e5..fe96a8199a 100644 --- a/api/controllers/console/datasets/external.py +++ b/api/controllers/console/datasets/external.py @@ -5,7 +5,7 @@ from werkzeug.exceptions import Forbidden, InternalServerError, NotFound import services from controllers.console import api, console_ns from controllers.console.datasets.error import DatasetNameDuplicateError -from controllers.console.wraps import account_initialization_required, setup_required +from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required from fields.dataset_fields import dataset_detail_fields from libs.login import current_account_with_tenant, login_required from services.dataset_service import DatasetService @@ -200,12 +200,10 @@ class ExternalDatasetCreateApi(Resource): @setup_required @login_required @account_initialization_required + @edit_permission_required def post(self): # The role of the current user in the ta table must be admin, owner, or editor current_user, current_tenant_id = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() - parser = ( reqparse.RequestParser() .add_argument("external_knowledge_api_id", type=str, required=True, nullable=False, location="json") diff --git a/api/controllers/console/datasets/rag_pipeline/datasource_auth.py b/api/controllers/console/datasets/rag_pipeline/datasource_auth.py index 2111ee2ecf..f83ee69beb 100644 --- a/api/controllers/console/datasets/rag_pipeline/datasource_auth.py +++ b/api/controllers/console/datasets/rag_pipeline/datasource_auth.py @@ -3,7 +3,7 @@ from flask_restx import Resource, reqparse from werkzeug.exceptions import Forbidden, NotFound from configs import dify_config -from controllers.console import console_ns +from controllers.console import api, console_ns from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.utils.encoders import jsonable_encoder @@ -121,8 +121,16 @@ class DatasourceOAuthCallback(Resource): return redirect(f"{dify_config.CONSOLE_WEB_URL}/oauth-callback") +parser_datasource = ( + reqparse.RequestParser() + .add_argument("name", type=StrLen(max_length=100), required=False, nullable=True, location="json", default=None) + .add_argument("credentials", type=dict, required=True, nullable=False, location="json") +) + + @console_ns.route("/auth/plugin/datasource/") class DatasourceAuth(Resource): + @api.expect(parser_datasource) @setup_required @login_required @account_initialization_required @@ -130,14 +138,7 @@ class DatasourceAuth(Resource): def post(self, provider_id: str): _, current_tenant_id = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument( - "name", type=StrLen(max_length=100), required=False, nullable=True, location="json", default=None - ) - .add_argument("credentials", type=dict, required=True, nullable=False, location="json") - ) - args = parser.parse_args() + args = parser_datasource.parse_args() datasource_provider_id = DatasourceProviderID(provider_id) datasource_provider_service = DatasourceProviderService() @@ -168,8 +169,14 @@ class DatasourceAuth(Resource): return {"result": datasources}, 200 +parser_datasource_delete = reqparse.RequestParser().add_argument( + "credential_id", type=str, required=True, nullable=False, location="json" +) + + @console_ns.route("/auth/plugin/datasource//delete") class DatasourceAuthDeleteApi(Resource): + @api.expect(parser_datasource_delete) @setup_required @login_required @account_initialization_required @@ -181,10 +188,7 @@ class DatasourceAuthDeleteApi(Resource): plugin_id = datasource_provider_id.plugin_id provider_name = datasource_provider_id.provider_name - parser = reqparse.RequestParser().add_argument( - "credential_id", type=str, required=True, nullable=False, location="json" - ) - args = parser.parse_args() + args = parser_datasource_delete.parse_args() datasource_provider_service = DatasourceProviderService() datasource_provider_service.remove_datasource_credentials( tenant_id=current_tenant_id, @@ -195,8 +199,17 @@ class DatasourceAuthDeleteApi(Resource): return {"result": "success"}, 200 +parser_datasource_update = ( + reqparse.RequestParser() + .add_argument("credentials", type=dict, required=False, nullable=True, location="json") + .add_argument("name", type=StrLen(max_length=100), required=False, nullable=True, location="json") + .add_argument("credential_id", type=str, required=True, nullable=False, location="json") +) + + @console_ns.route("/auth/plugin/datasource//update") class DatasourceAuthUpdateApi(Resource): + @api.expect(parser_datasource_update) @setup_required @login_required @account_initialization_required @@ -205,13 +218,7 @@ class DatasourceAuthUpdateApi(Resource): _, current_tenant_id = current_account_with_tenant() datasource_provider_id = DatasourceProviderID(provider_id) - parser = ( - reqparse.RequestParser() - .add_argument("credentials", type=dict, required=False, nullable=True, location="json") - .add_argument("name", type=StrLen(max_length=100), required=False, nullable=True, location="json") - .add_argument("credential_id", type=str, required=True, nullable=False, location="json") - ) - args = parser.parse_args() + args = parser_datasource_update.parse_args() datasource_provider_service = DatasourceProviderService() datasource_provider_service.update_datasource_credentials( @@ -251,8 +258,16 @@ class DatasourceHardCodeAuthListApi(Resource): return {"result": jsonable_encoder(datasources)}, 200 +parser_datasource_custom = ( + reqparse.RequestParser() + .add_argument("client_params", type=dict, required=False, nullable=True, location="json") + .add_argument("enable_oauth_custom_client", type=bool, required=False, nullable=True, location="json") +) + + @console_ns.route("/auth/plugin/datasource//custom-client") class DatasourceAuthOauthCustomClient(Resource): + @api.expect(parser_datasource_custom) @setup_required @login_required @account_initialization_required @@ -260,12 +275,7 @@ class DatasourceAuthOauthCustomClient(Resource): def post(self, provider_id: str): _, current_tenant_id = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("client_params", type=dict, required=False, nullable=True, location="json") - .add_argument("enable_oauth_custom_client", type=bool, required=False, nullable=True, location="json") - ) - args = parser.parse_args() + args = parser_datasource_custom.parse_args() datasource_provider_id = DatasourceProviderID(provider_id) datasource_provider_service = DatasourceProviderService() datasource_provider_service.setup_oauth_custom_client_params( @@ -291,8 +301,12 @@ class DatasourceAuthOauthCustomClient(Resource): return {"result": "success"}, 200 +parser_default = reqparse.RequestParser().add_argument("id", type=str, required=True, nullable=False, location="json") + + @console_ns.route("/auth/plugin/datasource//default") class DatasourceAuthDefaultApi(Resource): + @api.expect(parser_default) @setup_required @login_required @account_initialization_required @@ -300,8 +314,7 @@ class DatasourceAuthDefaultApi(Resource): def post(self, provider_id: str): _, current_tenant_id = current_account_with_tenant() - parser = reqparse.RequestParser().add_argument("id", type=str, required=True, nullable=False, location="json") - args = parser.parse_args() + args = parser_default.parse_args() datasource_provider_id = DatasourceProviderID(provider_id) datasource_provider_service = DatasourceProviderService() datasource_provider_service.set_default_datasource_provider( @@ -312,8 +325,16 @@ class DatasourceAuthDefaultApi(Resource): return {"result": "success"}, 200 +parser_update_name = ( + reqparse.RequestParser() + .add_argument("name", type=StrLen(max_length=100), required=True, nullable=False, location="json") + .add_argument("credential_id", type=str, required=True, nullable=False, location="json") +) + + @console_ns.route("/auth/plugin/datasource//update-name") class DatasourceUpdateProviderNameApi(Resource): + @api.expect(parser_update_name) @setup_required @login_required @account_initialization_required @@ -321,12 +342,7 @@ class DatasourceUpdateProviderNameApi(Resource): def post(self, provider_id: str): _, current_tenant_id = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("name", type=StrLen(max_length=100), required=True, nullable=False, location="json") - .add_argument("credential_id", type=str, required=True, nullable=False, location="json") - ) - args = parser.parse_args() + args = parser_update_name.parse_args() datasource_provider_id = DatasourceProviderID(provider_id) datasource_provider_service = DatasourceProviderService() datasource_provider_service.update_datasource_provider_name( diff --git a/api/controllers/console/datasets/rag_pipeline/datasource_content_preview.py b/api/controllers/console/datasets/rag_pipeline/datasource_content_preview.py index 856e4a1c70..5e3b3428eb 100644 --- a/api/controllers/console/datasets/rag_pipeline/datasource_content_preview.py +++ b/api/controllers/console/datasets/rag_pipeline/datasource_content_preview.py @@ -1,10 +1,10 @@ from flask_restx import ( # type: ignore Resource, # type: ignore - reqparse, ) +from pydantic import BaseModel from werkzeug.exceptions import Forbidden -from controllers.console import console_ns +from controllers.console import api, console_ns from controllers.console.datasets.wraps import get_rag_pipeline from controllers.console.wraps import account_initialization_required, setup_required from libs.login import current_user, login_required @@ -12,9 +12,21 @@ from models import Account from models.dataset import Pipeline from services.rag_pipeline.rag_pipeline import RagPipelineService +DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}" + + +class Parser(BaseModel): + inputs: dict + datasource_type: str + credential_id: str | None = None + + +console_ns.schema_model(Parser.__name__, Parser.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)) + @console_ns.route("/rag/pipelines//workflows/published/datasource/nodes//preview") class DataSourceContentPreviewApi(Resource): + @api.expect(console_ns.models[Parser.__name__], validate=True) @setup_required @login_required @account_initialization_required @@ -26,21 +38,10 @@ class DataSourceContentPreviewApi(Resource): if not isinstance(current_user, Account): raise Forbidden() - parser = ( - reqparse.RequestParser() - .add_argument("inputs", type=dict, required=True, nullable=False, location="json") - .add_argument("datasource_type", type=str, required=True, location="json") - .add_argument("credential_id", type=str, required=False, location="json") - ) - args = parser.parse_args() - - inputs = args.get("inputs") - if inputs is None: - raise ValueError("missing inputs") - datasource_type = args.get("datasource_type") - if datasource_type is None: - raise ValueError("missing datasource_type") + args = Parser.model_validate(api.payload) + inputs = args.inputs + datasource_type = args.datasource_type rag_pipeline_service = RagPipelineService() preview_content = rag_pipeline_service.run_datasource_node_preview( pipeline=pipeline, @@ -49,6 +50,6 @@ class DataSourceContentPreviewApi(Resource): account=current_user, datasource_type=datasource_type, is_published=True, - credential_id=args.get("credential_id"), + credential_id=args.credential_id, ) return preview_content, 200 diff --git a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py index 2c28120e65..d658d65b71 100644 --- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py +++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py @@ -1,11 +1,11 @@ from flask_restx import Resource, marshal_with, reqparse # type: ignore from sqlalchemy.orm import Session -from werkzeug.exceptions import Forbidden from controllers.console import console_ns from controllers.console.datasets.wraps import get_rag_pipeline from controllers.console.wraps import ( account_initialization_required, + edit_permission_required, setup_required, ) from extensions.ext_database import db @@ -21,12 +21,11 @@ class RagPipelineImportApi(Resource): @setup_required @login_required @account_initialization_required + @edit_permission_required @marshal_with(pipeline_import_fields) def post(self): # Check user role first current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() parser = ( reqparse.RequestParser() @@ -71,12 +70,10 @@ class RagPipelineImportConfirmApi(Resource): @setup_required @login_required @account_initialization_required + @edit_permission_required @marshal_with(pipeline_import_fields) def post(self, import_id): current_user, _ = current_account_with_tenant() - # Check user role first - if not current_user.has_edit_permission: - raise Forbidden() # Create service with session with Session(db.engine) as session: @@ -98,12 +95,9 @@ class RagPipelineImportCheckDependenciesApi(Resource): @login_required @get_rag_pipeline @account_initialization_required + @edit_permission_required @marshal_with(pipeline_import_check_dependencies_fields) def get(self, pipeline: Pipeline): - current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() - with Session(db.engine) as session: import_service = RagPipelineDslService(session) result = import_service.check_dependencies(pipeline=pipeline) @@ -117,12 +111,9 @@ class RagPipelineExportApi(Resource): @login_required @get_rag_pipeline @account_initialization_required + @edit_permission_required def get(self, pipeline: Pipeline): - current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() - - # Add include_secret params + # Add include_secret params parser = reqparse.RequestParser().add_argument("include_secret", type=str, default="false", location="args") args = parser.parse_args() diff --git a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py index 5fe8572dfa..bc8d4fbf81 100644 --- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py +++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py @@ -9,7 +9,7 @@ from sqlalchemy.orm import Session from werkzeug.exceptions import Forbidden, InternalServerError, NotFound import services -from controllers.console import console_ns +from controllers.console import api, console_ns from controllers.console.app.error import ( ConversationCompletedError, DraftWorkflowNotExist, @@ -148,8 +148,12 @@ class DraftRagPipelineApi(Resource): } +parser_run = reqparse.RequestParser().add_argument("inputs", type=dict, location="json") + + @console_ns.route("/rag/pipelines//workflows/draft/iteration/nodes//run") class RagPipelineDraftRunIterationNodeApi(Resource): + @api.expect(parser_run) @setup_required @login_required @account_initialization_required @@ -162,8 +166,7 @@ class RagPipelineDraftRunIterationNodeApi(Resource): # The role of the current user in the ta table must be admin, owner, or editor current_user, _ = current_account_with_tenant() - parser = reqparse.RequestParser().add_argument("inputs", type=dict, location="json") - args = parser.parse_args() + args = parser_run.parse_args() try: response = PipelineGenerateService.generate_single_iteration( @@ -184,9 +187,11 @@ class RagPipelineDraftRunIterationNodeApi(Resource): @console_ns.route("/rag/pipelines//workflows/draft/loop/nodes//run") class RagPipelineDraftRunLoopNodeApi(Resource): + @api.expect(parser_run) @setup_required @login_required @account_initialization_required + @edit_permission_required @get_rag_pipeline def post(self, pipeline: Pipeline, node_id: str): """ @@ -194,11 +199,8 @@ class RagPipelineDraftRunLoopNodeApi(Resource): """ # The role of the current user in the ta table must be admin, owner, or editor current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() - parser = reqparse.RequestParser().add_argument("inputs", type=dict, location="json") - args = parser.parse_args() + args = parser_run.parse_args() try: response = PipelineGenerateService.generate_single_loop( @@ -217,11 +219,22 @@ class RagPipelineDraftRunLoopNodeApi(Resource): raise InternalServerError() +parser_draft_run = ( + reqparse.RequestParser() + .add_argument("inputs", type=dict, required=True, nullable=False, location="json") + .add_argument("datasource_type", type=str, required=True, location="json") + .add_argument("datasource_info_list", type=list, required=True, location="json") + .add_argument("start_node_id", type=str, required=True, location="json") +) + + @console_ns.route("/rag/pipelines//workflows/draft/run") class DraftRagPipelineRunApi(Resource): + @api.expect(parser_draft_run) @setup_required @login_required @account_initialization_required + @edit_permission_required @get_rag_pipeline def post(self, pipeline: Pipeline): """ @@ -229,17 +242,8 @@ class DraftRagPipelineRunApi(Resource): """ # The role of the current user in the ta table must be admin, owner, or editor current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() - parser = ( - reqparse.RequestParser() - .add_argument("inputs", type=dict, required=True, nullable=False, location="json") - .add_argument("datasource_type", type=str, required=True, location="json") - .add_argument("datasource_info_list", type=list, required=True, location="json") - .add_argument("start_node_id", type=str, required=True, location="json") - ) - args = parser.parse_args() + args = parser_draft_run.parse_args() try: response = PipelineGenerateService.generate( @@ -255,11 +259,25 @@ class DraftRagPipelineRunApi(Resource): raise InvokeRateLimitHttpError(ex.description) +parser_published_run = ( + reqparse.RequestParser() + .add_argument("inputs", type=dict, required=True, nullable=False, location="json") + .add_argument("datasource_type", type=str, required=True, location="json") + .add_argument("datasource_info_list", type=list, required=True, location="json") + .add_argument("start_node_id", type=str, required=True, location="json") + .add_argument("is_preview", type=bool, required=True, location="json", default=False) + .add_argument("response_mode", type=str, required=True, location="json", default="streaming") + .add_argument("original_document_id", type=str, required=False, location="json") +) + + @console_ns.route("/rag/pipelines//workflows/published/run") class PublishedRagPipelineRunApi(Resource): + @api.expect(parser_published_run) @setup_required @login_required @account_initialization_required + @edit_permission_required @get_rag_pipeline def post(self, pipeline: Pipeline): """ @@ -267,20 +285,8 @@ class PublishedRagPipelineRunApi(Resource): """ # The role of the current user in the ta table must be admin, owner, or editor current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() - parser = ( - reqparse.RequestParser() - .add_argument("inputs", type=dict, required=True, nullable=False, location="json") - .add_argument("datasource_type", type=str, required=True, location="json") - .add_argument("datasource_info_list", type=list, required=True, location="json") - .add_argument("start_node_id", type=str, required=True, location="json") - .add_argument("is_preview", type=bool, required=True, location="json", default=False) - .add_argument("response_mode", type=str, required=True, location="json", default="streaming") - .add_argument("original_document_id", type=str, required=False, location="json") - ) - args = parser.parse_args() + args = parser_published_run.parse_args() streaming = args["response_mode"] == "streaming" @@ -381,11 +387,21 @@ class PublishedRagPipelineRunApi(Resource): # # return result # +parser_rag_run = ( + reqparse.RequestParser() + .add_argument("inputs", type=dict, required=True, nullable=False, location="json") + .add_argument("datasource_type", type=str, required=True, location="json") + .add_argument("credential_id", type=str, required=False, location="json") +) + + @console_ns.route("/rag/pipelines//workflows/published/datasource/nodes//run") class RagPipelinePublishedDatasourceNodeRunApi(Resource): + @api.expect(parser_rag_run) @setup_required @login_required @account_initialization_required + @edit_permission_required @get_rag_pipeline def post(self, pipeline: Pipeline, node_id: str): """ @@ -393,16 +409,8 @@ class RagPipelinePublishedDatasourceNodeRunApi(Resource): """ # The role of the current user in the ta table must be admin, owner, or editor current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() - parser = ( - reqparse.RequestParser() - .add_argument("inputs", type=dict, required=True, nullable=False, location="json") - .add_argument("datasource_type", type=str, required=True, location="json") - .add_argument("credential_id", type=str, required=False, location="json") - ) - args = parser.parse_args() + args = parser_rag_run.parse_args() inputs = args.get("inputs") if inputs is None: @@ -429,8 +437,10 @@ class RagPipelinePublishedDatasourceNodeRunApi(Resource): @console_ns.route("/rag/pipelines//workflows/draft/datasource/nodes//run") class RagPipelineDraftDatasourceNodeRunApi(Resource): + @api.expect(parser_rag_run) @setup_required @login_required + @edit_permission_required @account_initialization_required @get_rag_pipeline def post(self, pipeline: Pipeline, node_id: str): @@ -439,16 +449,8 @@ class RagPipelineDraftDatasourceNodeRunApi(Resource): """ # The role of the current user in the ta table must be admin, owner, or editor current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() - parser = ( - reqparse.RequestParser() - .add_argument("inputs", type=dict, required=True, nullable=False, location="json") - .add_argument("datasource_type", type=str, required=True, location="json") - .add_argument("credential_id", type=str, required=False, location="json") - ) - args = parser.parse_args() + args = parser_rag_run.parse_args() inputs = args.get("inputs") if inputs is None: @@ -473,10 +475,17 @@ class RagPipelineDraftDatasourceNodeRunApi(Resource): ) +parser_run_api = reqparse.RequestParser().add_argument( + "inputs", type=dict, required=True, nullable=False, location="json" +) + + @console_ns.route("/rag/pipelines//workflows/draft/nodes//run") class RagPipelineDraftNodeRunApi(Resource): + @api.expect(parser_run_api) @setup_required @login_required + @edit_permission_required @account_initialization_required @get_rag_pipeline @marshal_with(workflow_run_node_execution_fields) @@ -486,13 +495,8 @@ class RagPipelineDraftNodeRunApi(Resource): """ # The role of the current user in the ta table must be admin, owner, or editor current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() - parser = reqparse.RequestParser().add_argument( - "inputs", type=dict, required=True, nullable=False, location="json" - ) - args = parser.parse_args() + args = parser_run_api.parse_args() inputs = args.get("inputs") if inputs == None: @@ -513,6 +517,7 @@ class RagPipelineDraftNodeRunApi(Resource): class RagPipelineTaskStopApi(Resource): @setup_required @login_required + @edit_permission_required @account_initialization_required @get_rag_pipeline def post(self, pipeline: Pipeline, task_id: str): @@ -521,8 +526,6 @@ class RagPipelineTaskStopApi(Resource): """ # The role of the current user in the ta table must be admin, owner, or editor current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() AppQueueManager.set_stop_flag(task_id, InvokeFrom.DEBUGGER, current_user.id) @@ -534,6 +537,7 @@ class PublishedRagPipelineApi(Resource): @setup_required @login_required @account_initialization_required + @edit_permission_required @get_rag_pipeline @marshal_with(workflow_fields) def get(self, pipeline: Pipeline): @@ -541,9 +545,6 @@ class PublishedRagPipelineApi(Resource): Get published pipeline """ # The role of the current user in the ta table must be admin, owner, or editor - current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() if not pipeline.is_published: return None # fetch published workflow by pipeline @@ -556,6 +557,7 @@ class PublishedRagPipelineApi(Resource): @setup_required @login_required @account_initialization_required + @edit_permission_required @get_rag_pipeline def post(self, pipeline: Pipeline): """ @@ -563,9 +565,6 @@ class PublishedRagPipelineApi(Resource): """ # The role of the current user in the ta table must be admin, owner, or editor current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() - rag_pipeline_service = RagPipelineService() with Session(db.engine) as session: pipeline = session.merge(pipeline) @@ -592,38 +591,33 @@ class DefaultRagPipelineBlockConfigsApi(Resource): @setup_required @login_required @account_initialization_required + @edit_permission_required @get_rag_pipeline def get(self, pipeline: Pipeline): """ Get default block config """ - # The role of the current user in the ta table must be admin, owner, or editor - current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() - # Get default block configs rag_pipeline_service = RagPipelineService() return rag_pipeline_service.get_default_block_configs() +parser_default = reqparse.RequestParser().add_argument("q", type=str, location="args") + + @console_ns.route("/rag/pipelines//workflows/default-workflow-block-configs/") class DefaultRagPipelineBlockConfigApi(Resource): + @api.expect(parser_default) @setup_required @login_required @account_initialization_required + @edit_permission_required @get_rag_pipeline def get(self, pipeline: Pipeline, block_type: str): """ Get default block config """ - # The role of the current user in the ta table must be admin, owner, or editor - current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() - - parser = reqparse.RequestParser().add_argument("q", type=str, location="args") - args = parser.parse_args() + args = parser_default.parse_args() q = args.get("q") @@ -639,11 +633,22 @@ class DefaultRagPipelineBlockConfigApi(Resource): return rag_pipeline_service.get_default_block_config(node_type=block_type, filters=filters) +parser_wf = ( + reqparse.RequestParser() + .add_argument("page", type=inputs.int_range(1, 99999), required=False, default=1, location="args") + .add_argument("limit", type=inputs.int_range(1, 100), required=False, default=10, location="args") + .add_argument("user_id", type=str, required=False, location="args") + .add_argument("named_only", type=inputs.boolean, required=False, default=False, location="args") +) + + @console_ns.route("/rag/pipelines//workflows") class PublishedAllRagPipelineApi(Resource): + @api.expect(parser_wf) @setup_required @login_required @account_initialization_required + @edit_permission_required @get_rag_pipeline @marshal_with(workflow_pagination_fields) def get(self, pipeline: Pipeline): @@ -651,19 +656,10 @@ class PublishedAllRagPipelineApi(Resource): Get published workflows """ current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() - parser = ( - reqparse.RequestParser() - .add_argument("page", type=inputs.int_range(1, 99999), required=False, default=1, location="args") - .add_argument("limit", type=inputs.int_range(1, 100), required=False, default=20, location="args") - .add_argument("user_id", type=str, required=False, location="args") - .add_argument("named_only", type=inputs.boolean, required=False, default=False, location="args") - ) - args = parser.parse_args() - page = int(args.get("page", 1)) - limit = int(args.get("limit", 10)) + args = parser_wf.parse_args() + page = args["page"] + limit = args["limit"] user_id = args.get("user_id") named_only = args.get("named_only", False) @@ -691,11 +687,20 @@ class PublishedAllRagPipelineApi(Resource): } +parser_wf_id = ( + reqparse.RequestParser() + .add_argument("marked_name", type=str, required=False, location="json") + .add_argument("marked_comment", type=str, required=False, location="json") +) + + @console_ns.route("/rag/pipelines//workflows/") class RagPipelineByIdApi(Resource): + @api.expect(parser_wf_id) @setup_required @login_required @account_initialization_required + @edit_permission_required @get_rag_pipeline @marshal_with(workflow_fields) def patch(self, pipeline: Pipeline, workflow_id: str): @@ -704,22 +709,14 @@ class RagPipelineByIdApi(Resource): """ # Check permission current_user, _ = current_account_with_tenant() - if not current_user.has_edit_permission: - raise Forbidden() - parser = ( - reqparse.RequestParser() - .add_argument("marked_name", type=str, required=False, location="json") - .add_argument("marked_comment", type=str, required=False, location="json") - ) - args = parser.parse_args() + args = parser_wf_id.parse_args() # Validate name and comment length if args.marked_name and len(args.marked_name) > 20: raise ValueError("Marked name cannot exceed 20 characters") if args.marked_comment and len(args.marked_comment) > 100: raise ValueError("Marked comment cannot exceed 100 characters") - args = parser.parse_args() # Prepare update data update_data = {} @@ -752,8 +749,12 @@ class RagPipelineByIdApi(Resource): return workflow +parser_parameters = reqparse.RequestParser().add_argument("node_id", type=str, required=True, location="args") + + @console_ns.route("/rag/pipelines//workflows/published/processing/parameters") class PublishedRagPipelineSecondStepApi(Resource): + @api.expect(parser_parameters) @setup_required @login_required @account_initialization_required @@ -763,8 +764,7 @@ class PublishedRagPipelineSecondStepApi(Resource): """ Get second step parameters of rag pipeline """ - parser = reqparse.RequestParser().add_argument("node_id", type=str, required=True, location="args") - args = parser.parse_args() + args = parser_parameters.parse_args() node_id = args.get("node_id") if not node_id: raise ValueError("Node ID is required") @@ -777,6 +777,7 @@ class PublishedRagPipelineSecondStepApi(Resource): @console_ns.route("/rag/pipelines//workflows/published/pre-processing/parameters") class PublishedRagPipelineFirstStepApi(Resource): + @api.expect(parser_parameters) @setup_required @login_required @account_initialization_required @@ -786,8 +787,7 @@ class PublishedRagPipelineFirstStepApi(Resource): """ Get first step parameters of rag pipeline """ - parser = reqparse.RequestParser().add_argument("node_id", type=str, required=True, location="args") - args = parser.parse_args() + args = parser_parameters.parse_args() node_id = args.get("node_id") if not node_id: raise ValueError("Node ID is required") @@ -800,6 +800,7 @@ class PublishedRagPipelineFirstStepApi(Resource): @console_ns.route("/rag/pipelines//workflows/draft/pre-processing/parameters") class DraftRagPipelineFirstStepApi(Resource): + @api.expect(parser_parameters) @setup_required @login_required @account_initialization_required @@ -809,8 +810,7 @@ class DraftRagPipelineFirstStepApi(Resource): """ Get first step parameters of rag pipeline """ - parser = reqparse.RequestParser().add_argument("node_id", type=str, required=True, location="args") - args = parser.parse_args() + args = parser_parameters.parse_args() node_id = args.get("node_id") if not node_id: raise ValueError("Node ID is required") @@ -823,6 +823,7 @@ class DraftRagPipelineFirstStepApi(Resource): @console_ns.route("/rag/pipelines//workflows/draft/processing/parameters") class DraftRagPipelineSecondStepApi(Resource): + @api.expect(parser_parameters) @setup_required @login_required @account_initialization_required @@ -832,8 +833,7 @@ class DraftRagPipelineSecondStepApi(Resource): """ Get second step parameters of rag pipeline """ - parser = reqparse.RequestParser().add_argument("node_id", type=str, required=True, location="args") - args = parser.parse_args() + args = parser_parameters.parse_args() node_id = args.get("node_id") if not node_id: raise ValueError("Node ID is required") @@ -845,8 +845,16 @@ class DraftRagPipelineSecondStepApi(Resource): } +parser_wf_run = ( + reqparse.RequestParser() + .add_argument("last_id", type=uuid_value, location="args") + .add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args") +) + + @console_ns.route("/rag/pipelines//workflow-runs") class RagPipelineWorkflowRunListApi(Resource): + @api.expect(parser_wf_run) @setup_required @login_required @account_initialization_required @@ -856,12 +864,7 @@ class RagPipelineWorkflowRunListApi(Resource): """ Get workflow run list """ - parser = ( - reqparse.RequestParser() - .add_argument("last_id", type=uuid_value, location="args") - .add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args") - ) - args = parser.parse_args() + args = parser_wf_run.parse_args() rag_pipeline_service = RagPipelineService() result = rag_pipeline_service.get_rag_pipeline_paginate_workflow_runs(pipeline=pipeline, args=args) @@ -961,8 +964,18 @@ class RagPipelineTransformApi(Resource): return result +parser_var = ( + reqparse.RequestParser() + .add_argument("datasource_type", type=str, required=True, location="json") + .add_argument("datasource_info", type=dict, required=True, location="json") + .add_argument("start_node_id", type=str, required=True, location="json") + .add_argument("start_node_title", type=str, required=True, location="json") +) + + @console_ns.route("/rag/pipelines//workflows/draft/datasource/variables-inspect") class RagPipelineDatasourceVariableApi(Resource): + @api.expect(parser_var) @setup_required @login_required @account_initialization_required @@ -974,14 +987,7 @@ class RagPipelineDatasourceVariableApi(Resource): Set datasource variables """ current_user, _ = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("datasource_type", type=str, required=True, location="json") - .add_argument("datasource_info", type=dict, required=True, location="json") - .add_argument("start_node_id", type=str, required=True, location="json") - .add_argument("start_node_title", type=str, required=True, location="json") - ) - args = parser.parse_args() + args = parser_var.parse_args() rag_pipeline_service = RagPipelineService() workflow_node_execution = rag_pipeline_service.set_datasource_variables( diff --git a/api/controllers/console/explore/recommended_app.py b/api/controllers/console/explore/recommended_app.py index 751012757a..11c7a1bc18 100644 --- a/api/controllers/console/explore/recommended_app.py +++ b/api/controllers/console/explore/recommended_app.py @@ -1,7 +1,7 @@ from flask_restx import Resource, fields, marshal_with, reqparse from constants.languages import languages -from controllers.console import console_ns +from controllers.console import api, console_ns from controllers.console.wraps import account_initialization_required from libs.helper import AppIconUrlField from libs.login import current_user, login_required @@ -35,15 +35,18 @@ recommended_app_list_fields = { } +parser_apps = reqparse.RequestParser().add_argument("language", type=str, location="args") + + @console_ns.route("/explore/apps") class RecommendedAppListApi(Resource): + @api.expect(parser_apps) @login_required @account_initialization_required @marshal_with(recommended_app_list_fields) def get(self): # language args - parser = reqparse.RequestParser().add_argument("language", type=str, location="args") - args = parser.parse_args() + args = parser_apps.parse_args() language = args.get("language") if language and language in languages: diff --git a/api/controllers/console/extension.py b/api/controllers/console/extension.py index 4e1a8aeb3e..a1d36def0d 100644 --- a/api/controllers/console/extension.py +++ b/api/controllers/console/extension.py @@ -66,13 +66,7 @@ class APIBasedExtensionAPI(Resource): @account_initialization_required @marshal_with(api_based_extension_fields) def post(self): - parser = ( - reqparse.RequestParser() - .add_argument("name", type=str, required=True, location="json") - .add_argument("api_endpoint", type=str, required=True, location="json") - .add_argument("api_key", type=str, required=True, location="json") - ) - args = parser.parse_args() + args = api.payload _, current_tenant_id = current_account_with_tenant() extension_data = APIBasedExtension( @@ -125,13 +119,7 @@ class APIBasedExtensionDetailAPI(Resource): extension_data_from_db = APIBasedExtensionService.get_with_tenant_id(current_tenant_id, api_based_extension_id) - parser = ( - reqparse.RequestParser() - .add_argument("name", type=str, required=True, location="json") - .add_argument("api_endpoint", type=str, required=True, location="json") - .add_argument("api_key", type=str, required=True, location="json") - ) - args = parser.parse_args() + args = api.payload extension_data_from_db.name = args["name"] extension_data_from_db.api_endpoint = args["api_endpoint"] diff --git a/api/controllers/console/files.py b/api/controllers/console/files.py index 1cd193f7ad..fdd7c2f479 100644 --- a/api/controllers/console/files.py +++ b/api/controllers/console/files.py @@ -8,6 +8,7 @@ import services from configs import dify_config from constants import DOCUMENT_EXTENSIONS from controllers.common.errors import ( + BlockedFileExtensionError, FilenameNotExistsError, FileTooLargeError, NoFileUploadedError, @@ -39,6 +40,7 @@ class FileApi(Resource): return { "file_size_limit": dify_config.UPLOAD_FILE_SIZE_LIMIT, "batch_count_limit": dify_config.UPLOAD_FILE_BATCH_LIMIT, + "file_upload_limit": dify_config.BATCH_UPLOAD_LIMIT, "image_file_size_limit": dify_config.UPLOAD_IMAGE_FILE_SIZE_LIMIT, "video_file_size_limit": dify_config.UPLOAD_VIDEO_FILE_SIZE_LIMIT, "audio_file_size_limit": dify_config.UPLOAD_AUDIO_FILE_SIZE_LIMIT, @@ -82,6 +84,8 @@ class FileApi(Resource): raise FileTooLargeError(file_too_large_error.description) except services.errors.file.UnsupportedFileTypeError: raise UnsupportedFileTypeError() + except services.errors.file.BlockedFileExtensionError as blocked_extension_error: + raise BlockedFileExtensionError(blocked_extension_error.description) return upload_file, 201 diff --git a/api/controllers/console/remote_files.py b/api/controllers/console/remote_files.py index 96c86dc0db..47c7ecde9a 100644 --- a/api/controllers/console/remote_files.py +++ b/api/controllers/console/remote_files.py @@ -10,6 +10,7 @@ from controllers.common.errors import ( RemoteFileUploadError, UnsupportedFileTypeError, ) +from controllers.console import api from core.file import helpers as file_helpers from core.helper import ssrf_proxy from extensions.ext_database import db @@ -36,12 +37,15 @@ class RemoteFileInfoApi(Resource): } +parser_upload = reqparse.RequestParser().add_argument("url", type=str, required=True, help="URL is required") + + @console_ns.route("/remote-files/upload") class RemoteFileUploadApi(Resource): + @api.expect(parser_upload) @marshal_with(file_fields_with_signed_url) def post(self): - parser = reqparse.RequestParser().add_argument("url", type=str, required=True, help="URL is required") - args = parser.parse_args() + args = parser_upload.parse_args() url = args["url"] diff --git a/api/controllers/console/setup.py b/api/controllers/console/setup.py index 1200349e2d..22929c851e 100644 --- a/api/controllers/console/setup.py +++ b/api/controllers/console/setup.py @@ -49,6 +49,7 @@ class SetupApi(Resource): "email": fields.String(required=True, description="Admin email address"), "name": fields.String(required=True, description="Admin name (max 30 characters)"), "password": fields.String(required=True, description="Admin password"), + "language": fields.String(required=False, description="Admin language"), }, ) ) diff --git a/api/controllers/console/tag/tags.py b/api/controllers/console/tag/tags.py index 40ae7fb4d0..ee032756eb 100644 --- a/api/controllers/console/tag/tags.py +++ b/api/controllers/console/tag/tags.py @@ -2,8 +2,8 @@ from flask import request from flask_restx import Resource, marshal_with, reqparse from werkzeug.exceptions import Forbidden -from controllers.console import console_ns -from controllers.console.wraps import account_initialization_required, setup_required +from controllers.console import api, console_ns +from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required from fields.tag_fields import dataset_tag_fields from libs.login import current_account_with_tenant, login_required from models.model import Tag @@ -16,6 +16,19 @@ def _validate_name(name): return name +parser_tags = ( + reqparse.RequestParser() + .add_argument( + "name", + nullable=False, + required=True, + help="Name must be between 1 to 50 characters.", + type=_validate_name, + ) + .add_argument("type", type=str, location="json", choices=Tag.TAG_TYPE_LIST, nullable=True, help="Invalid tag type.") +) + + @console_ns.route("/tags") class TagListApi(Resource): @setup_required @@ -30,6 +43,7 @@ class TagListApi(Resource): return tags, 200 + @api.expect(parser_tags) @setup_required @login_required @account_initialization_required @@ -39,20 +53,7 @@ class TagListApi(Resource): if not (current_user.has_edit_permission or current_user.is_dataset_editor): raise Forbidden() - parser = ( - reqparse.RequestParser() - .add_argument( - "name", - nullable=False, - required=True, - help="Name must be between 1 to 50 characters.", - type=_validate_name, - ) - .add_argument( - "type", type=str, location="json", choices=Tag.TAG_TYPE_LIST, nullable=True, help="Invalid tag type." - ) - ) - args = parser.parse_args() + args = parser_tags.parse_args() tag = TagService.save_tags(args) response = {"id": tag.id, "name": tag.name, "type": tag.type, "binding_count": 0} @@ -60,8 +61,14 @@ class TagListApi(Resource): return response, 200 +parser_tag_id = reqparse.RequestParser().add_argument( + "name", nullable=False, required=True, help="Name must be between 1 to 50 characters.", type=_validate_name +) + + @console_ns.route("/tags/") class TagUpdateDeleteApi(Resource): + @api.expect(parser_tag_id) @setup_required @login_required @account_initialization_required @@ -72,10 +79,7 @@ class TagUpdateDeleteApi(Resource): if not (current_user.has_edit_permission or current_user.is_dataset_editor): raise Forbidden() - parser = reqparse.RequestParser().add_argument( - "name", nullable=False, required=True, help="Name must be between 1 to 50 characters.", type=_validate_name - ) - args = parser.parse_args() + args = parser_tag_id.parse_args() tag = TagService.update_tags(args, tag_id) binding_count = TagService.get_tag_binding_count(tag_id) @@ -87,20 +91,26 @@ class TagUpdateDeleteApi(Resource): @setup_required @login_required @account_initialization_required + @edit_permission_required def delete(self, tag_id): - current_user, _ = current_account_with_tenant() tag_id = str(tag_id) - # The role of the current user in the ta table must be admin, owner, or editor - if not current_user.has_edit_permission: - raise Forbidden() TagService.delete_tag(tag_id) return 204 +parser_create = ( + reqparse.RequestParser() + .add_argument("tag_ids", type=list, nullable=False, required=True, location="json", help="Tag IDs is required.") + .add_argument("target_id", type=str, nullable=False, required=True, location="json", help="Target ID is required.") + .add_argument("type", type=str, location="json", choices=Tag.TAG_TYPE_LIST, nullable=True, help="Invalid tag type.") +) + + @console_ns.route("/tag-bindings/create") class TagBindingCreateApi(Resource): + @api.expect(parser_create) @setup_required @login_required @account_initialization_required @@ -110,26 +120,23 @@ class TagBindingCreateApi(Resource): if not (current_user.has_edit_permission or current_user.is_dataset_editor): raise Forbidden() - parser = ( - reqparse.RequestParser() - .add_argument( - "tag_ids", type=list, nullable=False, required=True, location="json", help="Tag IDs is required." - ) - .add_argument( - "target_id", type=str, nullable=False, required=True, location="json", help="Target ID is required." - ) - .add_argument( - "type", type=str, location="json", choices=Tag.TAG_TYPE_LIST, nullable=True, help="Invalid tag type." - ) - ) - args = parser.parse_args() + args = parser_create.parse_args() TagService.save_tag_binding(args) return {"result": "success"}, 200 +parser_remove = ( + reqparse.RequestParser() + .add_argument("tag_id", type=str, nullable=False, required=True, help="Tag ID is required.") + .add_argument("target_id", type=str, nullable=False, required=True, help="Target ID is required.") + .add_argument("type", type=str, location="json", choices=Tag.TAG_TYPE_LIST, nullable=True, help="Invalid tag type.") +) + + @console_ns.route("/tag-bindings/remove") class TagBindingDeleteApi(Resource): + @api.expect(parser_remove) @setup_required @login_required @account_initialization_required @@ -139,15 +146,7 @@ class TagBindingDeleteApi(Resource): if not (current_user.has_edit_permission or current_user.is_dataset_editor): raise Forbidden() - parser = ( - reqparse.RequestParser() - .add_argument("tag_id", type=str, nullable=False, required=True, help="Tag ID is required.") - .add_argument("target_id", type=str, nullable=False, required=True, help="Target ID is required.") - .add_argument( - "type", type=str, location="json", choices=Tag.TAG_TYPE_LIST, nullable=True, help="Invalid tag type." - ) - ) - args = parser.parse_args() + args = parser_remove.parse_args() TagService.delete_tag_binding(args) return {"result": "success"}, 200 diff --git a/api/controllers/console/version.py b/api/controllers/console/version.py index 417486f59e..104a205fc8 100644 --- a/api/controllers/console/version.py +++ b/api/controllers/console/version.py @@ -11,16 +11,16 @@ from . import api, console_ns logger = logging.getLogger(__name__) +parser = reqparse.RequestParser().add_argument( + "current_version", type=str, required=True, location="args", help="Current application version" +) + @console_ns.route("/version") class VersionApi(Resource): @api.doc("check_version_update") @api.doc(description="Check for application version updates") - @api.expect( - api.parser().add_argument( - "current_version", type=str, required=True, location="args", help="Current application version" - ) - ) + @api.expect(parser) @api.response( 200, "Success", @@ -37,7 +37,6 @@ class VersionApi(Resource): ) def get(self): """Check for application version updates""" - parser = reqparse.RequestParser().add_argument("current_version", type=str, required=True, location="args") args = parser.parse_args() check_update_url = dify_config.CHECK_UPDATE_URL diff --git a/api/controllers/console/workspace/account.py b/api/controllers/console/workspace/account.py index 499a52370f..0833b39f41 100644 --- a/api/controllers/console/workspace/account.py +++ b/api/controllers/console/workspace/account.py @@ -8,7 +8,7 @@ from sqlalchemy.orm import Session from configs import dify_config from constants.languages import supported_language -from controllers.console import console_ns +from controllers.console import api, console_ns from controllers.console.auth.error import ( EmailAlreadyInUseError, EmailChangeLimitError, @@ -43,8 +43,19 @@ from services.billing_service import BillingService from services.errors.account import CurrentPasswordIncorrectError as ServiceCurrentPasswordIncorrectError +def _init_parser(): + parser = reqparse.RequestParser() + if dify_config.EDITION == "CLOUD": + parser.add_argument("invitation_code", type=str, location="json") + parser.add_argument("interface_language", type=supported_language, required=True, location="json").add_argument( + "timezone", type=timezone, required=True, location="json" + ) + return parser + + @console_ns.route("/account/init") class AccountInitApi(Resource): + @api.expect(_init_parser()) @setup_required @login_required def post(self): @@ -53,14 +64,7 @@ class AccountInitApi(Resource): if account.status == "active": raise AccountAlreadyInitedError() - parser = reqparse.RequestParser() - - if dify_config.EDITION == "CLOUD": - parser.add_argument("invitation_code", type=str, location="json") - parser.add_argument("interface_language", type=supported_language, required=True, location="json").add_argument( - "timezone", type=timezone, required=True, location="json" - ) - args = parser.parse_args() + args = _init_parser().parse_args() if dify_config.EDITION == "CLOUD": if not args["invitation_code"]: @@ -106,16 +110,19 @@ class AccountProfileApi(Resource): return current_user +parser_name = reqparse.RequestParser().add_argument("name", type=str, required=True, location="json") + + @console_ns.route("/account/name") class AccountNameApi(Resource): + @api.expect(parser_name) @setup_required @login_required @account_initialization_required @marshal_with(account_fields) def post(self): current_user, _ = current_account_with_tenant() - parser = reqparse.RequestParser().add_argument("name", type=str, required=True, location="json") - args = parser.parse_args() + args = parser_name.parse_args() # Validate account name length if len(args["name"]) < 3 or len(args["name"]) > 30: @@ -126,68 +133,80 @@ class AccountNameApi(Resource): return updated_account +parser_avatar = reqparse.RequestParser().add_argument("avatar", type=str, required=True, location="json") + + @console_ns.route("/account/avatar") class AccountAvatarApi(Resource): + @api.expect(parser_avatar) @setup_required @login_required @account_initialization_required @marshal_with(account_fields) def post(self): current_user, _ = current_account_with_tenant() - parser = reqparse.RequestParser().add_argument("avatar", type=str, required=True, location="json") - args = parser.parse_args() + args = parser_avatar.parse_args() updated_account = AccountService.update_account(current_user, avatar=args["avatar"]) return updated_account +parser_interface = reqparse.RequestParser().add_argument( + "interface_language", type=supported_language, required=True, location="json" +) + + @console_ns.route("/account/interface-language") class AccountInterfaceLanguageApi(Resource): + @api.expect(parser_interface) @setup_required @login_required @account_initialization_required @marshal_with(account_fields) def post(self): current_user, _ = current_account_with_tenant() - parser = reqparse.RequestParser().add_argument( - "interface_language", type=supported_language, required=True, location="json" - ) - args = parser.parse_args() + args = parser_interface.parse_args() updated_account = AccountService.update_account(current_user, interface_language=args["interface_language"]) return updated_account +parser_theme = reqparse.RequestParser().add_argument( + "interface_theme", type=str, choices=["light", "dark"], required=True, location="json" +) + + @console_ns.route("/account/interface-theme") class AccountInterfaceThemeApi(Resource): + @api.expect(parser_theme) @setup_required @login_required @account_initialization_required @marshal_with(account_fields) def post(self): current_user, _ = current_account_with_tenant() - parser = reqparse.RequestParser().add_argument( - "interface_theme", type=str, choices=["light", "dark"], required=True, location="json" - ) - args = parser.parse_args() + args = parser_theme.parse_args() updated_account = AccountService.update_account(current_user, interface_theme=args["interface_theme"]) return updated_account +parser_timezone = reqparse.RequestParser().add_argument("timezone", type=str, required=True, location="json") + + @console_ns.route("/account/timezone") class AccountTimezoneApi(Resource): + @api.expect(parser_timezone) @setup_required @login_required @account_initialization_required @marshal_with(account_fields) def post(self): current_user, _ = current_account_with_tenant() - parser = reqparse.RequestParser().add_argument("timezone", type=str, required=True, location="json") - args = parser.parse_args() + args = parser_timezone.parse_args() # Validate timezone string, e.g. America/New_York, Asia/Shanghai if args["timezone"] not in pytz.all_timezones: @@ -198,21 +217,24 @@ class AccountTimezoneApi(Resource): return updated_account +parser_pw = ( + reqparse.RequestParser() + .add_argument("password", type=str, required=False, location="json") + .add_argument("new_password", type=str, required=True, location="json") + .add_argument("repeat_new_password", type=str, required=True, location="json") +) + + @console_ns.route("/account/password") class AccountPasswordApi(Resource): + @api.expect(parser_pw) @setup_required @login_required @account_initialization_required @marshal_with(account_fields) def post(self): current_user, _ = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("password", type=str, required=False, location="json") - .add_argument("new_password", type=str, required=True, location="json") - .add_argument("repeat_new_password", type=str, required=True, location="json") - ) - args = parser.parse_args() + args = parser_pw.parse_args() if args["new_password"] != args["repeat_new_password"]: raise RepeatPasswordNotMatchError() @@ -294,20 +316,23 @@ class AccountDeleteVerifyApi(Resource): return {"result": "success", "data": token} +parser_delete = ( + reqparse.RequestParser() + .add_argument("token", type=str, required=True, location="json") + .add_argument("code", type=str, required=True, location="json") +) + + @console_ns.route("/account/delete") class AccountDeleteApi(Resource): + @api.expect(parser_delete) @setup_required @login_required @account_initialization_required def post(self): account, _ = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("token", type=str, required=True, location="json") - .add_argument("code", type=str, required=True, location="json") - ) - args = parser.parse_args() + args = parser_delete.parse_args() if not AccountService.verify_account_deletion_code(args["token"], args["code"]): raise InvalidAccountDeletionCodeError() @@ -317,16 +342,19 @@ class AccountDeleteApi(Resource): return {"result": "success"} +parser_feedback = ( + reqparse.RequestParser() + .add_argument("email", type=str, required=True, location="json") + .add_argument("feedback", type=str, required=True, location="json") +) + + @console_ns.route("/account/delete/feedback") class AccountDeleteUpdateFeedbackApi(Resource): + @api.expect(parser_feedback) @setup_required def post(self): - parser = ( - reqparse.RequestParser() - .add_argument("email", type=str, required=True, location="json") - .add_argument("feedback", type=str, required=True, location="json") - ) - args = parser.parse_args() + args = parser_feedback.parse_args() BillingService.update_account_deletion_feedback(args["email"], args["feedback"]) @@ -351,6 +379,14 @@ class EducationVerifyApi(Resource): return BillingService.EducationIdentity.verify(account.id, account.email) +parser_edu = ( + reqparse.RequestParser() + .add_argument("token", type=str, required=True, location="json") + .add_argument("institution", type=str, required=True, location="json") + .add_argument("role", type=str, required=True, location="json") +) + + @console_ns.route("/account/education") class EducationApi(Resource): status_fields = { @@ -360,6 +396,7 @@ class EducationApi(Resource): "allow_refresh": fields.Boolean, } + @api.expect(parser_edu) @setup_required @login_required @account_initialization_required @@ -368,13 +405,7 @@ class EducationApi(Resource): def post(self): account, _ = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("token", type=str, required=True, location="json") - .add_argument("institution", type=str, required=True, location="json") - .add_argument("role", type=str, required=True, location="json") - ) - args = parser.parse_args() + args = parser_edu.parse_args() return BillingService.EducationIdentity.activate(account, args["token"], args["institution"], args["role"]) @@ -394,6 +425,14 @@ class EducationApi(Resource): return res +parser_autocomplete = ( + reqparse.RequestParser() + .add_argument("keywords", type=str, required=True, location="args") + .add_argument("page", type=int, required=False, location="args", default=0) + .add_argument("limit", type=int, required=False, location="args", default=20) +) + + @console_ns.route("/account/education/autocomplete") class EducationAutoCompleteApi(Resource): data_fields = { @@ -402,6 +441,7 @@ class EducationAutoCompleteApi(Resource): "has_next": fields.Boolean, } + @api.expect(parser_autocomplete) @setup_required @login_required @account_initialization_required @@ -409,33 +449,30 @@ class EducationAutoCompleteApi(Resource): @cloud_edition_billing_enabled @marshal_with(data_fields) def get(self): - parser = ( - reqparse.RequestParser() - .add_argument("keywords", type=str, required=True, location="args") - .add_argument("page", type=int, required=False, location="args", default=0) - .add_argument("limit", type=int, required=False, location="args", default=20) - ) - args = parser.parse_args() + args = parser_autocomplete.parse_args() return BillingService.EducationIdentity.autocomplete(args["keywords"], args["page"], args["limit"]) +parser_change_email = ( + reqparse.RequestParser() + .add_argument("email", type=email, required=True, location="json") + .add_argument("language", type=str, required=False, location="json") + .add_argument("phase", type=str, required=False, location="json") + .add_argument("token", type=str, required=False, location="json") +) + + @console_ns.route("/account/change-email") class ChangeEmailSendEmailApi(Resource): + @api.expect(parser_change_email) @enable_change_email @setup_required @login_required @account_initialization_required def post(self): current_user, _ = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("email", type=email, required=True, location="json") - .add_argument("language", type=str, required=False, location="json") - .add_argument("phase", type=str, required=False, location="json") - .add_argument("token", type=str, required=False, location="json") - ) - args = parser.parse_args() + args = parser_change_email.parse_args() ip_address = extract_remote_ip(request) if AccountService.is_email_send_ip_limit(ip_address): @@ -470,20 +507,23 @@ class ChangeEmailSendEmailApi(Resource): return {"result": "success", "data": token} +parser_validity = ( + reqparse.RequestParser() + .add_argument("email", type=email, required=True, location="json") + .add_argument("code", type=str, required=True, location="json") + .add_argument("token", type=str, required=True, nullable=False, location="json") +) + + @console_ns.route("/account/change-email/validity") class ChangeEmailCheckApi(Resource): + @api.expect(parser_validity) @enable_change_email @setup_required @login_required @account_initialization_required def post(self): - parser = ( - reqparse.RequestParser() - .add_argument("email", type=email, required=True, location="json") - .add_argument("code", type=str, required=True, location="json") - .add_argument("token", type=str, required=True, nullable=False, location="json") - ) - args = parser.parse_args() + args = parser_validity.parse_args() user_email = args["email"] @@ -514,20 +554,23 @@ class ChangeEmailCheckApi(Resource): return {"is_valid": True, "email": token_data.get("email"), "token": new_token} +parser_reset = ( + reqparse.RequestParser() + .add_argument("new_email", type=email, required=True, location="json") + .add_argument("token", type=str, required=True, nullable=False, location="json") +) + + @console_ns.route("/account/change-email/reset") class ChangeEmailResetApi(Resource): + @api.expect(parser_reset) @enable_change_email @setup_required @login_required @account_initialization_required @marshal_with(account_fields) def post(self): - parser = ( - reqparse.RequestParser() - .add_argument("new_email", type=email, required=True, location="json") - .add_argument("token", type=str, required=True, nullable=False, location="json") - ) - args = parser.parse_args() + args = parser_reset.parse_args() if AccountService.is_account_in_freeze(args["new_email"]): raise AccountInFreezeError() @@ -555,12 +598,15 @@ class ChangeEmailResetApi(Resource): return updated_account +parser_check = reqparse.RequestParser().add_argument("email", type=email, required=True, location="json") + + @console_ns.route("/account/change-email/check-email-unique") class CheckEmailUnique(Resource): + @api.expect(parser_check) @setup_required def post(self): - parser = reqparse.RequestParser().add_argument("email", type=email, required=True, location="json") - args = parser.parse_args() + args = parser_check.parse_args() if AccountService.is_account_in_freeze(args["email"]): raise AccountInFreezeError() if not AccountService.check_email_unique(args["email"]): diff --git a/api/controllers/console/workspace/endpoint.py b/api/controllers/console/workspace/endpoint.py index d115f62d73..ae870a630e 100644 --- a/api/controllers/console/workspace/endpoint.py +++ b/api/controllers/console/workspace/endpoint.py @@ -1,8 +1,7 @@ from flask_restx import Resource, fields, reqparse -from werkzeug.exceptions import Forbidden from controllers.console import api, console_ns -from controllers.console.wraps import account_initialization_required, setup_required +from controllers.console.wraps import account_initialization_required, is_admin_or_owner_required, setup_required from core.model_runtime.utils.encoders import jsonable_encoder from core.plugin.impl.exc import PluginPermissionDeniedError from libs.login import current_account_with_tenant, login_required @@ -31,11 +30,10 @@ class EndpointCreateApi(Resource): @api.response(403, "Admin privileges required") @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self): user, tenant_id = current_account_with_tenant() - if not user.is_admin_or_owner: - raise Forbidden() parser = ( reqparse.RequestParser() @@ -168,6 +166,7 @@ class EndpointDeleteApi(Resource): @api.response(403, "Admin privileges required") @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self): user, tenant_id = current_account_with_tenant() @@ -175,9 +174,6 @@ class EndpointDeleteApi(Resource): parser = reqparse.RequestParser().add_argument("endpoint_id", type=str, required=True) args = parser.parse_args() - if not user.is_admin_or_owner: - raise Forbidden() - endpoint_id = args["endpoint_id"] return { @@ -207,6 +203,7 @@ class EndpointUpdateApi(Resource): @api.response(403, "Admin privileges required") @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self): user, tenant_id = current_account_with_tenant() @@ -223,9 +220,6 @@ class EndpointUpdateApi(Resource): settings = args["settings"] name = args["name"] - if not user.is_admin_or_owner: - raise Forbidden() - return { "success": EndpointService.update_endpoint( tenant_id=tenant_id, @@ -252,6 +246,7 @@ class EndpointEnableApi(Resource): @api.response(403, "Admin privileges required") @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self): user, tenant_id = current_account_with_tenant() @@ -261,9 +256,6 @@ class EndpointEnableApi(Resource): endpoint_id = args["endpoint_id"] - if not user.is_admin_or_owner: - raise Forbidden() - return { "success": EndpointService.enable_endpoint(tenant_id=tenant_id, user_id=user.id, endpoint_id=endpoint_id) } @@ -284,6 +276,7 @@ class EndpointDisableApi(Resource): @api.response(403, "Admin privileges required") @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self): user, tenant_id = current_account_with_tenant() @@ -293,9 +286,6 @@ class EndpointDisableApi(Resource): endpoint_id = args["endpoint_id"] - if not user.is_admin_or_owner: - raise Forbidden() - return { "success": EndpointService.disable_endpoint(tenant_id=tenant_id, user_id=user.id, endpoint_id=endpoint_id) } diff --git a/api/controllers/console/workspace/members.py b/api/controllers/console/workspace/members.py index d66f861799..3ca453f1da 100644 --- a/api/controllers/console/workspace/members.py +++ b/api/controllers/console/workspace/members.py @@ -5,7 +5,7 @@ from flask_restx import Resource, marshal_with, reqparse import services from configs import dify_config -from controllers.console import console_ns +from controllers.console import api, console_ns from controllers.console.auth.error import ( CannotTransferOwnerToSelfError, EmailCodeError, @@ -48,22 +48,25 @@ class MemberListApi(Resource): return {"result": "success", "accounts": members}, 200 +parser_invite = ( + reqparse.RequestParser() + .add_argument("emails", type=list, required=True, location="json") + .add_argument("role", type=str, required=True, default="admin", location="json") + .add_argument("language", type=str, required=False, location="json") +) + + @console_ns.route("/workspaces/current/members/invite-email") class MemberInviteEmailApi(Resource): """Invite a new member by email.""" + @api.expect(parser_invite) @setup_required @login_required @account_initialization_required @cloud_edition_billing_resource_check("members") def post(self): - parser = ( - reqparse.RequestParser() - .add_argument("emails", type=list, required=True, location="json") - .add_argument("role", type=str, required=True, default="admin", location="json") - .add_argument("language", type=str, required=False, location="json") - ) - args = parser.parse_args() + args = parser_invite.parse_args() invitee_emails = args["emails"] invitee_role = args["role"] @@ -143,16 +146,19 @@ class MemberCancelInviteApi(Resource): }, 200 +parser_update = reqparse.RequestParser().add_argument("role", type=str, required=True, location="json") + + @console_ns.route("/workspaces/current/members//update-role") class MemberUpdateRoleApi(Resource): """Update member role.""" + @api.expect(parser_update) @setup_required @login_required @account_initialization_required def put(self, member_id): - parser = reqparse.RequestParser().add_argument("role", type=str, required=True, location="json") - args = parser.parse_args() + args = parser_update.parse_args() new_role = args["role"] if not TenantAccountRole.is_valid_role(new_role): @@ -191,17 +197,20 @@ class DatasetOperatorMemberListApi(Resource): return {"result": "success", "accounts": members}, 200 +parser_send = reqparse.RequestParser().add_argument("language", type=str, required=False, location="json") + + @console_ns.route("/workspaces/current/members/send-owner-transfer-confirm-email") class SendOwnerTransferEmailApi(Resource): """Send owner transfer email.""" + @api.expect(parser_send) @setup_required @login_required @account_initialization_required @is_allow_transfer_owner def post(self): - parser = reqparse.RequestParser().add_argument("language", type=str, required=False, location="json") - args = parser.parse_args() + args = parser_send.parse_args() ip_address = extract_remote_ip(request) if AccountService.is_email_send_ip_limit(ip_address): raise EmailSendIpLimitError() @@ -229,19 +238,22 @@ class SendOwnerTransferEmailApi(Resource): return {"result": "success", "data": token} +parser_owner = ( + reqparse.RequestParser() + .add_argument("code", type=str, required=True, location="json") + .add_argument("token", type=str, required=True, nullable=False, location="json") +) + + @console_ns.route("/workspaces/current/members/owner-transfer-check") class OwnerTransferCheckApi(Resource): + @api.expect(parser_owner) @setup_required @login_required @account_initialization_required @is_allow_transfer_owner def post(self): - parser = ( - reqparse.RequestParser() - .add_argument("code", type=str, required=True, location="json") - .add_argument("token", type=str, required=True, nullable=False, location="json") - ) - args = parser.parse_args() + args = parser_owner.parse_args() # check if the current user is the owner of the workspace current_user, _ = current_account_with_tenant() if not current_user.current_tenant: @@ -276,17 +288,20 @@ class OwnerTransferCheckApi(Resource): return {"is_valid": True, "email": token_data.get("email"), "token": new_token} +parser_owner_transfer = reqparse.RequestParser().add_argument( + "token", type=str, required=True, nullable=False, location="json" +) + + @console_ns.route("/workspaces/current/members//owner-transfer") class OwnerTransfer(Resource): + @api.expect(parser_owner_transfer) @setup_required @login_required @account_initialization_required @is_allow_transfer_owner def post(self, member_id): - parser = reqparse.RequestParser().add_argument( - "token", type=str, required=True, nullable=False, location="json" - ) - args = parser.parse_args() + args = parser_owner_transfer.parse_args() # check if the current user is the owner of the workspace current_user, _ = current_account_with_tenant() diff --git a/api/controllers/console/workspace/model_providers.py b/api/controllers/console/workspace/model_providers.py index 04db975fc2..05731b3832 100644 --- a/api/controllers/console/workspace/model_providers.py +++ b/api/controllers/console/workspace/model_providers.py @@ -2,10 +2,9 @@ import io from flask import send_file from flask_restx import Resource, reqparse -from werkzeug.exceptions import Forbidden -from controllers.console import console_ns -from controllers.console.wraps import account_initialization_required, setup_required +from controllers.console import api, console_ns +from controllers.console.wraps import account_initialization_required, is_admin_or_owner_required, setup_required from core.model_runtime.entities.model_entities import ModelType from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.utils.encoders import jsonable_encoder @@ -14,9 +13,19 @@ from libs.login import current_account_with_tenant, login_required from services.billing_service import BillingService from services.model_provider_service import ModelProviderService +parser_model = reqparse.RequestParser().add_argument( + "model_type", + type=str, + required=False, + nullable=True, + choices=[mt.value for mt in ModelType], + location="args", +) + @console_ns.route("/workspaces/current/model-providers") class ModelProviderListApi(Resource): + @api.expect(parser_model) @setup_required @login_required @account_initialization_required @@ -24,15 +33,7 @@ class ModelProviderListApi(Resource): _, current_tenant_id = current_account_with_tenant() tenant_id = current_tenant_id - parser = reqparse.RequestParser().add_argument( - "model_type", - type=str, - required=False, - nullable=True, - choices=[mt.value for mt in ModelType], - location="args", - ) - args = parser.parse_args() + args = parser_model.parse_args() model_provider_service = ModelProviderService() provider_list = model_provider_service.get_provider_list(tenant_id=tenant_id, model_type=args.get("model_type")) @@ -40,8 +41,30 @@ class ModelProviderListApi(Resource): return jsonable_encoder({"data": provider_list}) +parser_cred = reqparse.RequestParser().add_argument( + "credential_id", type=uuid_value, required=False, nullable=True, location="args" +) +parser_post_cred = ( + reqparse.RequestParser() + .add_argument("credentials", type=dict, required=True, nullable=False, location="json") + .add_argument("name", type=StrLen(30), required=False, nullable=True, location="json") +) + +parser_put_cred = ( + reqparse.RequestParser() + .add_argument("credential_id", type=uuid_value, required=True, nullable=False, location="json") + .add_argument("credentials", type=dict, required=True, nullable=False, location="json") + .add_argument("name", type=StrLen(30), required=False, nullable=True, location="json") +) + +parser_delete_cred = reqparse.RequestParser().add_argument( + "credential_id", type=uuid_value, required=True, nullable=False, location="json" +) + + @console_ns.route("/workspaces/current/model-providers//credentials") class ModelProviderCredentialApi(Resource): + @api.expect(parser_cred) @setup_required @login_required @account_initialization_required @@ -49,10 +72,7 @@ class ModelProviderCredentialApi(Resource): _, current_tenant_id = current_account_with_tenant() tenant_id = current_tenant_id # if credential_id is not provided, return current used credential - parser = reqparse.RequestParser().add_argument( - "credential_id", type=uuid_value, required=False, nullable=True, location="args" - ) - args = parser.parse_args() + args = parser_cred.parse_args() model_provider_service = ModelProviderService() credentials = model_provider_service.get_provider_credential( @@ -61,20 +81,14 @@ class ModelProviderCredentialApi(Resource): return {"credentials": credentials} + @api.expect(parser_post_cred) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self, provider: str): - current_user, current_tenant_id = current_account_with_tenant() - if not current_user.is_admin_or_owner: - raise Forbidden() - - parser = ( - reqparse.RequestParser() - .add_argument("credentials", type=dict, required=True, nullable=False, location="json") - .add_argument("name", type=StrLen(30), required=False, nullable=True, location="json") - ) - args = parser.parse_args() + _, current_tenant_id = current_account_with_tenant() + args = parser_post_cred.parse_args() model_provider_service = ModelProviderService() @@ -90,21 +104,15 @@ class ModelProviderCredentialApi(Resource): return {"result": "success"}, 201 + @api.expect(parser_put_cred) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def put(self, provider: str): - current_user, current_tenant_id = current_account_with_tenant() - if not current_user.is_admin_or_owner: - raise Forbidden() + _, current_tenant_id = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("credential_id", type=uuid_value, required=True, nullable=False, location="json") - .add_argument("credentials", type=dict, required=True, nullable=False, location="json") - .add_argument("name", type=StrLen(30), required=False, nullable=True, location="json") - ) - args = parser.parse_args() + args = parser_put_cred.parse_args() model_provider_service = ModelProviderService() @@ -121,17 +129,14 @@ class ModelProviderCredentialApi(Resource): return {"result": "success"} + @api.expect(parser_delete_cred) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def delete(self, provider: str): - current_user, current_tenant_id = current_account_with_tenant() - if not current_user.is_admin_or_owner: - raise Forbidden() - parser = reqparse.RequestParser().add_argument( - "credential_id", type=uuid_value, required=True, nullable=False, location="json" - ) - args = parser.parse_args() + _, current_tenant_id = current_account_with_tenant() + args = parser_delete_cred.parse_args() model_provider_service = ModelProviderService() model_provider_service.remove_provider_credential( @@ -141,19 +146,21 @@ class ModelProviderCredentialApi(Resource): return {"result": "success"}, 204 +parser_switch = reqparse.RequestParser().add_argument( + "credential_id", type=str, required=True, nullable=False, location="json" +) + + @console_ns.route("/workspaces/current/model-providers//credentials/switch") class ModelProviderCredentialSwitchApi(Resource): + @api.expect(parser_switch) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self, provider: str): - current_user, current_tenant_id = current_account_with_tenant() - if not current_user.is_admin_or_owner: - raise Forbidden() - parser = reqparse.RequestParser().add_argument( - "credential_id", type=str, required=True, nullable=False, location="json" - ) - args = parser.parse_args() + _, current_tenant_id = current_account_with_tenant() + args = parser_switch.parse_args() service = ModelProviderService() service.switch_active_provider_credential( @@ -164,17 +171,20 @@ class ModelProviderCredentialSwitchApi(Resource): return {"result": "success"} +parser_validate = reqparse.RequestParser().add_argument( + "credentials", type=dict, required=True, nullable=False, location="json" +) + + @console_ns.route("/workspaces/current/model-providers//credentials/validate") class ModelProviderValidateApi(Resource): + @api.expect(parser_validate) @setup_required @login_required @account_initialization_required def post(self, provider: str): _, current_tenant_id = current_account_with_tenant() - parser = reqparse.RequestParser().add_argument( - "credentials", type=dict, required=True, nullable=False, location="json" - ) - args = parser.parse_args() + args = parser_validate.parse_args() tenant_id = current_tenant_id @@ -218,27 +228,29 @@ class ModelProviderIconApi(Resource): return send_file(io.BytesIO(icon), mimetype=mimetype) +parser_preferred = reqparse.RequestParser().add_argument( + "preferred_provider_type", + type=str, + required=True, + nullable=False, + choices=["system", "custom"], + location="json", +) + + @console_ns.route("/workspaces/current/model-providers//preferred-provider-type") class PreferredProviderTypeUpdateApi(Resource): + @api.expect(parser_preferred) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self, provider: str): - current_user, current_tenant_id = current_account_with_tenant() - if not current_user.is_admin_or_owner: - raise Forbidden() + _, current_tenant_id = current_account_with_tenant() tenant_id = current_tenant_id - parser = reqparse.RequestParser().add_argument( - "preferred_provider_type", - type=str, - required=True, - nullable=False, - choices=["system", "custom"], - location="json", - ) - args = parser.parse_args() + args = parser_preferred.parse_args() model_provider_service = ModelProviderService() model_provider_service.switch_preferred_provider( diff --git a/api/controllers/console/workspace/models.py b/api/controllers/console/workspace/models.py index 5ab958d585..79079f692e 100644 --- a/api/controllers/console/workspace/models.py +++ b/api/controllers/console/workspace/models.py @@ -1,10 +1,9 @@ import logging from flask_restx import Resource, reqparse -from werkzeug.exceptions import Forbidden -from controllers.console import console_ns -from controllers.console.wraps import account_initialization_required, setup_required +from controllers.console import api, console_ns +from controllers.console.wraps import account_initialization_required, is_admin_or_owner_required, setup_required from core.model_runtime.entities.model_entities import ModelType from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.utils.encoders import jsonable_encoder @@ -16,23 +15,29 @@ from services.model_provider_service import ModelProviderService logger = logging.getLogger(__name__) +parser_get_default = reqparse.RequestParser().add_argument( + "model_type", + type=str, + required=True, + nullable=False, + choices=[mt.value for mt in ModelType], + location="args", +) +parser_post_default = reqparse.RequestParser().add_argument( + "model_settings", type=list, required=True, nullable=False, location="json" +) + + @console_ns.route("/workspaces/current/default-model") class DefaultModelApi(Resource): + @api.expect(parser_get_default) @setup_required @login_required @account_initialization_required def get(self): _, tenant_id = current_account_with_tenant() - parser = reqparse.RequestParser().add_argument( - "model_type", - type=str, - required=True, - nullable=False, - choices=[mt.value for mt in ModelType], - location="args", - ) - args = parser.parse_args() + args = parser_get_default.parse_args() model_provider_service = ModelProviderService() default_model_entity = model_provider_service.get_default_model_of_model_type( @@ -41,19 +46,15 @@ class DefaultModelApi(Resource): return jsonable_encoder({"data": default_model_entity}) + @api.expect(parser_post_default) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self): - current_user, tenant_id = current_account_with_tenant() + _, tenant_id = current_account_with_tenant() - if not current_user.is_admin_or_owner: - raise Forbidden() - - parser = reqparse.RequestParser().add_argument( - "model_settings", type=list, required=True, nullable=False, location="json" - ) - args = parser.parse_args() + args = parser_post_default.parse_args() model_provider_service = ModelProviderService() model_settings = args["model_settings"] for model_setting in model_settings: @@ -84,6 +85,35 @@ class DefaultModelApi(Resource): return {"result": "success"} +parser_post_models = ( + reqparse.RequestParser() + .add_argument("model", type=str, required=True, nullable=False, location="json") + .add_argument( + "model_type", + type=str, + required=True, + nullable=False, + choices=[mt.value for mt in ModelType], + location="json", + ) + .add_argument("load_balancing", type=dict, required=False, nullable=True, location="json") + .add_argument("config_from", type=str, required=False, nullable=True, location="json") + .add_argument("credential_id", type=uuid_value, required=False, nullable=True, location="json") +) +parser_delete_models = ( + reqparse.RequestParser() + .add_argument("model", type=str, required=True, nullable=False, location="json") + .add_argument( + "model_type", + type=str, + required=True, + nullable=False, + choices=[mt.value for mt in ModelType], + location="json", + ) +) + + @console_ns.route("/workspaces/current/model-providers//models") class ModelProviderModelApi(Resource): @setup_required @@ -97,32 +127,15 @@ class ModelProviderModelApi(Resource): return jsonable_encoder({"data": models}) + @api.expect(parser_post_models) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self, provider: str): # To save the model's load balance configs - current_user, tenant_id = current_account_with_tenant() - - if not current_user.is_admin_or_owner: - raise Forbidden() - - parser = ( - reqparse.RequestParser() - .add_argument("model", type=str, required=True, nullable=False, location="json") - .add_argument( - "model_type", - type=str, - required=True, - nullable=False, - choices=[mt.value for mt in ModelType], - location="json", - ) - .add_argument("load_balancing", type=dict, required=False, nullable=True, location="json") - .add_argument("config_from", type=str, required=False, nullable=True, location="json") - .add_argument("credential_id", type=uuid_value, required=False, nullable=True, location="json") - ) - args = parser.parse_args() + _, tenant_id = current_account_with_tenant() + args = parser_post_models.parse_args() if args.get("config_from", "") == "custom-model": if not args.get("credential_id"): @@ -160,28 +173,15 @@ class ModelProviderModelApi(Resource): return {"result": "success"}, 200 + @api.expect(parser_delete_models) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def delete(self, provider: str): - current_user, tenant_id = current_account_with_tenant() + _, tenant_id = current_account_with_tenant() - if not current_user.is_admin_or_owner: - raise Forbidden() - - parser = ( - reqparse.RequestParser() - .add_argument("model", type=str, required=True, nullable=False, location="json") - .add_argument( - "model_type", - type=str, - required=True, - nullable=False, - choices=[mt.value for mt in ModelType], - location="json", - ) - ) - args = parser.parse_args() + args = parser_delete_models.parse_args() model_provider_service = ModelProviderService() model_provider_service.remove_model( @@ -191,29 +191,76 @@ class ModelProviderModelApi(Resource): return {"result": "success"}, 204 +parser_get_credentials = ( + reqparse.RequestParser() + .add_argument("model", type=str, required=True, nullable=False, location="args") + .add_argument( + "model_type", + type=str, + required=True, + nullable=False, + choices=[mt.value for mt in ModelType], + location="args", + ) + .add_argument("config_from", type=str, required=False, nullable=True, location="args") + .add_argument("credential_id", type=uuid_value, required=False, nullable=True, location="args") +) + + +parser_post_cred = ( + reqparse.RequestParser() + .add_argument("model", type=str, required=True, nullable=False, location="json") + .add_argument( + "model_type", + type=str, + required=True, + nullable=False, + choices=[mt.value for mt in ModelType], + location="json", + ) + .add_argument("name", type=StrLen(30), required=False, nullable=True, location="json") + .add_argument("credentials", type=dict, required=True, nullable=False, location="json") +) +parser_put_cred = ( + reqparse.RequestParser() + .add_argument("model", type=str, required=True, nullable=False, location="json") + .add_argument( + "model_type", + type=str, + required=True, + nullable=False, + choices=[mt.value for mt in ModelType], + location="json", + ) + .add_argument("credential_id", type=uuid_value, required=True, nullable=False, location="json") + .add_argument("credentials", type=dict, required=True, nullable=False, location="json") + .add_argument("name", type=StrLen(30), required=False, nullable=True, location="json") +) +parser_delete_cred = ( + reqparse.RequestParser() + .add_argument("model", type=str, required=True, nullable=False, location="json") + .add_argument( + "model_type", + type=str, + required=True, + nullable=False, + choices=[mt.value for mt in ModelType], + location="json", + ) + .add_argument("credential_id", type=uuid_value, required=True, nullable=False, location="json") +) + + @console_ns.route("/workspaces/current/model-providers//models/credentials") class ModelProviderModelCredentialApi(Resource): + @api.expect(parser_get_credentials) @setup_required @login_required @account_initialization_required def get(self, provider: str): _, tenant_id = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("model", type=str, required=True, nullable=False, location="args") - .add_argument( - "model_type", - type=str, - required=True, - nullable=False, - choices=[mt.value for mt in ModelType], - location="args", - ) - .add_argument("config_from", type=str, required=False, nullable=True, location="args") - .add_argument("credential_id", type=uuid_value, required=False, nullable=True, location="args") - ) - args = parser.parse_args() + args = parser_get_credentials.parse_args() model_provider_service = ModelProviderService() current_credential = model_provider_service.get_model_credential( @@ -257,30 +304,15 @@ class ModelProviderModelCredentialApi(Resource): } ) + @api.expect(parser_post_cred) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self, provider: str): - current_user, tenant_id = current_account_with_tenant() + _, tenant_id = current_account_with_tenant() - if not current_user.is_admin_or_owner: - raise Forbidden() - - parser = ( - reqparse.RequestParser() - .add_argument("model", type=str, required=True, nullable=False, location="json") - .add_argument( - "model_type", - type=str, - required=True, - nullable=False, - choices=[mt.value for mt in ModelType], - location="json", - ) - .add_argument("name", type=StrLen(30), required=False, nullable=True, location="json") - .add_argument("credentials", type=dict, required=True, nullable=False, location="json") - ) - args = parser.parse_args() + args = parser_post_cred.parse_args() model_provider_service = ModelProviderService() @@ -304,31 +336,14 @@ class ModelProviderModelCredentialApi(Resource): return {"result": "success"}, 201 + @api.expect(parser_put_cred) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def put(self, provider: str): - current_user, current_tenant_id = current_account_with_tenant() - - if not current_user.is_admin_or_owner: - raise Forbidden() - - parser = ( - reqparse.RequestParser() - .add_argument("model", type=str, required=True, nullable=False, location="json") - .add_argument( - "model_type", - type=str, - required=True, - nullable=False, - choices=[mt.value for mt in ModelType], - location="json", - ) - .add_argument("credential_id", type=uuid_value, required=True, nullable=False, location="json") - .add_argument("credentials", type=dict, required=True, nullable=False, location="json") - .add_argument("name", type=StrLen(30), required=False, nullable=True, location="json") - ) - args = parser.parse_args() + _, current_tenant_id = current_account_with_tenant() + args = parser_put_cred.parse_args() model_provider_service = ModelProviderService() @@ -347,28 +362,14 @@ class ModelProviderModelCredentialApi(Resource): return {"result": "success"} + @api.expect(parser_delete_cred) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def delete(self, provider: str): - current_user, current_tenant_id = current_account_with_tenant() - - if not current_user.is_admin_or_owner: - raise Forbidden() - parser = ( - reqparse.RequestParser() - .add_argument("model", type=str, required=True, nullable=False, location="json") - .add_argument( - "model_type", - type=str, - required=True, - nullable=False, - choices=[mt.value for mt in ModelType], - location="json", - ) - .add_argument("credential_id", type=uuid_value, required=True, nullable=False, location="json") - ) - args = parser.parse_args() + _, current_tenant_id = current_account_with_tenant() + args = parser_delete_cred.parse_args() model_provider_service = ModelProviderService() model_provider_service.remove_model_credential( @@ -382,30 +383,32 @@ class ModelProviderModelCredentialApi(Resource): return {"result": "success"}, 204 +parser_switch = ( + reqparse.RequestParser() + .add_argument("model", type=str, required=True, nullable=False, location="json") + .add_argument( + "model_type", + type=str, + required=True, + nullable=False, + choices=[mt.value for mt in ModelType], + location="json", + ) + .add_argument("credential_id", type=str, required=True, nullable=False, location="json") +) + + @console_ns.route("/workspaces/current/model-providers//models/credentials/switch") class ModelProviderModelCredentialSwitchApi(Resource): + @api.expect(parser_switch) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self, provider: str): - current_user, current_tenant_id = current_account_with_tenant() + _, current_tenant_id = current_account_with_tenant() - if not current_user.is_admin_or_owner: - raise Forbidden() - parser = ( - reqparse.RequestParser() - .add_argument("model", type=str, required=True, nullable=False, location="json") - .add_argument( - "model_type", - type=str, - required=True, - nullable=False, - choices=[mt.value for mt in ModelType], - location="json", - ) - .add_argument("credential_id", type=str, required=True, nullable=False, location="json") - ) - args = parser.parse_args() + args = parser_switch.parse_args() service = ModelProviderService() service.add_model_credential_to_model_list( @@ -418,29 +421,32 @@ class ModelProviderModelCredentialSwitchApi(Resource): return {"result": "success"} +parser_model_enable_disable = ( + reqparse.RequestParser() + .add_argument("model", type=str, required=True, nullable=False, location="json") + .add_argument( + "model_type", + type=str, + required=True, + nullable=False, + choices=[mt.value for mt in ModelType], + location="json", + ) +) + + @console_ns.route( "/workspaces/current/model-providers//models/enable", endpoint="model-provider-model-enable" ) class ModelProviderModelEnableApi(Resource): + @api.expect(parser_model_enable_disable) @setup_required @login_required @account_initialization_required def patch(self, provider: str): _, tenant_id = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("model", type=str, required=True, nullable=False, location="json") - .add_argument( - "model_type", - type=str, - required=True, - nullable=False, - choices=[mt.value for mt in ModelType], - location="json", - ) - ) - args = parser.parse_args() + args = parser_model_enable_disable.parse_args() model_provider_service = ModelProviderService() model_provider_service.enable_model( @@ -454,25 +460,14 @@ class ModelProviderModelEnableApi(Resource): "/workspaces/current/model-providers//models/disable", endpoint="model-provider-model-disable" ) class ModelProviderModelDisableApi(Resource): + @api.expect(parser_model_enable_disable) @setup_required @login_required @account_initialization_required def patch(self, provider: str): _, tenant_id = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("model", type=str, required=True, nullable=False, location="json") - .add_argument( - "model_type", - type=str, - required=True, - nullable=False, - choices=[mt.value for mt in ModelType], - location="json", - ) - ) - args = parser.parse_args() + args = parser_model_enable_disable.parse_args() model_provider_service = ModelProviderService() model_provider_service.disable_model( @@ -482,28 +477,31 @@ class ModelProviderModelDisableApi(Resource): return {"result": "success"} +parser_validate = ( + reqparse.RequestParser() + .add_argument("model", type=str, required=True, nullable=False, location="json") + .add_argument( + "model_type", + type=str, + required=True, + nullable=False, + choices=[mt.value for mt in ModelType], + location="json", + ) + .add_argument("credentials", type=dict, required=True, nullable=False, location="json") +) + + @console_ns.route("/workspaces/current/model-providers//models/credentials/validate") class ModelProviderModelValidateApi(Resource): + @api.expect(parser_validate) @setup_required @login_required @account_initialization_required def post(self, provider: str): _, tenant_id = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("model", type=str, required=True, nullable=False, location="json") - .add_argument( - "model_type", - type=str, - required=True, - nullable=False, - choices=[mt.value for mt in ModelType], - location="json", - ) - .add_argument("credentials", type=dict, required=True, nullable=False, location="json") - ) - args = parser.parse_args() + args = parser_validate.parse_args() model_provider_service = ModelProviderService() @@ -530,16 +528,19 @@ class ModelProviderModelValidateApi(Resource): return response +parser_parameter = reqparse.RequestParser().add_argument( + "model", type=str, required=True, nullable=False, location="args" +) + + @console_ns.route("/workspaces/current/model-providers//models/parameter-rules") class ModelProviderModelParameterRuleApi(Resource): + @api.expect(parser_parameter) @setup_required @login_required @account_initialization_required def get(self, provider: str): - parser = reqparse.RequestParser().add_argument( - "model", type=str, required=True, nullable=False, location="args" - ) - args = parser.parse_args() + args = parser_parameter.parse_args() _, tenant_id = current_account_with_tenant() model_provider_service = ModelProviderService() diff --git a/api/controllers/console/workspace/plugin.py b/api/controllers/console/workspace/plugin.py index 6dd79a6d85..deae418e96 100644 --- a/api/controllers/console/workspace/plugin.py +++ b/api/controllers/console/workspace/plugin.py @@ -5,9 +5,9 @@ from flask_restx import Resource, reqparse from werkzeug.exceptions import Forbidden from configs import dify_config -from controllers.console import console_ns +from controllers.console import api, console_ns from controllers.console.workspace import plugin_permission_required -from controllers.console.wraps import account_initialization_required, setup_required +from controllers.console.wraps import account_initialization_required, is_admin_or_owner_required, setup_required from core.model_runtime.utils.encoders import jsonable_encoder from core.plugin.impl.exc import PluginDaemonClientSideError from libs.login import current_account_with_tenant, login_required @@ -37,19 +37,22 @@ class PluginDebuggingKeyApi(Resource): raise ValueError(e) +parser_list = ( + reqparse.RequestParser() + .add_argument("page", type=int, required=False, location="args", default=1) + .add_argument("page_size", type=int, required=False, location="args", default=256) +) + + @console_ns.route("/workspaces/current/plugin/list") class PluginListApi(Resource): + @api.expect(parser_list) @setup_required @login_required @account_initialization_required def get(self): _, tenant_id = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("page", type=int, required=False, location="args", default=1) - .add_argument("page_size", type=int, required=False, location="args", default=256) - ) - args = parser.parse_args() + args = parser_list.parse_args() try: plugins_with_total = PluginService.list_with_total(tenant_id, args["page"], args["page_size"]) except PluginDaemonClientSideError as e: @@ -58,14 +61,17 @@ class PluginListApi(Resource): return jsonable_encoder({"plugins": plugins_with_total.list, "total": plugins_with_total.total}) +parser_latest = reqparse.RequestParser().add_argument("plugin_ids", type=list, required=True, location="json") + + @console_ns.route("/workspaces/current/plugin/list/latest-versions") class PluginListLatestVersionsApi(Resource): + @api.expect(parser_latest) @setup_required @login_required @account_initialization_required def post(self): - req = reqparse.RequestParser().add_argument("plugin_ids", type=list, required=True, location="json") - args = req.parse_args() + args = parser_latest.parse_args() try: versions = PluginService.list_latest_versions(args["plugin_ids"]) @@ -75,16 +81,19 @@ class PluginListLatestVersionsApi(Resource): return jsonable_encoder({"versions": versions}) +parser_ids = reqparse.RequestParser().add_argument("plugin_ids", type=list, required=True, location="json") + + @console_ns.route("/workspaces/current/plugin/list/installations/ids") class PluginListInstallationsFromIdsApi(Resource): + @api.expect(parser_ids) @setup_required @login_required @account_initialization_required def post(self): _, tenant_id = current_account_with_tenant() - parser = reqparse.RequestParser().add_argument("plugin_ids", type=list, required=True, location="json") - args = parser.parse_args() + args = parser_ids.parse_args() try: plugins = PluginService.list_installations_from_ids(tenant_id, args["plugin_ids"]) @@ -94,16 +103,19 @@ class PluginListInstallationsFromIdsApi(Resource): return jsonable_encoder({"plugins": plugins}) +parser_icon = ( + reqparse.RequestParser() + .add_argument("tenant_id", type=str, required=True, location="args") + .add_argument("filename", type=str, required=True, location="args") +) + + @console_ns.route("/workspaces/current/plugin/icon") class PluginIconApi(Resource): + @api.expect(parser_icon) @setup_required def get(self): - req = ( - reqparse.RequestParser() - .add_argument("tenant_id", type=str, required=True, location="args") - .add_argument("filename", type=str, required=True, location="args") - ) - args = req.parse_args() + args = parser_icon.parse_args() try: icon_bytes, mimetype = PluginService.get_asset(args["tenant_id"], args["filename"]) @@ -120,12 +132,14 @@ class PluginAssetApi(Resource): @login_required @account_initialization_required def get(self): - req = reqparse.RequestParser() - req.add_argument("plugin_unique_identifier", type=str, required=True, location="args") - req.add_argument("file_name", type=str, required=True, location="args") + req = ( + reqparse.RequestParser() + .add_argument("plugin_unique_identifier", type=str, required=True, location="args") + .add_argument("file_name", type=str, required=True, location="args") + ) args = req.parse_args() - current_user, tenant_id = current_account_with_tenant() + _, tenant_id = current_account_with_tenant() try: binary = PluginService.extract_asset(tenant_id, args["plugin_unique_identifier"], args["file_name"]) return send_file(io.BytesIO(binary), mimetype="application/octet-stream") @@ -157,8 +171,17 @@ class PluginUploadFromPkgApi(Resource): return jsonable_encoder(response) +parser_github = ( + reqparse.RequestParser() + .add_argument("repo", type=str, required=True, location="json") + .add_argument("version", type=str, required=True, location="json") + .add_argument("package", type=str, required=True, location="json") +) + + @console_ns.route("/workspaces/current/plugin/upload/github") class PluginUploadFromGithubApi(Resource): + @api.expect(parser_github) @setup_required @login_required @account_initialization_required @@ -166,13 +189,7 @@ class PluginUploadFromGithubApi(Resource): def post(self): _, tenant_id = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("repo", type=str, required=True, location="json") - .add_argument("version", type=str, required=True, location="json") - .add_argument("package", type=str, required=True, location="json") - ) - args = parser.parse_args() + args = parser_github.parse_args() try: response = PluginService.upload_pkg_from_github(tenant_id, args["repo"], args["version"], args["package"]) @@ -206,19 +223,21 @@ class PluginUploadFromBundleApi(Resource): return jsonable_encoder(response) +parser_pkg = reqparse.RequestParser().add_argument( + "plugin_unique_identifiers", type=list, required=True, location="json" +) + + @console_ns.route("/workspaces/current/plugin/install/pkg") class PluginInstallFromPkgApi(Resource): + @api.expect(parser_pkg) @setup_required @login_required @account_initialization_required @plugin_permission_required(install_required=True) def post(self): _, tenant_id = current_account_with_tenant() - - parser = reqparse.RequestParser().add_argument( - "plugin_unique_identifiers", type=list, required=True, location="json" - ) - args = parser.parse_args() + args = parser_pkg.parse_args() # check if all plugin_unique_identifiers are valid string for plugin_unique_identifier in args["plugin_unique_identifiers"]: @@ -233,8 +252,18 @@ class PluginInstallFromPkgApi(Resource): return jsonable_encoder(response) +parser_githubapi = ( + reqparse.RequestParser() + .add_argument("repo", type=str, required=True, location="json") + .add_argument("version", type=str, required=True, location="json") + .add_argument("package", type=str, required=True, location="json") + .add_argument("plugin_unique_identifier", type=str, required=True, location="json") +) + + @console_ns.route("/workspaces/current/plugin/install/github") class PluginInstallFromGithubApi(Resource): + @api.expect(parser_githubapi) @setup_required @login_required @account_initialization_required @@ -242,14 +271,7 @@ class PluginInstallFromGithubApi(Resource): def post(self): _, tenant_id = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("repo", type=str, required=True, location="json") - .add_argument("version", type=str, required=True, location="json") - .add_argument("package", type=str, required=True, location="json") - .add_argument("plugin_unique_identifier", type=str, required=True, location="json") - ) - args = parser.parse_args() + args = parser_githubapi.parse_args() try: response = PluginService.install_from_github( @@ -265,8 +287,14 @@ class PluginInstallFromGithubApi(Resource): return jsonable_encoder(response) +parser_marketplace = reqparse.RequestParser().add_argument( + "plugin_unique_identifiers", type=list, required=True, location="json" +) + + @console_ns.route("/workspaces/current/plugin/install/marketplace") class PluginInstallFromMarketplaceApi(Resource): + @api.expect(parser_marketplace) @setup_required @login_required @account_initialization_required @@ -274,10 +302,7 @@ class PluginInstallFromMarketplaceApi(Resource): def post(self): _, tenant_id = current_account_with_tenant() - parser = reqparse.RequestParser().add_argument( - "plugin_unique_identifiers", type=list, required=True, location="json" - ) - args = parser.parse_args() + args = parser_marketplace.parse_args() # check if all plugin_unique_identifiers are valid string for plugin_unique_identifier in args["plugin_unique_identifiers"]: @@ -292,19 +317,21 @@ class PluginInstallFromMarketplaceApi(Resource): return jsonable_encoder(response) +parser_pkgapi = reqparse.RequestParser().add_argument( + "plugin_unique_identifier", type=str, required=True, location="args" +) + + @console_ns.route("/workspaces/current/plugin/marketplace/pkg") class PluginFetchMarketplacePkgApi(Resource): + @api.expect(parser_pkgapi) @setup_required @login_required @account_initialization_required @plugin_permission_required(install_required=True) def get(self): _, tenant_id = current_account_with_tenant() - - parser = reqparse.RequestParser().add_argument( - "plugin_unique_identifier", type=str, required=True, location="args" - ) - args = parser.parse_args() + args = parser_pkgapi.parse_args() try: return jsonable_encoder( @@ -319,8 +346,14 @@ class PluginFetchMarketplacePkgApi(Resource): raise ValueError(e) +parser_fetch = reqparse.RequestParser().add_argument( + "plugin_unique_identifier", type=str, required=True, location="args" +) + + @console_ns.route("/workspaces/current/plugin/fetch-manifest") class PluginFetchManifestApi(Resource): + @api.expect(parser_fetch) @setup_required @login_required @account_initialization_required @@ -328,10 +361,7 @@ class PluginFetchManifestApi(Resource): def get(self): _, tenant_id = current_account_with_tenant() - parser = reqparse.RequestParser().add_argument( - "plugin_unique_identifier", type=str, required=True, location="args" - ) - args = parser.parse_args() + args = parser_fetch.parse_args() try: return jsonable_encoder( @@ -345,8 +375,16 @@ class PluginFetchManifestApi(Resource): raise ValueError(e) +parser_tasks = ( + reqparse.RequestParser() + .add_argument("page", type=int, required=True, location="args") + .add_argument("page_size", type=int, required=True, location="args") +) + + @console_ns.route("/workspaces/current/plugin/tasks") class PluginFetchInstallTasksApi(Resource): + @api.expect(parser_tasks) @setup_required @login_required @account_initialization_required @@ -354,12 +392,7 @@ class PluginFetchInstallTasksApi(Resource): def get(self): _, tenant_id = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("page", type=int, required=True, location="args") - .add_argument("page_size", type=int, required=True, location="args") - ) - args = parser.parse_args() + args = parser_tasks.parse_args() try: return jsonable_encoder( @@ -429,8 +462,16 @@ class PluginDeleteInstallTaskItemApi(Resource): raise ValueError(e) +parser_marketplace_api = ( + reqparse.RequestParser() + .add_argument("original_plugin_unique_identifier", type=str, required=True, location="json") + .add_argument("new_plugin_unique_identifier", type=str, required=True, location="json") +) + + @console_ns.route("/workspaces/current/plugin/upgrade/marketplace") class PluginUpgradeFromMarketplaceApi(Resource): + @api.expect(parser_marketplace_api) @setup_required @login_required @account_initialization_required @@ -438,12 +479,7 @@ class PluginUpgradeFromMarketplaceApi(Resource): def post(self): _, tenant_id = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("original_plugin_unique_identifier", type=str, required=True, location="json") - .add_argument("new_plugin_unique_identifier", type=str, required=True, location="json") - ) - args = parser.parse_args() + args = parser_marketplace_api.parse_args() try: return jsonable_encoder( @@ -455,8 +491,19 @@ class PluginUpgradeFromMarketplaceApi(Resource): raise ValueError(e) +parser_github_post = ( + reqparse.RequestParser() + .add_argument("original_plugin_unique_identifier", type=str, required=True, location="json") + .add_argument("new_plugin_unique_identifier", type=str, required=True, location="json") + .add_argument("repo", type=str, required=True, location="json") + .add_argument("version", type=str, required=True, location="json") + .add_argument("package", type=str, required=True, location="json") +) + + @console_ns.route("/workspaces/current/plugin/upgrade/github") class PluginUpgradeFromGithubApi(Resource): + @api.expect(parser_github_post) @setup_required @login_required @account_initialization_required @@ -464,15 +511,7 @@ class PluginUpgradeFromGithubApi(Resource): def post(self): _, tenant_id = current_account_with_tenant() - parser = ( - reqparse.RequestParser() - .add_argument("original_plugin_unique_identifier", type=str, required=True, location="json") - .add_argument("new_plugin_unique_identifier", type=str, required=True, location="json") - .add_argument("repo", type=str, required=True, location="json") - .add_argument("version", type=str, required=True, location="json") - .add_argument("package", type=str, required=True, location="json") - ) - args = parser.parse_args() + args = parser_github_post.parse_args() try: return jsonable_encoder( @@ -489,15 +528,20 @@ class PluginUpgradeFromGithubApi(Resource): raise ValueError(e) +parser_uninstall = reqparse.RequestParser().add_argument( + "plugin_installation_id", type=str, required=True, location="json" +) + + @console_ns.route("/workspaces/current/plugin/uninstall") class PluginUninstallApi(Resource): + @api.expect(parser_uninstall) @setup_required @login_required @account_initialization_required @plugin_permission_required(install_required=True) def post(self): - req = reqparse.RequestParser().add_argument("plugin_installation_id", type=str, required=True, location="json") - args = req.parse_args() + args = parser_uninstall.parse_args() _, tenant_id = current_account_with_tenant() @@ -507,8 +551,16 @@ class PluginUninstallApi(Resource): raise ValueError(e) +parser_change_post = ( + reqparse.RequestParser() + .add_argument("install_permission", type=str, required=True, location="json") + .add_argument("debug_permission", type=str, required=True, location="json") +) + + @console_ns.route("/workspaces/current/plugin/permission/change") class PluginChangePermissionApi(Resource): + @api.expect(parser_change_post) @setup_required @login_required @account_initialization_required @@ -518,12 +570,7 @@ class PluginChangePermissionApi(Resource): if not user.is_admin_or_owner: raise Forbidden() - req = ( - reqparse.RequestParser() - .add_argument("install_permission", type=str, required=True, location="json") - .add_argument("debug_permission", type=str, required=True, location="json") - ) - args = req.parse_args() + args = parser_change_post.parse_args() install_permission = TenantPluginPermission.InstallPermission(args["install_permission"]) debug_permission = TenantPluginPermission.DebugPermission(args["debug_permission"]) @@ -558,29 +605,29 @@ class PluginFetchPermissionApi(Resource): ) +parser_dynamic = ( + reqparse.RequestParser() + .add_argument("plugin_id", type=str, required=True, location="args") + .add_argument("provider", type=str, required=True, location="args") + .add_argument("action", type=str, required=True, location="args") + .add_argument("parameter", type=str, required=True, location="args") + .add_argument("credential_id", type=str, required=False, location="args") + .add_argument("provider_type", type=str, required=True, location="args") +) + + @console_ns.route("/workspaces/current/plugin/parameters/dynamic-options") class PluginFetchDynamicSelectOptionsApi(Resource): + @api.expect(parser_dynamic) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def get(self): - # check if the user is admin or owner current_user, tenant_id = current_account_with_tenant() - if not current_user.is_admin_or_owner: - raise Forbidden() - user_id = current_user.id - parser = ( - reqparse.RequestParser() - .add_argument("plugin_id", type=str, required=True, location="args") - .add_argument("provider", type=str, required=True, location="args") - .add_argument("action", type=str, required=True, location="args") - .add_argument("parameter", type=str, required=True, location="args") - .add_argument("credential_id", type=str, required=False, location="args") - .add_argument("provider_type", type=str, required=True, location="args") - ) - args = parser.parse_args() + args = parser_dynamic.parse_args() try: options = PluginParameterService.get_dynamic_select_options( @@ -599,8 +646,16 @@ class PluginFetchDynamicSelectOptionsApi(Resource): return jsonable_encoder({"options": options}) +parser_change = ( + reqparse.RequestParser() + .add_argument("permission", type=dict, required=True, location="json") + .add_argument("auto_upgrade", type=dict, required=True, location="json") +) + + @console_ns.route("/workspaces/current/plugin/preferences/change") class PluginChangePreferencesApi(Resource): + @api.expect(parser_change) @setup_required @login_required @account_initialization_required @@ -609,12 +664,7 @@ class PluginChangePreferencesApi(Resource): if not user.is_admin_or_owner: raise Forbidden() - req = ( - reqparse.RequestParser() - .add_argument("permission", type=dict, required=True, location="json") - .add_argument("auto_upgrade", type=dict, required=True, location="json") - ) - args = req.parse_args() + args = parser_change.parse_args() permission = args["permission"] @@ -694,8 +744,12 @@ class PluginFetchPreferencesApi(Resource): return jsonable_encoder({"permission": permission_dict, "auto_upgrade": auto_upgrade_dict}) +parser_exclude = reqparse.RequestParser().add_argument("plugin_id", type=str, required=True, location="json") + + @console_ns.route("/workspaces/current/plugin/preferences/autoupgrade/exclude") class PluginAutoUpgradeExcludePluginApi(Resource): + @api.expect(parser_exclude) @setup_required @login_required @account_initialization_required @@ -703,8 +757,7 @@ class PluginAutoUpgradeExcludePluginApi(Resource): # exclude one single plugin _, tenant_id = current_account_with_tenant() - req = reqparse.RequestParser().add_argument("plugin_id", type=str, required=True, location="json") - args = req.parse_args() + args = parser_exclude.parse_args() return jsonable_encoder({"success": PluginAutoUpgradeService.exclude_plugin(tenant_id, args["plugin_id"])}) @@ -715,10 +768,12 @@ class PluginReadmeApi(Resource): @login_required @account_initialization_required def get(self): - current_user, tenant_id = current_account_with_tenant() - parser = reqparse.RequestParser() - parser.add_argument("plugin_unique_identifier", type=str, required=True, location="args") - parser.add_argument("language", type=str, required=False, location="args") + _, tenant_id = current_account_with_tenant() + parser = ( + reqparse.RequestParser() + .add_argument("plugin_unique_identifier", type=str, required=True, location="args") + .add_argument("language", type=str, required=False, location="args") + ) args = parser.parse_args() return jsonable_encoder( { diff --git a/api/controllers/console/workspace/tool_providers.py b/api/controllers/console/workspace/tool_providers.py index 613ca5368e..917059bb4c 100644 --- a/api/controllers/console/workspace/tool_providers.py +++ b/api/controllers/console/workspace/tool_providers.py @@ -6,22 +6,25 @@ from flask_restx import ( Resource, reqparse, ) +from sqlalchemy.orm import Session from werkzeug.exceptions import Forbidden from configs import dify_config -from controllers.console import console_ns +from controllers.console import api, console_ns from controllers.console.wraps import ( account_initialization_required, enterprise_license_required, + is_admin_or_owner_required, setup_required, ) +from core.entities.mcp_provider import MCPAuthentication, MCPConfiguration from core.mcp.auth.auth_flow import auth, handle_callback -from core.mcp.auth.auth_provider import OAuthClientProvider -from core.mcp.error import MCPAuthError, MCPError +from core.mcp.error import MCPAuthError, MCPError, MCPRefreshTokenError from core.mcp.mcp_client import MCPClient from core.model_runtime.utils.encoders import jsonable_encoder from core.plugin.entities.plugin_daemon import CredentialType from core.plugin.impl.oauth import OAuthHandler +from extensions.ext_database import db from libs.helper import StrLen, alphanumeric, uuid_value from libs.login import current_account_with_tenant, login_required from models.provider_ids import ToolProviderID @@ -30,7 +33,7 @@ from models.provider_ids import ToolProviderID from services.plugin.oauth_service import OAuthProxyService from services.tools.api_tools_manage_service import ApiToolManageService from services.tools.builtin_tools_manage_service import BuiltinToolManageService -from services.tools.mcp_tools_manage_service import MCPToolManageService +from services.tools.mcp_tools_manage_service import MCPToolManageService, OAuthDataType from services.tools.tool_labels_service import ToolLabelsService from services.tools.tools_manage_service import ToolCommonService from services.tools.tools_transform_service import ToolTransformService @@ -44,12 +47,25 @@ def is_valid_url(url: str) -> bool: try: parsed = urlparse(url) return all([parsed.scheme, parsed.netloc]) and parsed.scheme in ["http", "https"] - except Exception: + except (ValueError, TypeError): + # ValueError: Invalid URL format + # TypeError: url is not a string return False +parser_tool = reqparse.RequestParser().add_argument( + "type", + type=str, + choices=["builtin", "model", "api", "workflow", "mcp"], + required=False, + nullable=True, + location="args", +) + + @console_ns.route("/workspaces/current/tool-providers") class ToolProviderListApi(Resource): + @api.expect(parser_tool) @setup_required @login_required @account_initialization_required @@ -58,15 +74,7 @@ class ToolProviderListApi(Resource): user_id = user.id - req = reqparse.RequestParser().add_argument( - "type", - type=str, - choices=["builtin", "model", "api", "workflow", "mcp"], - required=False, - nullable=True, - location="args", - ) - args = req.parse_args() + args = parser_tool.parse_args() return ToolCommonService.list_tool_providers(user_id, tenant_id, args.get("type", None)) @@ -98,20 +106,22 @@ class ToolBuiltinProviderInfoApi(Resource): return jsonable_encoder(BuiltinToolManageService.get_builtin_tool_provider_info(tenant_id, provider)) +parser_delete = reqparse.RequestParser().add_argument( + "credential_id", type=str, required=True, nullable=False, location="json" +) + + @console_ns.route("/workspaces/current/tool-provider/builtin//delete") class ToolBuiltinProviderDeleteApi(Resource): + @api.expect(parser_delete) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self, provider): - user, tenant_id = current_account_with_tenant() - if not user.is_admin_or_owner: - raise Forbidden() + _, tenant_id = current_account_with_tenant() - req = reqparse.RequestParser().add_argument( - "credential_id", type=str, required=True, nullable=False, location="json" - ) - args = req.parse_args() + args = parser_delete.parse_args() return BuiltinToolManageService.delete_builtin_tool_provider( tenant_id, @@ -120,8 +130,17 @@ class ToolBuiltinProviderDeleteApi(Resource): ) +parser_add = ( + reqparse.RequestParser() + .add_argument("credentials", type=dict, required=True, nullable=False, location="json") + .add_argument("name", type=StrLen(30), required=False, nullable=False, location="json") + .add_argument("type", type=str, required=True, nullable=False, location="json") +) + + @console_ns.route("/workspaces/current/tool-provider/builtin//add") class ToolBuiltinProviderAddApi(Resource): + @api.expect(parser_add) @setup_required @login_required @account_initialization_required @@ -130,13 +149,7 @@ class ToolBuiltinProviderAddApi(Resource): user_id = user.id - parser = ( - reqparse.RequestParser() - .add_argument("credentials", type=dict, required=True, nullable=False, location="json") - .add_argument("name", type=StrLen(30), required=False, nullable=False, location="json") - .add_argument("type", type=str, required=True, nullable=False, location="json") - ) - args = parser.parse_args() + args = parser_add.parse_args() if args["type"] not in CredentialType.values(): raise ValueError(f"Invalid credential type: {args['type']}") @@ -151,27 +164,26 @@ class ToolBuiltinProviderAddApi(Resource): ) +parser_update = ( + reqparse.RequestParser() + .add_argument("credential_id", type=str, required=True, nullable=False, location="json") + .add_argument("credentials", type=dict, required=False, nullable=True, location="json") + .add_argument("name", type=StrLen(30), required=False, nullable=True, location="json") +) + + @console_ns.route("/workspaces/current/tool-provider/builtin//update") class ToolBuiltinProviderUpdateApi(Resource): + @api.expect(parser_update) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self, provider): user, tenant_id = current_account_with_tenant() - - if not user.is_admin_or_owner: - raise Forbidden() - user_id = user.id - parser = ( - reqparse.RequestParser() - .add_argument("credential_id", type=str, required=True, nullable=False, location="json") - .add_argument("credentials", type=dict, required=False, nullable=True, location="json") - .add_argument("name", type=StrLen(30), required=False, nullable=True, location="json") - ) - - args = parser.parse_args() + args = parser_update.parse_args() result = BuiltinToolManageService.update_builtin_tool_provider( user_id=user_id, @@ -209,32 +221,32 @@ class ToolBuiltinProviderIconApi(Resource): return send_file(io.BytesIO(icon_bytes), mimetype=mimetype, max_age=icon_cache_max_age) +parser_api_add = ( + reqparse.RequestParser() + .add_argument("credentials", type=dict, required=True, nullable=False, location="json") + .add_argument("schema_type", type=str, required=True, nullable=False, location="json") + .add_argument("schema", type=str, required=True, nullable=False, location="json") + .add_argument("provider", type=str, required=True, nullable=False, location="json") + .add_argument("icon", type=dict, required=True, nullable=False, location="json") + .add_argument("privacy_policy", type=str, required=False, nullable=True, location="json") + .add_argument("labels", type=list[str], required=False, nullable=True, location="json", default=[]) + .add_argument("custom_disclaimer", type=str, required=False, nullable=True, location="json") +) + + @console_ns.route("/workspaces/current/tool-provider/api/add") class ToolApiProviderAddApi(Resource): + @api.expect(parser_api_add) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self): user, tenant_id = current_account_with_tenant() - if not user.is_admin_or_owner: - raise Forbidden() - user_id = user.id - parser = ( - reqparse.RequestParser() - .add_argument("credentials", type=dict, required=True, nullable=False, location="json") - .add_argument("schema_type", type=str, required=True, nullable=False, location="json") - .add_argument("schema", type=str, required=True, nullable=False, location="json") - .add_argument("provider", type=str, required=True, nullable=False, location="json") - .add_argument("icon", type=dict, required=True, nullable=False, location="json") - .add_argument("privacy_policy", type=str, required=False, nullable=True, location="json") - .add_argument("labels", type=list[str], required=False, nullable=True, location="json", default=[]) - .add_argument("custom_disclaimer", type=str, required=False, nullable=True, location="json") - ) - - args = parser.parse_args() + args = parser_api_add.parse_args() return ApiToolManageService.create_api_tool_provider( user_id, @@ -250,8 +262,12 @@ class ToolApiProviderAddApi(Resource): ) +parser_remote = reqparse.RequestParser().add_argument("url", type=str, required=True, nullable=False, location="args") + + @console_ns.route("/workspaces/current/tool-provider/api/remote") class ToolApiProviderGetRemoteSchemaApi(Resource): + @api.expect(parser_remote) @setup_required @login_required @account_initialization_required @@ -260,9 +276,7 @@ class ToolApiProviderGetRemoteSchemaApi(Resource): user_id = user.id - parser = reqparse.RequestParser().add_argument("url", type=str, required=True, nullable=False, location="args") - - args = parser.parse_args() + args = parser_remote.parse_args() return ApiToolManageService.get_api_tool_provider_remote_schema( user_id, @@ -271,8 +285,14 @@ class ToolApiProviderGetRemoteSchemaApi(Resource): ) +parser_tools = reqparse.RequestParser().add_argument( + "provider", type=str, required=True, nullable=False, location="args" +) + + @console_ns.route("/workspaces/current/tool-provider/api/tools") class ToolApiProviderListToolsApi(Resource): + @api.expect(parser_tools) @setup_required @login_required @account_initialization_required @@ -281,11 +301,7 @@ class ToolApiProviderListToolsApi(Resource): user_id = user.id - parser = reqparse.RequestParser().add_argument( - "provider", type=str, required=True, nullable=False, location="args" - ) - - args = parser.parse_args() + args = parser_tools.parse_args() return jsonable_encoder( ApiToolManageService.list_api_tool_provider_tools( @@ -296,33 +312,33 @@ class ToolApiProviderListToolsApi(Resource): ) +parser_api_update = ( + reqparse.RequestParser() + .add_argument("credentials", type=dict, required=True, nullable=False, location="json") + .add_argument("schema_type", type=str, required=True, nullable=False, location="json") + .add_argument("schema", type=str, required=True, nullable=False, location="json") + .add_argument("provider", type=str, required=True, nullable=False, location="json") + .add_argument("original_provider", type=str, required=True, nullable=False, location="json") + .add_argument("icon", type=dict, required=True, nullable=False, location="json") + .add_argument("privacy_policy", type=str, required=True, nullable=True, location="json") + .add_argument("labels", type=list[str], required=False, nullable=True, location="json") + .add_argument("custom_disclaimer", type=str, required=True, nullable=True, location="json") +) + + @console_ns.route("/workspaces/current/tool-provider/api/update") class ToolApiProviderUpdateApi(Resource): + @api.expect(parser_api_update) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self): user, tenant_id = current_account_with_tenant() - if not user.is_admin_or_owner: - raise Forbidden() - user_id = user.id - parser = ( - reqparse.RequestParser() - .add_argument("credentials", type=dict, required=True, nullable=False, location="json") - .add_argument("schema_type", type=str, required=True, nullable=False, location="json") - .add_argument("schema", type=str, required=True, nullable=False, location="json") - .add_argument("provider", type=str, required=True, nullable=False, location="json") - .add_argument("original_provider", type=str, required=True, nullable=False, location="json") - .add_argument("icon", type=dict, required=True, nullable=False, location="json") - .add_argument("privacy_policy", type=str, required=True, nullable=True, location="json") - .add_argument("labels", type=list[str], required=False, nullable=True, location="json") - .add_argument("custom_disclaimer", type=str, required=True, nullable=True, location="json") - ) - - args = parser.parse_args() + args = parser_api_update.parse_args() return ApiToolManageService.update_api_tool_provider( user_id, @@ -339,24 +355,24 @@ class ToolApiProviderUpdateApi(Resource): ) +parser_api_delete = reqparse.RequestParser().add_argument( + "provider", type=str, required=True, nullable=False, location="json" +) + + @console_ns.route("/workspaces/current/tool-provider/api/delete") class ToolApiProviderDeleteApi(Resource): + @api.expect(parser_api_delete) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self): user, tenant_id = current_account_with_tenant() - if not user.is_admin_or_owner: - raise Forbidden() - user_id = user.id - parser = reqparse.RequestParser().add_argument( - "provider", type=str, required=True, nullable=False, location="json" - ) - - args = parser.parse_args() + args = parser_api_delete.parse_args() return ApiToolManageService.delete_api_tool_provider( user_id, @@ -365,8 +381,12 @@ class ToolApiProviderDeleteApi(Resource): ) +parser_get = reqparse.RequestParser().add_argument("provider", type=str, required=True, nullable=False, location="args") + + @console_ns.route("/workspaces/current/tool-provider/api/get") class ToolApiProviderGetApi(Resource): + @api.expect(parser_get) @setup_required @login_required @account_initialization_required @@ -375,11 +395,7 @@ class ToolApiProviderGetApi(Resource): user_id = user.id - parser = reqparse.RequestParser().add_argument( - "provider", type=str, required=True, nullable=False, location="args" - ) - - args = parser.parse_args() + args = parser_get.parse_args() return ApiToolManageService.get_api_tool_provider( user_id, @@ -403,40 +419,44 @@ class ToolBuiltinProviderCredentialsSchemaApi(Resource): ) +parser_schema = reqparse.RequestParser().add_argument( + "schema", type=str, required=True, nullable=False, location="json" +) + + @console_ns.route("/workspaces/current/tool-provider/api/schema") class ToolApiProviderSchemaApi(Resource): + @api.expect(parser_schema) @setup_required @login_required @account_initialization_required def post(self): - parser = reqparse.RequestParser().add_argument( - "schema", type=str, required=True, nullable=False, location="json" - ) - - args = parser.parse_args() + args = parser_schema.parse_args() return ApiToolManageService.parser_api_schema( schema=args["schema"], ) +parser_pre = ( + reqparse.RequestParser() + .add_argument("tool_name", type=str, required=True, nullable=False, location="json") + .add_argument("provider_name", type=str, required=False, nullable=False, location="json") + .add_argument("credentials", type=dict, required=True, nullable=False, location="json") + .add_argument("parameters", type=dict, required=True, nullable=False, location="json") + .add_argument("schema_type", type=str, required=True, nullable=False, location="json") + .add_argument("schema", type=str, required=True, nullable=False, location="json") +) + + @console_ns.route("/workspaces/current/tool-provider/api/test/pre") class ToolApiProviderPreviousTestApi(Resource): + @api.expect(parser_pre) @setup_required @login_required @account_initialization_required def post(self): - parser = ( - reqparse.RequestParser() - .add_argument("tool_name", type=str, required=True, nullable=False, location="json") - .add_argument("provider_name", type=str, required=False, nullable=False, location="json") - .add_argument("credentials", type=dict, required=True, nullable=False, location="json") - .add_argument("parameters", type=dict, required=True, nullable=False, location="json") - .add_argument("schema_type", type=str, required=True, nullable=False, location="json") - .add_argument("schema", type=str, required=True, nullable=False, location="json") - ) - - args = parser.parse_args() + args = parser_pre.parse_args() _, current_tenant_id = current_account_with_tenant() return ApiToolManageService.test_api_tool_preview( current_tenant_id, @@ -449,32 +469,32 @@ class ToolApiProviderPreviousTestApi(Resource): ) +parser_create = ( + reqparse.RequestParser() + .add_argument("workflow_app_id", type=uuid_value, required=True, nullable=False, location="json") + .add_argument("name", type=alphanumeric, required=True, nullable=False, location="json") + .add_argument("label", type=str, required=True, nullable=False, location="json") + .add_argument("description", type=str, required=True, nullable=False, location="json") + .add_argument("icon", type=dict, required=True, nullable=False, location="json") + .add_argument("parameters", type=list[dict], required=True, nullable=False, location="json") + .add_argument("privacy_policy", type=str, required=False, nullable=True, location="json", default="") + .add_argument("labels", type=list[str], required=False, nullable=True, location="json") +) + + @console_ns.route("/workspaces/current/tool-provider/workflow/create") class ToolWorkflowProviderCreateApi(Resource): + @api.expect(parser_create) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self): user, tenant_id = current_account_with_tenant() - if not user.is_admin_or_owner: - raise Forbidden() - user_id = user.id - reqparser = ( - reqparse.RequestParser() - .add_argument("workflow_app_id", type=uuid_value, required=True, nullable=False, location="json") - .add_argument("name", type=alphanumeric, required=True, nullable=False, location="json") - .add_argument("label", type=str, required=True, nullable=False, location="json") - .add_argument("description", type=str, required=True, nullable=False, location="json") - .add_argument("icon", type=dict, required=True, nullable=False, location="json") - .add_argument("parameters", type=list[dict], required=True, nullable=False, location="json") - .add_argument("privacy_policy", type=str, required=False, nullable=True, location="json", default="") - .add_argument("labels", type=list[str], required=False, nullable=True, location="json") - ) - - args = reqparser.parse_args() + args = parser_create.parse_args() return WorkflowToolManageService.create_workflow_tool( user_id=user_id, @@ -490,32 +510,31 @@ class ToolWorkflowProviderCreateApi(Resource): ) +parser_workflow_update = ( + reqparse.RequestParser() + .add_argument("workflow_tool_id", type=uuid_value, required=True, nullable=False, location="json") + .add_argument("name", type=alphanumeric, required=True, nullable=False, location="json") + .add_argument("label", type=str, required=True, nullable=False, location="json") + .add_argument("description", type=str, required=True, nullable=False, location="json") + .add_argument("icon", type=dict, required=True, nullable=False, location="json") + .add_argument("parameters", type=list[dict], required=True, nullable=False, location="json") + .add_argument("privacy_policy", type=str, required=False, nullable=True, location="json", default="") + .add_argument("labels", type=list[str], required=False, nullable=True, location="json") +) + + @console_ns.route("/workspaces/current/tool-provider/workflow/update") class ToolWorkflowProviderUpdateApi(Resource): + @api.expect(parser_workflow_update) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self): user, tenant_id = current_account_with_tenant() - - if not user.is_admin_or_owner: - raise Forbidden() - user_id = user.id - reqparser = ( - reqparse.RequestParser() - .add_argument("workflow_tool_id", type=uuid_value, required=True, nullable=False, location="json") - .add_argument("name", type=alphanumeric, required=True, nullable=False, location="json") - .add_argument("label", type=str, required=True, nullable=False, location="json") - .add_argument("description", type=str, required=True, nullable=False, location="json") - .add_argument("icon", type=dict, required=True, nullable=False, location="json") - .add_argument("parameters", type=list[dict], required=True, nullable=False, location="json") - .add_argument("privacy_policy", type=str, required=False, nullable=True, location="json", default="") - .add_argument("labels", type=list[str], required=False, nullable=True, location="json") - ) - - args = reqparser.parse_args() + args = parser_workflow_update.parse_args() if not args["workflow_tool_id"]: raise ValueError("incorrect workflow_tool_id") @@ -534,24 +553,24 @@ class ToolWorkflowProviderUpdateApi(Resource): ) +parser_workflow_delete = reqparse.RequestParser().add_argument( + "workflow_tool_id", type=uuid_value, required=True, nullable=False, location="json" +) + + @console_ns.route("/workspaces/current/tool-provider/workflow/delete") class ToolWorkflowProviderDeleteApi(Resource): + @api.expect(parser_workflow_delete) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self): user, tenant_id = current_account_with_tenant() - if not user.is_admin_or_owner: - raise Forbidden() - user_id = user.id - reqparser = reqparse.RequestParser().add_argument( - "workflow_tool_id", type=uuid_value, required=True, nullable=False, location="json" - ) - - args = reqparser.parse_args() + args = parser_workflow_delete.parse_args() return WorkflowToolManageService.delete_workflow_tool( user_id, @@ -560,8 +579,16 @@ class ToolWorkflowProviderDeleteApi(Resource): ) +parser_wf_get = ( + reqparse.RequestParser() + .add_argument("workflow_tool_id", type=uuid_value, required=False, nullable=True, location="args") + .add_argument("workflow_app_id", type=uuid_value, required=False, nullable=True, location="args") +) + + @console_ns.route("/workspaces/current/tool-provider/workflow/get") class ToolWorkflowProviderGetApi(Resource): + @api.expect(parser_wf_get) @setup_required @login_required @account_initialization_required @@ -570,13 +597,7 @@ class ToolWorkflowProviderGetApi(Resource): user_id = user.id - parser = ( - reqparse.RequestParser() - .add_argument("workflow_tool_id", type=uuid_value, required=False, nullable=True, location="args") - .add_argument("workflow_app_id", type=uuid_value, required=False, nullable=True, location="args") - ) - - args = parser.parse_args() + args = parser_wf_get.parse_args() if args.get("workflow_tool_id"): tool = WorkflowToolManageService.get_workflow_tool_by_tool_id( @@ -596,8 +617,14 @@ class ToolWorkflowProviderGetApi(Resource): return jsonable_encoder(tool) +parser_wf_tools = reqparse.RequestParser().add_argument( + "workflow_tool_id", type=uuid_value, required=True, nullable=False, location="args" +) + + @console_ns.route("/workspaces/current/tool-provider/workflow/tools") class ToolWorkflowProviderListToolApi(Resource): + @api.expect(parser_wf_tools) @setup_required @login_required @account_initialization_required @@ -606,11 +633,7 @@ class ToolWorkflowProviderListToolApi(Resource): user_id = user.id - parser = reqparse.RequestParser().add_argument( - "workflow_tool_id", type=uuid_value, required=True, nullable=False, location="args" - ) - - args = parser.parse_args() + args = parser_wf_tools.parse_args() return jsonable_encoder( WorkflowToolManageService.list_single_workflow_tools( @@ -695,18 +718,15 @@ class ToolLabelsApi(Resource): class ToolPluginOAuthApi(Resource): @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def get(self, provider): tool_provider = ToolProviderID(provider) plugin_id = tool_provider.plugin_id provider_name = tool_provider.provider_name - # todo check permission user, tenant_id = current_account_with_tenant() - if not user.is_admin_or_owner: - raise Forbidden() - oauth_client_params = BuiltinToolManageService.get_oauth_client(tenant_id=tenant_id, provider=provider) if oauth_client_params is None: raise Forbidden("no oauth available client config found for this tool provider") @@ -786,37 +806,43 @@ class ToolOAuthCallback(Resource): return redirect(f"{dify_config.CONSOLE_WEB_URL}/oauth-callback") +parser_default_cred = reqparse.RequestParser().add_argument( + "id", type=str, required=True, nullable=False, location="json" +) + + @console_ns.route("/workspaces/current/tool-provider/builtin//default-credential") class ToolBuiltinProviderSetDefaultApi(Resource): + @api.expect(parser_default_cred) @setup_required @login_required @account_initialization_required def post(self, provider): current_user, current_tenant_id = current_account_with_tenant() - parser = reqparse.RequestParser().add_argument("id", type=str, required=True, nullable=False, location="json") - args = parser.parse_args() + args = parser_default_cred.parse_args() return BuiltinToolManageService.set_default_provider( tenant_id=current_tenant_id, user_id=current_user.id, provider=provider, id=args["id"] ) +parser_custom = ( + reqparse.RequestParser() + .add_argument("client_params", type=dict, required=False, nullable=True, location="json") + .add_argument("enable_oauth_custom_client", type=bool, required=False, nullable=True, location="json") +) + + @console_ns.route("/workspaces/current/tool-provider/builtin//oauth/custom-client") class ToolOAuthCustomClient(Resource): + @api.expect(parser_custom) @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required - def post(self, provider): - parser = ( - reqparse.RequestParser() - .add_argument("client_params", type=dict, required=False, nullable=True, location="json") - .add_argument("enable_oauth_custom_client", type=bool, required=False, nullable=True, location="json") - ) - args = parser.parse_args() + def post(self, provider: str): + args = parser_custom.parse_args() - user, tenant_id = current_account_with_tenant() - - if not user.is_admin_or_owner: - raise Forbidden() + _, tenant_id = current_account_with_tenant() return BuiltinToolManageService.save_custom_oauth_client_params( tenant_id=tenant_id, @@ -874,141 +900,191 @@ class ToolBuiltinProviderGetCredentialInfoApi(Resource): ) +parser_mcp = ( + reqparse.RequestParser() + .add_argument("server_url", type=str, required=True, nullable=False, location="json") + .add_argument("name", type=str, required=True, nullable=False, location="json") + .add_argument("icon", type=str, required=True, nullable=False, location="json") + .add_argument("icon_type", type=str, required=True, nullable=False, location="json") + .add_argument("icon_background", type=str, required=False, nullable=True, location="json", default="") + .add_argument("server_identifier", type=str, required=True, nullable=False, location="json") + .add_argument("configuration", type=dict, required=False, nullable=True, location="json", default={}) + .add_argument("headers", type=dict, required=False, nullable=True, location="json", default={}) + .add_argument("authentication", type=dict, required=False, nullable=True, location="json", default={}) +) +parser_mcp_put = ( + reqparse.RequestParser() + .add_argument("server_url", type=str, required=True, nullable=False, location="json") + .add_argument("name", type=str, required=True, nullable=False, location="json") + .add_argument("icon", type=str, required=True, nullable=False, location="json") + .add_argument("icon_type", type=str, required=True, nullable=False, location="json") + .add_argument("icon_background", type=str, required=False, nullable=True, location="json") + .add_argument("provider_id", type=str, required=True, nullable=False, location="json") + .add_argument("server_identifier", type=str, required=True, nullable=False, location="json") + .add_argument("configuration", type=dict, required=False, nullable=True, location="json", default={}) + .add_argument("headers", type=dict, required=False, nullable=True, location="json", default={}) + .add_argument("authentication", type=dict, required=False, nullable=True, location="json", default={}) +) +parser_mcp_delete = reqparse.RequestParser().add_argument( + "provider_id", type=str, required=True, nullable=False, location="json" +) + + @console_ns.route("/workspaces/current/tool-provider/mcp") class ToolProviderMCPApi(Resource): + @api.expect(parser_mcp) @setup_required @login_required @account_initialization_required def post(self): - parser = ( - reqparse.RequestParser() - .add_argument("server_url", type=str, required=True, nullable=False, location="json") - .add_argument("name", type=str, required=True, nullable=False, location="json") - .add_argument("icon", type=str, required=True, nullable=False, location="json") - .add_argument("icon_type", type=str, required=True, nullable=False, location="json") - .add_argument("icon_background", type=str, required=False, nullable=True, location="json", default="") - .add_argument("server_identifier", type=str, required=True, nullable=False, location="json") - .add_argument("timeout", type=float, required=False, nullable=False, location="json", default=30) - .add_argument("sse_read_timeout", type=float, required=False, nullable=False, location="json", default=300) - .add_argument("headers", type=dict, required=False, nullable=True, location="json", default={}) - ) - args = parser.parse_args() + args = parser_mcp.parse_args() user, tenant_id = current_account_with_tenant() - if not is_valid_url(args["server_url"]): - raise ValueError("Server URL is not valid.") - return jsonable_encoder( - MCPToolManageService.create_mcp_provider( + + # Parse and validate models + configuration = MCPConfiguration.model_validate(args["configuration"]) + authentication = MCPAuthentication.model_validate(args["authentication"]) if args["authentication"] else None + + # Create provider + with Session(db.engine) as session, session.begin(): + service = MCPToolManageService(session=session) + result = service.create_provider( tenant_id=tenant_id, + user_id=user.id, server_url=args["server_url"], name=args["name"], icon=args["icon"], icon_type=args["icon_type"], icon_background=args["icon_background"], - user_id=user.id, server_identifier=args["server_identifier"], - timeout=args["timeout"], - sse_read_timeout=args["sse_read_timeout"], headers=args["headers"], + configuration=configuration, + authentication=authentication, ) - ) + return jsonable_encoder(result) + @api.expect(parser_mcp_put) @setup_required @login_required @account_initialization_required def put(self): - parser = ( - reqparse.RequestParser() - .add_argument("server_url", type=str, required=True, nullable=False, location="json") - .add_argument("name", type=str, required=True, nullable=False, location="json") - .add_argument("icon", type=str, required=True, nullable=False, location="json") - .add_argument("icon_type", type=str, required=True, nullable=False, location="json") - .add_argument("icon_background", type=str, required=False, nullable=True, location="json") - .add_argument("provider_id", type=str, required=True, nullable=False, location="json") - .add_argument("server_identifier", type=str, required=True, nullable=False, location="json") - .add_argument("timeout", type=float, required=False, nullable=True, location="json") - .add_argument("sse_read_timeout", type=float, required=False, nullable=True, location="json") - .add_argument("headers", type=dict, required=False, nullable=True, location="json") - ) - args = parser.parse_args() - if not is_valid_url(args["server_url"]): - if "[__HIDDEN__]" in args["server_url"]: - pass - else: - raise ValueError("Server URL is not valid.") + args = parser_mcp_put.parse_args() + configuration = MCPConfiguration.model_validate(args["configuration"]) + authentication = MCPAuthentication.model_validate(args["authentication"]) if args["authentication"] else None _, current_tenant_id = current_account_with_tenant() - MCPToolManageService.update_mcp_provider( - tenant_id=current_tenant_id, - provider_id=args["provider_id"], - server_url=args["server_url"], - name=args["name"], - icon=args["icon"], - icon_type=args["icon_type"], - icon_background=args["icon_background"], - server_identifier=args["server_identifier"], - timeout=args.get("timeout"), - sse_read_timeout=args.get("sse_read_timeout"), - headers=args.get("headers"), - ) - return {"result": "success"} + # Step 1: Validate server URL change if needed (includes URL format validation and network operation) + validation_result = None + with Session(db.engine) as session: + service = MCPToolManageService(session=session) + validation_result = service.validate_server_url_change( + tenant_id=current_tenant_id, provider_id=args["provider_id"], new_server_url=args["server_url"] + ) + + # No need to check for errors here, exceptions will be raised directly + + # Step 2: Perform database update in a transaction + with Session(db.engine) as session, session.begin(): + service = MCPToolManageService(session=session) + service.update_provider( + tenant_id=current_tenant_id, + provider_id=args["provider_id"], + server_url=args["server_url"], + name=args["name"], + icon=args["icon"], + icon_type=args["icon_type"], + icon_background=args["icon_background"], + server_identifier=args["server_identifier"], + headers=args["headers"], + configuration=configuration, + authentication=authentication, + validation_result=validation_result, + ) + return {"result": "success"} + + @api.expect(parser_mcp_delete) @setup_required @login_required @account_initialization_required def delete(self): - parser = reqparse.RequestParser().add_argument( - "provider_id", type=str, required=True, nullable=False, location="json" - ) - args = parser.parse_args() + args = parser_mcp_delete.parse_args() _, current_tenant_id = current_account_with_tenant() - MCPToolManageService.delete_mcp_tool(tenant_id=current_tenant_id, provider_id=args["provider_id"]) - return {"result": "success"} + + with Session(db.engine) as session, session.begin(): + service = MCPToolManageService(session=session) + service.delete_provider(tenant_id=current_tenant_id, provider_id=args["provider_id"]) + return {"result": "success"} + + +parser_auth = ( + reqparse.RequestParser() + .add_argument("provider_id", type=str, required=True, nullable=False, location="json") + .add_argument("authorization_code", type=str, required=False, nullable=True, location="json") +) @console_ns.route("/workspaces/current/tool-provider/mcp/auth") class ToolMCPAuthApi(Resource): + @api.expect(parser_auth) @setup_required @login_required @account_initialization_required def post(self): - parser = ( - reqparse.RequestParser() - .add_argument("provider_id", type=str, required=True, nullable=False, location="json") - .add_argument("authorization_code", type=str, required=False, nullable=True, location="json") - ) - args = parser.parse_args() + args = parser_auth.parse_args() provider_id = args["provider_id"] _, tenant_id = current_account_with_tenant() - provider = MCPToolManageService.get_mcp_provider_by_provider_id(provider_id, tenant_id) - if not provider: - raise ValueError("provider not found") - try: - with MCPClient( - provider.decrypted_server_url, - provider_id, - tenant_id, - authed=False, - authorization_code=args["authorization_code"], - for_list=True, - headers=provider.decrypted_headers, - timeout=provider.timeout, - sse_read_timeout=provider.sse_read_timeout, - ): - MCPToolManageService.update_mcp_provider_credentials( - mcp_provider=provider, - credentials=provider.decrypted_credentials, - authed=True, - ) - return {"result": "success"} - except MCPAuthError: - auth_provider = OAuthClientProvider(provider_id, tenant_id, for_list=True) - return auth(auth_provider, provider.decrypted_server_url, args["authorization_code"]) - except MCPError as e: - MCPToolManageService.update_mcp_provider_credentials( - mcp_provider=provider, - credentials={}, - authed=False, - ) + with Session(db.engine) as session, session.begin(): + service = MCPToolManageService(session=session) + db_provider = service.get_provider(provider_id=provider_id, tenant_id=tenant_id) + if not db_provider: + raise ValueError("provider not found") + + # Convert to entity + provider_entity = db_provider.to_entity() + server_url = provider_entity.decrypt_server_url() + headers = provider_entity.decrypt_authentication() + + # Try to connect without active transaction + try: + # Use MCPClientWithAuthRetry to handle authentication automatically + with MCPClient( + server_url=server_url, + headers=headers, + timeout=provider_entity.timeout, + sse_read_timeout=provider_entity.sse_read_timeout, + ): + # Update credentials in new transaction + with Session(db.engine) as session, session.begin(): + service = MCPToolManageService(session=session) + service.update_provider_credentials( + provider_id=provider_id, + tenant_id=tenant_id, + credentials=provider_entity.credentials, + authed=True, + ) + return {"result": "success"} + except MCPAuthError as e: + try: + # Pass the extracted OAuth metadata hints to auth() + auth_result = auth( + provider_entity, + args.get("authorization_code"), + resource_metadata_url=e.resource_metadata_url, + scope_hint=e.scope_hint, + ) + with Session(db.engine) as session, session.begin(): + service = MCPToolManageService(session=session) + response = service.execute_auth_actions(auth_result) + return response + except MCPRefreshTokenError as e: + with Session(db.engine) as session, session.begin(): + service = MCPToolManageService(session=session) + service.clear_provider_credentials(provider_id=provider_id, tenant_id=tenant_id) + raise ValueError(f"Failed to refresh token, please try to authorize again: {e}") from e + except (MCPError, ValueError) as e: + with Session(db.engine) as session, session.begin(): + service = MCPToolManageService(session=session) + service.clear_provider_credentials(provider_id=provider_id, tenant_id=tenant_id) raise ValueError(f"Failed to connect to MCP server: {e}") from e @@ -1019,8 +1095,10 @@ class ToolMCPDetailApi(Resource): @account_initialization_required def get(self, provider_id): _, tenant_id = current_account_with_tenant() - provider = MCPToolManageService.get_mcp_provider_by_provider_id(provider_id, tenant_id) - return jsonable_encoder(ToolTransformService.mcp_provider_to_user_provider(provider, for_list=True)) + with Session(db.engine) as session, session.begin(): + service = MCPToolManageService(session=session) + provider = service.get_provider(provider_id=provider_id, tenant_id=tenant_id) + return jsonable_encoder(ToolTransformService.mcp_provider_to_user_provider(provider, for_list=True)) @console_ns.route("/workspaces/current/tools/mcp") @@ -1031,9 +1109,12 @@ class ToolMCPListAllApi(Resource): def get(self): _, tenant_id = current_account_with_tenant() - tools = MCPToolManageService.retrieve_mcp_tools(tenant_id=tenant_id) + with Session(db.engine) as session, session.begin(): + service = MCPToolManageService(session=session) + # Skip sensitive data decryption for list view to improve performance + tools = service.list_providers(tenant_id=tenant_id, include_sensitive=False) - return [tool.to_dict() for tool in tools] + return [tool.to_dict() for tool in tools] @console_ns.route("/workspaces/current/tool-provider/mcp/update/") @@ -1043,23 +1124,38 @@ class ToolMCPUpdateApi(Resource): @account_initialization_required def get(self, provider_id): _, tenant_id = current_account_with_tenant() - tools = MCPToolManageService.list_mcp_tool_from_remote_server( - tenant_id=tenant_id, - provider_id=provider_id, - ) - return jsonable_encoder(tools) + with Session(db.engine) as session, session.begin(): + service = MCPToolManageService(session=session) + tools = service.list_provider_tools( + tenant_id=tenant_id, + provider_id=provider_id, + ) + return jsonable_encoder(tools) + + +parser_cb = ( + reqparse.RequestParser() + .add_argument("code", type=str, required=True, nullable=False, location="args") + .add_argument("state", type=str, required=True, nullable=False, location="args") +) @console_ns.route("/mcp/oauth/callback") class ToolMCPCallbackApi(Resource): + @api.expect(parser_cb) def get(self): - parser = ( - reqparse.RequestParser() - .add_argument("code", type=str, required=True, nullable=False, location="args") - .add_argument("state", type=str, required=True, nullable=False, location="args") - ) - args = parser.parse_args() + args = parser_cb.parse_args() state_key = args["state"] authorization_code = args["code"] - handle_callback(state_key, authorization_code) + + # Create service instance for handle_callback + with Session(db.engine) as session, session.begin(): + mcp_service = MCPToolManageService(session=session) + # handle_callback now returns state data and tokens + state_data, tokens = handle_callback(state_key, authorization_code) + # Save tokens using the service layer + mcp_service.save_oauth_data( + state_data.provider_id, state_data.tenant_id, tokens.model_dump(), OAuthDataType.TOKENS + ) + return redirect(f"{dify_config.CONSOLE_WEB_URL}/oauth-callback") diff --git a/api/controllers/console/workspace/trigger_providers.py b/api/controllers/console/workspace/trigger_providers.py index 631b82979b..b2abae0b3d 100644 --- a/api/controllers/console/workspace/trigger_providers.py +++ b/api/controllers/console/workspace/trigger_providers.py @@ -7,7 +7,7 @@ from werkzeug.exceptions import BadRequest, Forbidden from configs import dify_config from controllers.console import api -from controllers.console.wraps import account_initialization_required, setup_required +from controllers.console.wraps import account_initialization_required, is_admin_or_owner_required, setup_required from controllers.web.error import NotFoundError from core.model_runtime.utils.encoders import jsonable_encoder from core.plugin.entities.plugin_daemon import CredentialType @@ -20,8 +20,8 @@ from models.account import Account from models.provider_ids import TriggerProviderID from services.plugin.oauth_service import OAuthProxyService from services.trigger.trigger_provider_service import TriggerProviderService -from services.trigger.trigger_service import TriggerService from services.trigger.trigger_subscription_builder_service import TriggerSubscriptionBuilderService +from services.trigger.trigger_subscription_operator_service import TriggerSubscriptionOperatorService logger = logging.getLogger(__name__) @@ -67,14 +67,12 @@ class TriggerProviderInfoApi(Resource): class TriggerSubscriptionListApi(Resource): @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def get(self, provider): """List all trigger subscriptions for the current tenant's provider""" user = current_user - assert isinstance(user, Account) assert user.current_tenant_id is not None - if not user.is_admin_or_owner: - raise Forbidden() try: return jsonable_encoder( @@ -92,17 +90,16 @@ class TriggerSubscriptionListApi(Resource): class TriggerSubscriptionBuilderCreateApi(Resource): @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self, provider): """Add a new subscription instance for a trigger provider""" user = current_user - assert isinstance(user, Account) assert user.current_tenant_id is not None - if not user.is_admin_or_owner: - raise Forbidden() - parser = reqparse.RequestParser() - parser.add_argument("credential_type", type=str, required=False, nullable=True, location="json") + parser = reqparse.RequestParser().add_argument( + "credential_type", type=str, required=False, nullable=True, location="json" + ) args = parser.parse_args() try: @@ -133,18 +130,17 @@ class TriggerSubscriptionBuilderGetApi(Resource): class TriggerSubscriptionBuilderVerifyApi(Resource): @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self, provider, subscription_builder_id): """Verify a subscription instance for a trigger provider""" user = current_user - assert isinstance(user, Account) assert user.current_tenant_id is not None - if not user.is_admin_or_owner: - raise Forbidden() - - parser = reqparse.RequestParser() - # The credentials of the subscription builder - parser.add_argument("credentials", type=dict, required=False, nullable=True, location="json") + parser = ( + reqparse.RequestParser() + # The credentials of the subscription builder + .add_argument("credentials", type=dict, required=False, nullable=True, location="json") + ) args = parser.parse_args() try: @@ -173,15 +169,17 @@ class TriggerSubscriptionBuilderUpdateApi(Resource): assert isinstance(user, Account) assert user.current_tenant_id is not None - parser = reqparse.RequestParser() - # The name of the subscription builder - parser.add_argument("name", type=str, required=False, nullable=True, location="json") - # The parameters of the subscription builder - parser.add_argument("parameters", type=dict, required=False, nullable=True, location="json") - # The properties of the subscription builder - parser.add_argument("properties", type=dict, required=False, nullable=True, location="json") - # The credentials of the subscription builder - parser.add_argument("credentials", type=dict, required=False, nullable=True, location="json") + parser = ( + reqparse.RequestParser() + # The name of the subscription builder + .add_argument("name", type=str, required=False, nullable=True, location="json") + # The parameters of the subscription builder + .add_argument("parameters", type=dict, required=False, nullable=True, location="json") + # The properties of the subscription builder + .add_argument("properties", type=dict, required=False, nullable=True, location="json") + # The credentials of the subscription builder + .add_argument("credentials", type=dict, required=False, nullable=True, location="json") + ) args = parser.parse_args() try: return jsonable_encoder( @@ -223,24 +221,23 @@ class TriggerSubscriptionBuilderLogsApi(Resource): class TriggerSubscriptionBuilderBuildApi(Resource): @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self, provider, subscription_builder_id): """Build a subscription instance for a trigger provider""" user = current_user - assert isinstance(user, Account) assert user.current_tenant_id is not None - if not user.is_admin_or_owner: - raise Forbidden() - - parser = reqparse.RequestParser() - # The name of the subscription builder - parser.add_argument("name", type=str, required=False, nullable=True, location="json") - # The parameters of the subscription builder - parser.add_argument("parameters", type=dict, required=False, nullable=True, location="json") - # The properties of the subscription builder - parser.add_argument("properties", type=dict, required=False, nullable=True, location="json") - # The credentials of the subscription builder - parser.add_argument("credentials", type=dict, required=False, nullable=True, location="json") + parser = ( + reqparse.RequestParser() + # The name of the subscription builder + .add_argument("name", type=str, required=False, nullable=True, location="json") + # The parameters of the subscription builder + .add_argument("parameters", type=dict, required=False, nullable=True, location="json") + # The properties of the subscription builder + .add_argument("properties", type=dict, required=False, nullable=True, location="json") + # The credentials of the subscription builder + .add_argument("credentials", type=dict, required=False, nullable=True, location="json") + ) args = parser.parse_args() try: # Use atomic update_and_build to prevent race conditions @@ -264,14 +261,12 @@ class TriggerSubscriptionBuilderBuildApi(Resource): class TriggerSubscriptionDeleteApi(Resource): @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required - def post(self, subscription_id): + def post(self, subscription_id: str): """Delete a subscription instance""" user = current_user - assert isinstance(user, Account) assert user.current_tenant_id is not None - if not user.is_admin_or_owner: - raise Forbidden() try: with Session(db.engine) as session: @@ -282,7 +277,7 @@ class TriggerSubscriptionDeleteApi(Resource): subscription_id=subscription_id, ) # Delete plugin triggers - TriggerService.delete_plugin_trigger_by_subscription( + TriggerSubscriptionOperatorService.delete_plugin_trigger_by_subscription( session=session, tenant_id=user.current_tenant_id, subscription_id=subscription_id, @@ -427,7 +422,7 @@ class TriggerOAuthCallbackApi(Resource): expires_at = credentials_response.expires_at if not credentials: - raise Exception("Failed to get OAuth credentials") + raise ValueError("Failed to get OAuth credentials from the provider.") # Update subscription builder TriggerSubscriptionBuilderService.update_trigger_subscription_builder( @@ -446,14 +441,12 @@ class TriggerOAuthCallbackApi(Resource): class TriggerOAuthClientManageApi(Resource): @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def get(self, provider): """Get OAuth client configuration for a provider""" user = current_user - assert isinstance(user, Account) assert user.current_tenant_id is not None - if not user.is_admin_or_owner: - raise Forbidden() try: provider_id = TriggerProviderID(provider) @@ -469,9 +462,7 @@ class TriggerOAuthClientManageApi(Resource): tenant_id=user.current_tenant_id, provider_id=provider_id, ) - - # Check if there's a system OAuth client - system_client = TriggerProviderService.get_oauth_client( + system_client_exists = TriggerProviderService.is_oauth_system_client_exists( tenant_id=user.current_tenant_id, provider_id=provider_id, ) @@ -479,8 +470,8 @@ class TriggerOAuthClientManageApi(Resource): redirect_uri = f"{dify_config.CONSOLE_API_URL}/console/api/oauth/plugin/{provider}/trigger/callback" return jsonable_encoder( { - "configured": bool(custom_params or system_client), - "system_configured": bool(system_client), + "configured": bool(custom_params or system_client_exists), + "system_configured": system_client_exists, "custom_configured": bool(custom_params), "oauth_client_schema": provider_controller.get_oauth_client_schema(), "custom_enabled": is_custom_enabled, @@ -495,18 +486,18 @@ class TriggerOAuthClientManageApi(Resource): @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def post(self, provider): """Configure custom OAuth client for a provider""" user = current_user - assert isinstance(user, Account) assert user.current_tenant_id is not None - if not user.is_admin_or_owner: - raise Forbidden() - parser = reqparse.RequestParser() - parser.add_argument("client_params", type=dict, required=False, nullable=True, location="json") - parser.add_argument("enabled", type=bool, required=False, nullable=True, location="json") + parser = ( + reqparse.RequestParser() + .add_argument("client_params", type=dict, required=False, nullable=True, location="json") + .add_argument("enabled", type=bool, required=False, nullable=True, location="json") + ) args = parser.parse_args() try: @@ -526,14 +517,12 @@ class TriggerOAuthClientManageApi(Resource): @setup_required @login_required + @is_admin_or_owner_required @account_initialization_required def delete(self, provider): """Remove custom OAuth client configuration""" user = current_user - assert isinstance(user, Account) assert user.current_tenant_id is not None - if not user.is_admin_or_owner: - raise Forbidden() try: provider_id = TriggerProviderID(provider) diff --git a/api/controllers/console/workspace/workspace.py b/api/controllers/console/workspace/workspace.py index f9856df9ea..1548a18b90 100644 --- a/api/controllers/console/workspace/workspace.py +++ b/api/controllers/console/workspace/workspace.py @@ -13,7 +13,7 @@ from controllers.common.errors import ( TooManyFilesError, UnsupportedFileTypeError, ) -from controllers.console import console_ns +from controllers.console import api, console_ns from controllers.console.admin import admin_required from controllers.console.error import AccountNotLinkTenantError from controllers.console.wraps import ( @@ -21,6 +21,7 @@ from controllers.console.wraps import ( cloud_edition_billing_resource_check, setup_required, ) +from enums.cloud_plan import CloudPlan from extensions.ext_database import db from libs.helper import TimestampField from libs.login import current_account_with_tenant, login_required @@ -83,7 +84,7 @@ class TenantListApi(Resource): "name": tenant.name, "status": tenant.status, "created_at": tenant.created_at, - "plan": features.billing.subscription.plan if features.billing.enabled else "sandbox", + "plan": features.billing.subscription.plan if features.billing.enabled else CloudPlan.SANDBOX, "current": tenant.id == current_tenant_id if current_tenant_id else False, } @@ -127,7 +128,7 @@ class TenantApi(Resource): @login_required @account_initialization_required @marshal_with(tenant_fields) - def get(self): + def post(self): if request.path == "/info": logger.warning("Deprecated URL /info was used.") @@ -149,15 +150,18 @@ class TenantApi(Resource): return WorkspaceService.get_tenant_info(tenant), 200 +parser_switch = reqparse.RequestParser().add_argument("tenant_id", type=str, required=True, location="json") + + @console_ns.route("/workspaces/switch") class SwitchWorkspaceApi(Resource): + @api.expect(parser_switch) @setup_required @login_required @account_initialization_required def post(self): current_user, _ = current_account_with_tenant() - parser = reqparse.RequestParser().add_argument("tenant_id", type=str, required=True, location="json") - args = parser.parse_args() + args = parser_switch.parse_args() # check if tenant_id is valid, 403 if not try: @@ -241,16 +245,19 @@ class WebappLogoWorkspaceApi(Resource): return {"id": upload_file.id}, 201 +parser_info = reqparse.RequestParser().add_argument("name", type=str, required=True, location="json") + + @console_ns.route("/workspaces/info") class WorkspaceInfoApi(Resource): + @api.expect(parser_info) @setup_required @login_required @account_initialization_required # Change workspace name def post(self): _, current_tenant_id = current_account_with_tenant() - parser = reqparse.RequestParser().add_argument("name", type=str, required=True, location="json") - args = parser.parse_args() + args = parser_info.parse_args() if not current_tenant_id: raise ValueError("No current tenant") diff --git a/api/controllers/console/wraps.py b/api/controllers/console/wraps.py index 8572a6dc9b..f40f566a36 100644 --- a/api/controllers/console/wraps.py +++ b/api/controllers/console/wraps.py @@ -10,6 +10,7 @@ from flask import abort, request from configs import dify_config from controllers.console.workspace.error import AccountNotInitializedError +from enums.cloud_plan import CloudPlan from extensions.ext_database import db from extensions.ext_redis import redis_client from libs.login import current_account_with_tenant @@ -133,7 +134,7 @@ def cloud_edition_billing_knowledge_limit_check(resource: str): features = FeatureService.get_features(current_tenant_id) if features.billing.enabled: if resource == "add_segment": - if features.billing.subscription.plan == "sandbox": + if features.billing.subscription.plan == CloudPlan.SANDBOX: abort( 403, "To unlock this feature and elevate your Dify experience, please upgrade to a paid plan.", @@ -314,3 +315,19 @@ def edit_permission_required(f: Callable[P, R]): return f(*args, **kwargs) return decorated_function + + +def is_admin_or_owner_required(f: Callable[P, R]): + @wraps(f) + def decorated_function(*args: P.args, **kwargs: P.kwargs): + from werkzeug.exceptions import Forbidden + + from libs.login import current_user + from models import Account + + user = current_user._get_current_object() + if not isinstance(user, Account) or not user.is_admin_or_owner: + raise Forbidden() + return f(*args, **kwargs) + + return decorated_function diff --git a/api/controllers/service_api/app/annotation.py b/api/controllers/service_api/app/annotation.py index ed013b1674..f26718555a 100644 --- a/api/controllers/service_api/app/annotation.py +++ b/api/controllers/service_api/app/annotation.py @@ -3,14 +3,12 @@ from typing import Literal from flask import request from flask_restx import Api, Namespace, Resource, fields, reqparse from flask_restx.api import HTTPStatus -from werkzeug.exceptions import Forbidden +from controllers.console.wraps import edit_permission_required from controllers.service_api import service_api_ns from controllers.service_api.wraps import validate_app_token from extensions.ext_redis import redis_client from fields.annotation_fields import annotation_fields, build_annotation_model -from libs.login import current_user -from models import Account from models.model import App from services.annotation_service import AppAnnotationService @@ -161,14 +159,10 @@ class AnnotationUpdateDeleteApi(Resource): } ) @validate_app_token + @edit_permission_required @service_api_ns.marshal_with(build_annotation_model(service_api_ns)) - def put(self, app_model: App, annotation_id): + def put(self, app_model: App, annotation_id: str): """Update an existing annotation.""" - assert isinstance(current_user, Account) - if not current_user.has_edit_permission: - raise Forbidden() - - annotation_id = str(annotation_id) args = annotation_create_parser.parse_args() annotation = AppAnnotationService.update_app_annotation_directly(args, app_model.id, annotation_id) return annotation @@ -185,13 +179,8 @@ class AnnotationUpdateDeleteApi(Resource): } ) @validate_app_token - def delete(self, app_model: App, annotation_id): + @edit_permission_required + def delete(self, app_model: App, annotation_id: str): """Delete an annotation.""" - assert isinstance(current_user, Account) - - if not current_user.has_edit_permission: - raise Forbidden() - - annotation_id = str(annotation_id) AppAnnotationService.delete_app_annotation(app_model.id, annotation_id) return {"result": "success"}, 204 diff --git a/api/controllers/service_api/dataset/dataset.py b/api/controllers/service_api/dataset/dataset.py index 9d5566919b..4cca3e6ce8 100644 --- a/api/controllers/service_api/dataset/dataset.py +++ b/api/controllers/service_api/dataset/dataset.py @@ -5,6 +5,7 @@ from flask_restx import marshal, reqparse from werkzeug.exceptions import Forbidden, NotFound import services +from controllers.console.wraps import edit_permission_required from controllers.service_api import service_api_ns from controllers.service_api.dataset.error import DatasetInUseError, DatasetNameDuplicateError, InvalidActionError from controllers.service_api.wraps import ( @@ -619,11 +620,9 @@ class DatasetTagsApi(DatasetApiResource): } ) @validate_dataset_token + @edit_permission_required def delete(self, _, dataset_id): """Delete a knowledge type tag.""" - assert isinstance(current_user, Account) - if not current_user.has_edit_permission: - raise Forbidden() args = tag_delete_parser.parse_args() TagService.delete_tag(args["tag_id"]) diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index 893cd7c923..ed47e706b6 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -1,7 +1,10 @@ import json +from typing import Self +from uuid import UUID from flask import request from flask_restx import marshal, reqparse +from pydantic import BaseModel, model_validator from sqlalchemy import desc, select from werkzeug.exceptions import Forbidden, NotFound @@ -31,7 +34,7 @@ from fields.document_fields import document_fields, document_status_fields from libs.login import current_user from models.dataset import Dataset, Document, DocumentSegment from services.dataset_service import DatasetService, DocumentService -from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig +from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig, ProcessRule, RetrievalModel from services.file_service import FileService # Define parsers for document operations @@ -51,15 +54,26 @@ document_text_create_parser = ( .add_argument("embedding_model_provider", type=str, required=False, nullable=True, location="json") ) -document_text_update_parser = ( - reqparse.RequestParser() - .add_argument("name", type=str, required=False, nullable=True, location="json") - .add_argument("text", type=str, required=False, nullable=True, location="json") - .add_argument("process_rule", type=dict, required=False, nullable=True, location="json") - .add_argument("doc_form", type=str, default="text_model", required=False, nullable=False, location="json") - .add_argument("doc_language", type=str, default="English", required=False, nullable=False, location="json") - .add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json") -) +DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}" + + +class DocumentTextUpdate(BaseModel): + name: str | None = None + text: str | None = None + process_rule: ProcessRule | None = None + doc_form: str = "text_model" + doc_language: str = "English" + retrieval_model: RetrievalModel | None = None + + @model_validator(mode="after") + def check_text_and_name(self) -> Self: + if self.text is not None and self.name is None: + raise ValueError("name is required when text is provided") + return self + + +for m in [ProcessRule, RetrievalModel, DocumentTextUpdate]: + service_api_ns.schema_model(m.__name__, m.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)) # type: ignore @service_api_ns.route( @@ -160,7 +174,7 @@ class DocumentAddByTextApi(DatasetApiResource): class DocumentUpdateByTextApi(DatasetApiResource): """Resource for update documents.""" - @service_api_ns.expect(document_text_update_parser) + @service_api_ns.expect(service_api_ns.models[DocumentTextUpdate.__name__], validate=True) @service_api_ns.doc("update_document_by_text") @service_api_ns.doc(description="Update an existing document by providing text content") @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) @@ -173,12 +187,10 @@ class DocumentUpdateByTextApi(DatasetApiResource): ) @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_rate_limit_check("knowledge", "dataset") - def post(self, tenant_id, dataset_id, document_id): + def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID): """Update document by text.""" - args = document_text_update_parser.parse_args() - dataset_id = str(dataset_id) - tenant_id = str(tenant_id) - dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() + args = DocumentTextUpdate.model_validate(service_api_ns.payload).model_dump(exclude_unset=True) + dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == str(dataset_id)).first() if not dataset: raise ValueError("Dataset does not exist.") @@ -198,11 +210,9 @@ class DocumentUpdateByTextApi(DatasetApiResource): # indexing_technique is already set in dataset since this is an update args["indexing_technique"] = dataset.indexing_technique - if args["text"]: + if args.get("text"): text = args.get("text") name = args.get("name") - if text is None or name is None: - raise ValueError("Both text and name must be strings.") if not current_user: raise ValueError("current_user is required") upload_file = FileService(db.engine).upload_text( @@ -456,12 +466,16 @@ class DocumentListApi(DatasetApiResource): page = request.args.get("page", default=1, type=int) limit = request.args.get("limit", default=20, type=int) search = request.args.get("keyword", default=None, type=str) + status = request.args.get("status", default=None, type=str) dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first() if not dataset: raise NotFound("Dataset not found.") query = select(Document).filter_by(dataset_id=str(dataset_id), tenant_id=tenant_id) + if status: + query = DocumentService.apply_display_status_filter(query, status) + if search: search = f"%{search}%" query = query.where(Document.name.like(search)) @@ -592,7 +606,7 @@ class DocumentApi(DatasetApiResource): "name": document.name, "created_from": document.created_from, "created_by": document.created_by, - "created_at": document.created_at.timestamp(), + "created_at": int(document.created_at.timestamp()), "tokens": document.tokens, "indexing_status": document.indexing_status, "completed_at": int(document.completed_at.timestamp()) if document.completed_at else None, @@ -625,7 +639,7 @@ class DocumentApi(DatasetApiResource): "name": document.name, "created_from": document.created_from, "created_by": document.created_by, - "created_at": document.created_at.timestamp(), + "created_at": int(document.created_at.timestamp()), "tokens": document.tokens, "indexing_status": document.indexing_status, "completed_at": int(document.completed_at.timestamp()) if document.completed_at else None, diff --git a/api/controllers/service_api/dataset/segment.py b/api/controllers/service_api/dataset/segment.py index 81abd19fed..9ca500b044 100644 --- a/api/controllers/service_api/dataset/segment.py +++ b/api/controllers/service_api/dataset/segment.py @@ -2,6 +2,7 @@ from flask import request from flask_restx import marshal, reqparse from werkzeug.exceptions import NotFound +from configs import dify_config from controllers.service_api import service_api_ns from controllers.service_api.app.error import ProviderNotInitializeError from controllers.service_api.wraps import ( @@ -107,6 +108,10 @@ class SegmentApi(DatasetApiResource): # validate args args = segment_create_parser.parse_args() if args["segments"] is not None: + segments_limit = dify_config.DATASET_MAX_SEGMENTS_PER_REQUEST + if segments_limit > 0 and len(args["segments"]) > segments_limit: + raise ValueError(f"Exceeded maximum segments limit of {segments_limit}.") + for args_item in args["segments"]: SegmentService.segment_create_args_validate(args_item, document) segments = SegmentService.multi_create_segment(args["segments"], document, dataset) diff --git a/api/controllers/service_api/wraps.py b/api/controllers/service_api/wraps.py index 340e605b85..c07e18c686 100644 --- a/api/controllers/service_api/wraps.py +++ b/api/controllers/service_api/wraps.py @@ -13,6 +13,7 @@ from sqlalchemy import select, update from sqlalchemy.orm import Session from werkzeug.exceptions import Forbidden, NotFound, Unauthorized +from enums.cloud_plan import CloudPlan from extensions.ext_database import db from extensions.ext_redis import redis_client from libs.datetime_utils import naive_utc_now @@ -67,6 +68,7 @@ def validate_app_token(view: Callable[P, R] | None = None, *, fetch_user_arg: Fe kwargs["app_model"] = app_model + # If caller needs end-user context, attach EndUser to current_user if fetch_user_arg: if fetch_user_arg.fetch_from == WhereisUserArg.QUERY: user_id = request.args.get("user") @@ -75,7 +77,6 @@ def validate_app_token(view: Callable[P, R] | None = None, *, fetch_user_arg: Fe elif fetch_user_arg.fetch_from == WhereisUserArg.FORM: user_id = request.form.get("user") else: - # use default-user user_id = None if not user_id and fetch_user_arg.required: @@ -90,6 +91,28 @@ def validate_app_token(view: Callable[P, R] | None = None, *, fetch_user_arg: Fe # Set EndUser as current logged-in user for flask_login.current_user current_app.login_manager._update_request_context_with_user(end_user) # type: ignore user_logged_in.send(current_app._get_current_object(), user=end_user) # type: ignore + else: + # For service API without end-user context, ensure an Account is logged in + # so services relying on current_account_with_tenant() work correctly. + tenant_owner_info = ( + db.session.query(Tenant, Account) + .join(TenantAccountJoin, Tenant.id == TenantAccountJoin.tenant_id) + .join(Account, TenantAccountJoin.account_id == Account.id) + .where( + Tenant.id == app_model.tenant_id, + TenantAccountJoin.role == "owner", + Tenant.status == TenantStatus.NORMAL, + ) + .one_or_none() + ) + + if tenant_owner_info: + tenant_model, account = tenant_owner_info + account.current_tenant = tenant_model + current_app.login_manager._update_request_context_with_user(account) # type: ignore + user_logged_in.send(current_app._get_current_object(), user=current_user) # type: ignore + else: + raise Unauthorized("Tenant owner account not found or tenant is not active.") return view_func(*args, **kwargs) @@ -139,7 +162,7 @@ def cloud_edition_billing_knowledge_limit_check(resource: str, api_token_type: s features = FeatureService.get_features(api_token.tenant_id) if features.billing.enabled: if resource == "add_segment": - if features.billing.subscription.plan == "sandbox": + if features.billing.subscription.plan == CloudPlan.SANDBOX: raise Forbidden( "To unlock this feature and elevate your Dify experience, please upgrade to a paid plan." ) diff --git a/api/controllers/trigger/trigger.py b/api/controllers/trigger/trigger.py index 4640f0b198..e69b22d880 100644 --- a/api/controllers/trigger/trigger.py +++ b/api/controllers/trigger/trigger.py @@ -37,8 +37,7 @@ def trigger_endpoint(endpoint_id: str): return jsonify({"error": "Endpoint not found"}), 404 return response except ValueError as e: - logger.exception("Endpoint processing failed for {endpoint_id}: {e}") - return jsonify({"error": "Endpoint processing failed", "message": str(e)}), 500 - except Exception as e: + return jsonify({"error": "Endpoint processing failed", "message": str(e)}), 400 + except Exception: logger.exception("Webhook processing failed for {endpoint_id}") - return jsonify({"error": "Internal server error", "message": str(e)}), 500 + return jsonify({"error": "Internal server error"}), 500 diff --git a/api/controllers/trigger/webhook.py b/api/controllers/trigger/webhook.py index 0eb42e8f0d..cec5c3d8ae 100644 --- a/api/controllers/trigger/webhook.py +++ b/api/controllers/trigger/webhook.py @@ -102,4 +102,4 @@ def handle_webhook_debug(webhook_id: str): raise except Exception as e: logger.exception("Webhook debug processing failed for %s", webhook_id) - return jsonify({"error": "Internal server error", "message": str(e)}), 500 + return jsonify({"error": "Internal server error", "message": "An internal error has occurred."}), 500 diff --git a/api/controllers/web/audio.py b/api/controllers/web/audio.py index 3103851088..b9fef48c4d 100644 --- a/api/controllers/web/audio.py +++ b/api/controllers/web/audio.py @@ -88,12 +88,6 @@ class AudioApi(WebApiResource): @web_ns.route("/text-to-audio") class TextApi(WebApiResource): - text_to_audio_response_fields = { - "audio_url": fields.String, - "duration": fields.Float, - } - - @marshal_with(text_to_audio_response_fields) @web_ns.doc("Text to Audio") @web_ns.doc(description="Convert text to audio using text-to-speech service.") @web_ns.doc( diff --git a/api/controllers/web/login.py b/api/controllers/web/login.py index 244ef47982..538d0c44be 100644 --- a/api/controllers/web/login.py +++ b/api/controllers/web/login.py @@ -81,6 +81,7 @@ class LoginStatusApi(Resource): ) def get(self): app_code = request.args.get("app_code") + user_id = request.args.get("user_id") token = extract_webapp_access_token(request) if not app_code: return { @@ -103,7 +104,7 @@ class LoginStatusApi(Resource): user_logged_in = False try: - _ = decode_jwt_token(app_code=app_code) + _ = decode_jwt_token(app_code=app_code, user_id=user_id) app_logged_in = True except Exception: app_logged_in = False diff --git a/api/controllers/web/wraps.py b/api/controllers/web/wraps.py index 9efd9f25d1..152137f39c 100644 --- a/api/controllers/web/wraps.py +++ b/api/controllers/web/wraps.py @@ -38,7 +38,7 @@ def validate_jwt_token(view: Callable[Concatenate[App, EndUser, P], R] | None = return decorator -def decode_jwt_token(app_code: str | None = None): +def decode_jwt_token(app_code: str | None = None, user_id: str | None = None): system_features = FeatureService.get_system_features() if not app_code: app_code = str(request.headers.get(HEADER_NAME_APP_CODE)) @@ -63,6 +63,10 @@ def decode_jwt_token(app_code: str | None = None): if not end_user: raise NotFound() + # Validate user_id against end_user's session_id if provided + if user_id is not None and end_user.session_id != user_id: + raise Unauthorized("Authentication has expired.") + # for enterprise webapp auth app_web_auth_enabled = False webapp_settings = None diff --git a/api/core/app/apps/advanced_chat/app_runner.py b/api/core/app/apps/advanced_chat/app_runner.py index 587c663482..c029e00553 100644 --- a/api/core/app/apps/advanced_chat/app_runner.py +++ b/api/core/app/apps/advanced_chat/app_runner.py @@ -1,6 +1,6 @@ import logging import time -from collections.abc import Mapping +from collections.abc import Mapping, Sequence from typing import Any, cast from sqlalchemy import select @@ -25,6 +25,7 @@ from core.moderation.input_moderation import InputModeration from core.variables.variables import VariableUnion from core.workflow.enums import WorkflowType from core.workflow.graph_engine.command_channels.redis_channel import RedisChannel +from core.workflow.graph_engine.layers.base import GraphEngineLayer from core.workflow.graph_engine.layers.persistence import PersistenceWorkflowInfo, WorkflowPersistenceLayer from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository @@ -61,11 +62,13 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner): app: App, workflow_execution_repository: WorkflowExecutionRepository, workflow_node_execution_repository: WorkflowNodeExecutionRepository, + graph_engine_layers: Sequence[GraphEngineLayer] = (), ): super().__init__( queue_manager=queue_manager, variable_loader=variable_loader, app_id=application_generate_entity.app_config.app_id, + graph_engine_layers=graph_engine_layers, ) self.application_generate_entity = application_generate_entity self.conversation = conversation @@ -195,6 +198,8 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner): ) workflow_entry.graph_engine.layer(persistence_layer) + for layer in self._graph_engine_layers: + workflow_entry.graph_engine.layer(layer) generator = workflow_entry.run() diff --git a/api/core/app/apps/advanced_chat/generate_task_pipeline.py b/api/core/app/apps/advanced_chat/generate_task_pipeline.py index 8c0102d9bd..01c377956b 100644 --- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py +++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py @@ -1,3 +1,4 @@ +import json import logging import re import time @@ -60,6 +61,7 @@ from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTas from core.app.task_pipeline.message_cycle_manager import MessageCycleManager from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk from core.model_runtime.entities.llm_entities import LLMUsage +from core.model_runtime.utils.encoders import jsonable_encoder from core.ops.ops_trace_manager import TraceQueueManager from core.workflow.enums import WorkflowExecutionStatus from core.workflow.nodes import NodeType @@ -391,6 +393,14 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport): if should_direct_answer: return + current_time = time.perf_counter() + if self._task_state.first_token_time is None and delta_text.strip(): + self._task_state.first_token_time = current_time + self._task_state.is_streaming_response = True + + if delta_text.strip(): + self._task_state.last_token_time = current_time + # Only publish tts message at text chunk streaming if tts_publisher and queue_message: tts_publisher.publish(queue_message) @@ -772,7 +782,33 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport): message.answer = answer_text message.updated_at = naive_utc_now() message.provider_response_latency = time.perf_counter() - self._base_task_pipeline.start_at - message.message_metadata = self._task_state.metadata.model_dump_json() + + # Set usage first before dumping metadata + if graph_runtime_state and graph_runtime_state.llm_usage: + usage = graph_runtime_state.llm_usage + message.message_tokens = usage.prompt_tokens + message.message_unit_price = usage.prompt_unit_price + message.message_price_unit = usage.prompt_price_unit + message.answer_tokens = usage.completion_tokens + message.answer_unit_price = usage.completion_unit_price + message.answer_price_unit = usage.completion_price_unit + message.total_price = usage.total_price + message.currency = usage.currency + self._task_state.metadata.usage = usage + else: + usage = LLMUsage.empty_usage() + self._task_state.metadata.usage = usage + + # Add streaming metrics to usage if available + if self._task_state.is_streaming_response and self._task_state.first_token_time: + start_time = self._base_task_pipeline.start_at + first_token_time = self._task_state.first_token_time + last_token_time = self._task_state.last_token_time or first_token_time + usage.time_to_first_token = round(first_token_time - start_time, 3) + usage.time_to_generate = round(last_token_time - first_token_time, 3) + + metadata = self._task_state.metadata.model_dump() + message.message_metadata = json.dumps(jsonable_encoder(metadata)) message_files = [ MessageFile( message_id=message.id, @@ -790,20 +826,6 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport): ] session.add_all(message_files) - if graph_runtime_state and graph_runtime_state.llm_usage: - usage = graph_runtime_state.llm_usage - message.message_tokens = usage.prompt_tokens - message.message_unit_price = usage.prompt_unit_price - message.message_price_unit = usage.prompt_price_unit - message.answer_tokens = usage.completion_tokens - message.answer_unit_price = usage.completion_unit_price - message.answer_price_unit = usage.completion_price_unit - message.total_price = usage.total_price - message.currency = usage.currency - self._task_state.metadata.usage = usage - else: - self._task_state.metadata.usage = LLMUsage.empty_usage() - def _seed_graph_runtime_state_from_queue_manager(self) -> None: """Bootstrap the cached runtime state from the queue manager when present.""" candidate = self._base_task_pipeline.queue_manager.graph_runtime_state diff --git a/api/core/app/apps/agent_chat/app_runner.py b/api/core/app/apps/agent_chat/app_runner.py index 759398b556..2760466a3b 100644 --- a/api/core/app/apps/agent_chat/app_runner.py +++ b/api/core/app/apps/agent_chat/app_runner.py @@ -144,7 +144,7 @@ class AgentChatAppRunner(AppRunner): prompt_template_entity=app_config.prompt_template, inputs=dict(inputs), files=list(files), - query=query or "", + query=query, memory=memory, ) @@ -172,7 +172,7 @@ class AgentChatAppRunner(AppRunner): prompt_template_entity=app_config.prompt_template, inputs=dict(inputs), files=list(files), - query=query or "", + query=query, memory=memory, ) diff --git a/api/core/app/apps/base_app_runner.py b/api/core/app/apps/base_app_runner.py index 61ac040c05..9a9832dd4a 100644 --- a/api/core/app/apps/base_app_runner.py +++ b/api/core/app/apps/base_app_runner.py @@ -79,7 +79,7 @@ class AppRunner: prompt_template_entity: PromptTemplateEntity, inputs: Mapping[str, str], files: Sequence["File"], - query: str | None = None, + query: str = "", context: str | None = None, memory: TokenBufferMemory | None = None, image_detail_config: ImagePromptMessageContent.DETAIL | None = None, @@ -105,7 +105,7 @@ class AppRunner: app_mode=AppMode.value_of(app_record.mode), prompt_template_entity=prompt_template_entity, inputs=inputs, - query=query or "", + query=query, files=files, context=context, memory=memory, diff --git a/api/core/app/apps/common/workflow_response_converter.py b/api/core/app/apps/common/workflow_response_converter.py index fc4a1da3f7..14795a430c 100644 --- a/api/core/app/apps/common/workflow_response_converter.py +++ b/api/core/app/apps/common/workflow_response_converter.py @@ -4,7 +4,7 @@ from dataclasses import dataclass from datetime import datetime from typing import Any, NewType, Union -from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity +from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, InvokeFrom, WorkflowAppGenerateEntity from core.app.entities.queue_entities import ( QueueAgentLogEvent, QueueIterationCompletedEvent, @@ -52,7 +52,7 @@ from core.workflow.workflow_entry import WorkflowEntry from core.workflow.workflow_type_encoder import WorkflowRuntimeTypeConverter from libs.datetime_utils import naive_utc_now from models import Account, EndUser -from services.variable_truncator import VariableTruncator +from services.variable_truncator import BaseTruncator, DummyVariableTruncator, VariableTruncator NodeExecutionId = NewType("NodeExecutionId", str) @@ -71,6 +71,8 @@ class _NodeSnapshot: class WorkflowResponseConverter: + _truncator: BaseTruncator + def __init__( self, *, @@ -82,7 +84,13 @@ class WorkflowResponseConverter: self._user = user self._system_variables = system_variables self._workflow_inputs = self._prepare_workflow_inputs() - self._truncator = VariableTruncator.default() + + # Disable truncation for SERVICE_API calls to keep backward compatibility. + if application_generate_entity.invoke_from == InvokeFrom.SERVICE_API: + self._truncator = DummyVariableTruncator() + else: + self._truncator = VariableTruncator.default() + self._node_snapshots: dict[NodeExecutionId, _NodeSnapshot] = {} self._workflow_execution_id: str | None = None self._workflow_started_at: datetime | None = None diff --git a/api/core/app/apps/message_based_app_generator.py b/api/core/app/apps/message_based_app_generator.py index 7a51b8f3a5..53e67fd578 100644 --- a/api/core/app/apps/message_based_app_generator.py +++ b/api/core/app/apps/message_based_app_generator.py @@ -190,7 +190,7 @@ class MessageBasedAppGenerator(BaseAppGenerator): override_model_configs=json.dumps(override_model_configs) if override_model_configs else None, conversation_id=conversation.id, inputs=application_generate_entity.inputs, - query=application_generate_entity.query or "", + query=application_generate_entity.query, message="", message_tokens=0, message_unit_price=0, diff --git a/api/core/app/apps/pipeline/pipeline_generator.py b/api/core/app/apps/pipeline/pipeline_generator.py index f8bfbce37a..13eb40fd60 100644 --- a/api/core/app/apps/pipeline/pipeline_generator.py +++ b/api/core/app/apps/pipeline/pipeline_generator.py @@ -41,18 +41,14 @@ from core.workflow.repositories.workflow_execution_repository import WorkflowExe from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository from core.workflow.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader from extensions.ext_database import db -from extensions.ext_redis import redis_client from libs.flask_utils import preserve_flask_contexts from models import Account, EndUser, Workflow, WorkflowNodeExecutionTriggeredFrom from models.dataset import Document, DocumentPipelineExecutionLog, Pipeline from models.enums import WorkflowRunTriggeredFrom from models.model import AppMode from services.datasource_provider_service import DatasourceProviderService -from services.feature_service import FeatureService -from services.file_service import FileService +from services.rag_pipeline.rag_pipeline_task_proxy import RagPipelineTaskProxy from services.workflow_draft_variable_service import DraftVarLoader, WorkflowDraftVariableService -from tasks.rag_pipeline.priority_rag_pipeline_run_task import priority_rag_pipeline_run_task -from tasks.rag_pipeline.rag_pipeline_run_task import rag_pipeline_run_task logger = logging.getLogger(__name__) @@ -167,7 +163,7 @@ class PipelineGenerator(BaseAppGenerator): datasource_type=datasource_type, datasource_info=json.dumps(datasource_info), datasource_node_id=start_node_id, - input_data=inputs, + input_data=dict(inputs), pipeline_id=pipeline.id, created_by=user.id, ) @@ -248,34 +244,7 @@ class PipelineGenerator(BaseAppGenerator): ) if rag_pipeline_invoke_entities: - # store the rag_pipeline_invoke_entities to object storage - text = [item.model_dump() for item in rag_pipeline_invoke_entities] - name = "rag_pipeline_invoke_entities.json" - # Convert list to proper JSON string - json_text = json.dumps(text) - upload_file = FileService(db.engine).upload_text(json_text, name, user.id, dataset.tenant_id) - features = FeatureService.get_features(dataset.tenant_id) - if features.billing.enabled and features.billing.subscription.plan == "sandbox": - tenant_pipeline_task_key = f"tenant_pipeline_task:{dataset.tenant_id}" - tenant_self_pipeline_task_queue = f"tenant_self_pipeline_task_queue:{dataset.tenant_id}" - - if redis_client.get(tenant_pipeline_task_key): - # Add to waiting queue using List operations (lpush) - redis_client.lpush(tenant_self_pipeline_task_queue, upload_file.id) - else: - # Set flag and execute task - redis_client.set(tenant_pipeline_task_key, 1, ex=60 * 60) - rag_pipeline_run_task.delay( # type: ignore - rag_pipeline_invoke_entities_file_id=upload_file.id, - tenant_id=dataset.tenant_id, - ) - - else: - priority_rag_pipeline_run_task.delay( # type: ignore - rag_pipeline_invoke_entities_file_id=upload_file.id, - tenant_id=dataset.tenant_id, - ) - + RagPipelineTaskProxy(dataset.tenant_id, user.id, rag_pipeline_invoke_entities).delay() # return batch, dataset, documents return { "batch": batch, diff --git a/api/core/app/apps/workflow/app_generator.py b/api/core/app/apps/workflow/app_generator.py index 9f0f788a59..0165c74295 100644 --- a/api/core/app/apps/workflow/app_generator.py +++ b/api/core/app/apps/workflow/app_generator.py @@ -3,7 +3,7 @@ import logging import threading import uuid from collections.abc import Generator, Mapping, Sequence -from typing import Any, Literal, Optional, Union, overload +from typing import Any, Literal, Union, overload from flask import Flask, current_app from pydantic import ValidationError @@ -39,10 +39,16 @@ from models import Account, App, EndUser, Workflow, WorkflowNodeExecutionTrigger from models.enums import WorkflowRunTriggeredFrom from services.workflow_draft_variable_service import DraftVarLoader, WorkflowDraftVariableService +SKIP_PREPARE_USER_INPUTS_KEY = "_skip_prepare_user_inputs" + logger = logging.getLogger(__name__) class WorkflowAppGenerator(BaseAppGenerator): + @staticmethod + def _should_prepare_user_inputs(args: Mapping[str, Any]) -> bool: + return not bool(args.get(SKIP_PREPARE_USER_INPUTS_KEY)) + @overload def generate( self, @@ -54,9 +60,9 @@ class WorkflowAppGenerator(BaseAppGenerator): invoke_from: InvokeFrom, streaming: Literal[True], call_depth: int, - triggered_from: Optional[WorkflowRunTriggeredFrom] = None, - root_node_id: Optional[str] = None, - layers: Optional[Sequence[GraphEngineLayer]] = None, + triggered_from: WorkflowRunTriggeredFrom | None = None, + root_node_id: str | None = None, + graph_engine_layers: Sequence[GraphEngineLayer] = (), ) -> Generator[Mapping[str, Any] | str, None, None]: ... @overload @@ -70,9 +76,9 @@ class WorkflowAppGenerator(BaseAppGenerator): invoke_from: InvokeFrom, streaming: Literal[False], call_depth: int, - triggered_from: Optional[WorkflowRunTriggeredFrom] = None, - root_node_id: Optional[str] = None, - layers: Optional[Sequence[GraphEngineLayer]] = None, + triggered_from: WorkflowRunTriggeredFrom | None = None, + root_node_id: str | None = None, + graph_engine_layers: Sequence[GraphEngineLayer] = (), ) -> Mapping[str, Any]: ... @overload @@ -86,9 +92,9 @@ class WorkflowAppGenerator(BaseAppGenerator): invoke_from: InvokeFrom, streaming: bool, call_depth: int, - triggered_from: Optional[WorkflowRunTriggeredFrom] = None, - root_node_id: Optional[str] = None, - layers: Optional[Sequence[GraphEngineLayer]] = None, + triggered_from: WorkflowRunTriggeredFrom | None = None, + root_node_id: str | None = None, + graph_engine_layers: Sequence[GraphEngineLayer] = (), ) -> Union[Mapping[str, Any], Generator[Mapping[str, Any] | str, None, None]]: ... def generate( @@ -101,9 +107,9 @@ class WorkflowAppGenerator(BaseAppGenerator): invoke_from: InvokeFrom, streaming: bool = True, call_depth: int = 0, - triggered_from: Optional[WorkflowRunTriggeredFrom] = None, - root_node_id: Optional[str] = None, - layers: Optional[Sequence[GraphEngineLayer]] = None, + triggered_from: WorkflowRunTriggeredFrom | None = None, + root_node_id: str | None = None, + graph_engine_layers: Sequence[GraphEngineLayer] = (), ) -> Union[Mapping[str, Any], Generator[Mapping[str, Any] | str, None, None]]: files: Sequence[Mapping[str, Any]] = args.get("files") or [] @@ -139,8 +145,9 @@ class WorkflowAppGenerator(BaseAppGenerator): **extract_external_trace_id_from_args(args), } workflow_run_id = str(uuid.uuid4()) - if triggered_from in (WorkflowRunTriggeredFrom.DEBUGGING, WorkflowRunTriggeredFrom.APP_RUN): - # start node get inputs + # FIXME (Yeuoly): we need to remove the SKIP_PREPARE_USER_INPUTS_KEY from the args + # trigger shouldn't prepare user inputs + if self._should_prepare_user_inputs(args): inputs = self._prepare_user_inputs( user_inputs=inputs, variables=app_config.variables, @@ -202,7 +209,7 @@ class WorkflowAppGenerator(BaseAppGenerator): workflow_node_execution_repository=workflow_node_execution_repository, streaming=streaming, root_node_id=root_node_id, - layers=layers, + graph_engine_layers=graph_engine_layers, ) def resume(self, *, workflow_run_id: str) -> None: @@ -223,8 +230,8 @@ class WorkflowAppGenerator(BaseAppGenerator): workflow_node_execution_repository: WorkflowNodeExecutionRepository, streaming: bool = True, variable_loader: VariableLoader = DUMMY_VARIABLE_LOADER, - root_node_id: Optional[str] = None, - layers: Optional[Sequence[GraphEngineLayer]] = None, + root_node_id: str | None = None, + graph_engine_layers: Sequence[GraphEngineLayer] = (), ) -> Union[Mapping[str, Any], Generator[str | Mapping[str, Any], None, None]]: """ Generate App response. @@ -263,7 +270,7 @@ class WorkflowAppGenerator(BaseAppGenerator): "root_node_id": root_node_id, "workflow_execution_repository": workflow_execution_repository, "workflow_node_execution_repository": workflow_node_execution_repository, - "layers": layers, + "graph_engine_layers": graph_engine_layers, }, ) @@ -457,8 +464,8 @@ class WorkflowAppGenerator(BaseAppGenerator): variable_loader: VariableLoader, workflow_execution_repository: WorkflowExecutionRepository, workflow_node_execution_repository: WorkflowNodeExecutionRepository, - root_node_id: Optional[str] = None, - layers: Optional[Sequence[GraphEngineLayer]] = None, + root_node_id: str | None = None, + graph_engine_layers: Sequence[GraphEngineLayer] = (), ) -> None: """ Generate worker in a new thread. @@ -503,7 +510,7 @@ class WorkflowAppGenerator(BaseAppGenerator): workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, root_node_id=root_node_id, - layers=layers, + graph_engine_layers=graph_engine_layers, ) try: diff --git a/api/core/app/apps/workflow/app_runner.py b/api/core/app/apps/workflow/app_runner.py index 439ecb2491..d8460df390 100644 --- a/api/core/app/apps/workflow/app_runner.py +++ b/api/core/app/apps/workflow/app_runner.py @@ -1,12 +1,11 @@ import logging import time from collections.abc import Sequence -from typing import Optional, cast +from typing import cast from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.apps.workflow.app_config_manager import WorkflowAppConfig from core.app.apps.workflow_app_runner import WorkflowBasedAppRunner -from core.app.engine_layers.suspend_layer import SuspendLayer from core.app.entities.app_invoke_entities import InvokeFrom, WorkflowAppGenerateEntity from core.workflow.enums import WorkflowType from core.workflow.graph_engine.command_channels.redis_channel import RedisChannel @@ -19,6 +18,7 @@ from core.workflow.system_variable import SystemVariable from core.workflow.variable_loader import VariableLoader from core.workflow.workflow_entry import WorkflowEntry from extensions.ext_redis import redis_client +from libs.datetime_utils import naive_utc_now from models.enums import UserFrom from models.workflow import Workflow @@ -38,15 +38,16 @@ class WorkflowAppRunner(WorkflowBasedAppRunner): variable_loader: VariableLoader, workflow: Workflow, system_user_id: str, - root_node_id: Optional[str] = None, + root_node_id: str | None = None, workflow_execution_repository: WorkflowExecutionRepository, workflow_node_execution_repository: WorkflowNodeExecutionRepository, - layers: Optional[Sequence[GraphEngineLayer]] = None, + graph_engine_layers: Sequence[GraphEngineLayer] = (), ): super().__init__( queue_manager=queue_manager, variable_loader=variable_loader, app_id=application_generate_entity.app_config.app_id, + graph_engine_layers=graph_engine_layers, ) self.application_generate_entity = application_generate_entity self._workflow = workflow @@ -54,7 +55,6 @@ class WorkflowAppRunner(WorkflowBasedAppRunner): self._root_node_id = root_node_id self._workflow_execution_repository = workflow_execution_repository self._workflow_node_execution_repository = workflow_node_execution_repository - self._layers = layers or [] def run(self): """ @@ -67,6 +67,7 @@ class WorkflowAppRunner(WorkflowBasedAppRunner): files=self.application_generate_entity.files, user_id=self._sys_user_id, app_id=app_config.app_id, + timestamp=int(naive_utc_now().timestamp()), workflow_id=app_config.workflow_id, workflow_execution_id=self.application_generate_entity.workflow_execution_id, ) @@ -142,12 +143,8 @@ class WorkflowAppRunner(WorkflowBasedAppRunner): trace_manager=self.application_generate_entity.trace_manager, ) - suspend_layer = SuspendLayer() - workflow_entry.graph_engine.layer(persistence_layer) - workflow_entry.graph_engine.layer(suspend_layer) - - for layer in self._layers: + for layer in self._graph_engine_layers: workflow_entry.graph_engine.layer(layer) generator = workflow_entry.run() diff --git a/api/core/app/apps/workflow/generate_task_pipeline.py b/api/core/app/apps/workflow/generate_task_pipeline.py index 08e2fce48c..4157870620 100644 --- a/api/core/app/apps/workflow/generate_task_pipeline.py +++ b/api/core/app/apps/workflow/generate_task_pipeline.py @@ -644,14 +644,15 @@ class WorkflowAppGenerateTaskPipeline(GraphRuntimeStateSupport): if not workflow_run_id: return - workflow_app_log = WorkflowAppLog() - workflow_app_log.tenant_id = self._application_generate_entity.app_config.tenant_id - workflow_app_log.app_id = self._application_generate_entity.app_config.app_id - workflow_app_log.workflow_id = self._workflow.id - workflow_app_log.workflow_run_id = workflow_run_id - workflow_app_log.created_from = created_from.value - workflow_app_log.created_by_role = self._created_by_role - workflow_app_log.created_by = self._user_id + workflow_app_log = WorkflowAppLog( + tenant_id=self._application_generate_entity.app_config.tenant_id, + app_id=self._application_generate_entity.app_config.app_id, + workflow_id=self._workflow.id, + workflow_run_id=workflow_run_id, + created_from=created_from.value, + created_by_role=self._created_by_role, + created_by=self._user_id, + ) session.add(workflow_app_log) session.commit() diff --git a/api/core/app/apps/workflow_app_runner.py b/api/core/app/apps/workflow_app_runner.py index 2190c03222..0e125b3538 100644 --- a/api/core/app/apps/workflow_app_runner.py +++ b/api/core/app/apps/workflow_app_runner.py @@ -1,6 +1,6 @@ import time -from collections.abc import Mapping -from typing import Any, Optional, cast +from collections.abc import Mapping, Sequence +from typing import Any, cast from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom from core.app.entities.app_invoke_entities import InvokeFrom @@ -27,6 +27,7 @@ from core.app.entities.queue_entities import ( ) from core.workflow.entities import GraphInitParams from core.workflow.graph import Graph +from core.workflow.graph_engine.layers.base import GraphEngineLayer from core.workflow.graph_events import ( GraphEngineEvent, GraphRunFailedEvent, @@ -69,10 +70,12 @@ class WorkflowBasedAppRunner: queue_manager: AppQueueManager, variable_loader: VariableLoader = DUMMY_VARIABLE_LOADER, app_id: str, + graph_engine_layers: Sequence[GraphEngineLayer] = (), ): self._queue_manager = queue_manager self._variable_loader = variable_loader self._app_id = app_id + self._graph_engine_layers = graph_engine_layers def _init_graph( self, @@ -81,7 +84,7 @@ class WorkflowBasedAppRunner: workflow_id: str = "", tenant_id: str = "", user_id: str = "", - root_node_id: Optional[str] = None, + root_node_id: str | None = None, ) -> Graph: """ Init graph diff --git a/api/core/app/entities/app_invoke_entities.py b/api/core/app/entities/app_invoke_entities.py index 1ce86f1f9a..5143dbf1e8 100644 --- a/api/core/app/entities/app_invoke_entities.py +++ b/api/core/app/entities/app_invoke_entities.py @@ -44,6 +44,9 @@ class InvokeFrom(StrEnum): DEBUGGER = "debugger" PUBLISHED = "published" + # VALIDATION indicates that this invocation is from validation. + VALIDATION = "validation" + @classmethod def value_of(cls, value: str): """ @@ -110,6 +113,11 @@ class AppGenerateEntity(BaseModel): inputs: Mapping[str, Any] files: Sequence[File] + + # Unique identifier of the user initiating the execution. + # This corresponds to `Account.id` for platform users or `EndUser.id` for end users. + # + # Note: The `user_id` field does not indicate whether the user is a platform user or an end user. user_id: str # extras @@ -135,7 +143,7 @@ class EasyUIBasedAppGenerateEntity(AppGenerateEntity): app_config: EasyUIBasedAppConfig = None # type: ignore model_conf: ModelConfigWithCredentialsEntity - query: str | None = None + query: str = "" # pydantic configs model_config = ConfigDict(protected_namespaces=()) diff --git a/api/core/app/entities/task_entities.py b/api/core/app/entities/task_entities.py index 72a92add04..79a5e657b3 100644 --- a/api/core/app/entities/task_entities.py +++ b/api/core/app/entities/task_entities.py @@ -48,6 +48,9 @@ class WorkflowTaskState(TaskState): """ answer: str = "" + first_token_time: float | None = None + last_token_time: float | None = None + is_streaming_response: bool = False class StreamEvent(StrEnum): diff --git a/api/core/app/layers/pause_state_persist_layer.py b/api/core/app/layers/pause_state_persist_layer.py new file mode 100644 index 0000000000..412eb98dd4 --- /dev/null +++ b/api/core/app/layers/pause_state_persist_layer.py @@ -0,0 +1,133 @@ +from typing import Annotated, Literal, Self, TypeAlias + +from pydantic import BaseModel, Field +from sqlalchemy import Engine +from sqlalchemy.orm import Session, sessionmaker + +from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity +from core.workflow.graph_engine.layers.base import GraphEngineLayer +from core.workflow.graph_events.base import GraphEngineEvent +from core.workflow.graph_events.graph import GraphRunPausedEvent +from models.model import AppMode +from repositories.api_workflow_run_repository import APIWorkflowRunRepository +from repositories.factory import DifyAPIRepositoryFactory + + +# Wrapper types for `WorkflowAppGenerateEntity` and +# `AdvancedChatAppGenerateEntity`. These wrappers enable type discrimination +# and correct reconstruction of the entity field during (de)serialization. +class _WorkflowGenerateEntityWrapper(BaseModel): + type: Literal[AppMode.WORKFLOW] = AppMode.WORKFLOW + entity: WorkflowAppGenerateEntity + + +class _AdvancedChatAppGenerateEntityWrapper(BaseModel): + type: Literal[AppMode.ADVANCED_CHAT] = AppMode.ADVANCED_CHAT + entity: AdvancedChatAppGenerateEntity + + +_GenerateEntityUnion: TypeAlias = Annotated[ + _WorkflowGenerateEntityWrapper | _AdvancedChatAppGenerateEntityWrapper, + Field(discriminator="type"), +] + + +class WorkflowResumptionContext(BaseModel): + """WorkflowResumptionContext captures all state necessary for resumption.""" + + version: Literal["1"] = "1" + + # Only workflow / chatflow could be paused. + generate_entity: _GenerateEntityUnion + serialized_graph_runtime_state: str + + def dumps(self) -> str: + return self.model_dump_json() + + @classmethod + def loads(cls, value: str) -> Self: + return cls.model_validate_json(value) + + def get_generate_entity(self) -> WorkflowAppGenerateEntity | AdvancedChatAppGenerateEntity: + return self.generate_entity.entity + + +class PauseStatePersistenceLayer(GraphEngineLayer): + def __init__( + self, + session_factory: Engine | sessionmaker[Session], + generate_entity: WorkflowAppGenerateEntity | AdvancedChatAppGenerateEntity, + state_owner_user_id: str, + ): + """Create a PauseStatePersistenceLayer. + + The `state_owner_user_id` is used when creating state file for pause. + It generally should id of the creator of workflow. + """ + if isinstance(session_factory, Engine): + session_factory = sessionmaker(session_factory) + self._session_maker = session_factory + self._state_owner_user_id = state_owner_user_id + self._generate_entity = generate_entity + + def _get_repo(self) -> APIWorkflowRunRepository: + return DifyAPIRepositoryFactory.create_api_workflow_run_repository(self._session_maker) + + def on_graph_start(self) -> None: + """ + Called when graph execution starts. + + This is called after the engine has been initialized but before any nodes + are executed. Layers can use this to set up resources or log start information. + """ + pass + + def on_event(self, event: GraphEngineEvent) -> None: + """ + Called for every event emitted by the engine. + + This method receives all events generated during graph execution, including: + - Graph lifecycle events (start, success, failure) + - Node execution events (start, success, failure, retry) + - Stream events for response nodes + - Container events (iteration, loop) + + Args: + event: The event emitted by the engine + """ + if not isinstance(event, GraphRunPausedEvent): + return + + assert self.graph_runtime_state is not None + + entity_wrapper: _GenerateEntityUnion + if isinstance(self._generate_entity, WorkflowAppGenerateEntity): + entity_wrapper = _WorkflowGenerateEntityWrapper(entity=self._generate_entity) + else: + entity_wrapper = _AdvancedChatAppGenerateEntityWrapper(entity=self._generate_entity) + + state = WorkflowResumptionContext( + serialized_graph_runtime_state=self.graph_runtime_state.dumps(), + generate_entity=entity_wrapper, + ) + + workflow_run_id: str | None = self.graph_runtime_state.system_variable.workflow_execution_id + assert workflow_run_id is not None + repo = self._get_repo() + repo.create_workflow_pause( + workflow_run_id=workflow_run_id, + state_owner_user_id=self._state_owner_user_id, + state=state.dumps(), + ) + + def on_graph_end(self, error: Exception | None) -> None: + """ + Called when graph execution ends. + + This is called after all nodes have been executed or when execution is + aborted. Layers can use this to clean up resources or log final state. + + Args: + error: The exception that caused execution to fail, or None if successful + """ + pass diff --git a/api/core/app/engine_layers/suspend_layer.py b/api/core/app/layers/suspend_layer.py similarity index 100% rename from api/core/app/engine_layers/suspend_layer.py rename to api/core/app/layers/suspend_layer.py diff --git a/api/core/app/engine_layers/timeslice_layer.py b/api/core/app/layers/timeslice_layer.py similarity index 100% rename from api/core/app/engine_layers/timeslice_layer.py rename to api/core/app/layers/timeslice_layer.py diff --git a/api/core/app/engine_layers/trigger_post_layer.py b/api/core/app/layers/trigger_post_layer.py similarity index 87% rename from api/core/app/engine_layers/trigger_post_layer.py rename to api/core/app/layers/trigger_post_layer.py index 1309295b1a..fe1a46a945 100644 --- a/api/core/app/engine_layers/trigger_post_layer.py +++ b/api/core/app/layers/trigger_post_layer.py @@ -3,12 +3,11 @@ from datetime import UTC, datetime from typing import Any, ClassVar from pydantic import TypeAdapter -from sqlalchemy.orm import Session +from sqlalchemy.orm import Session, sessionmaker from core.workflow.graph_engine.layers.base import GraphEngineLayer from core.workflow.graph_events.base import GraphEngineEvent from core.workflow.graph_events.graph import GraphRunFailedEvent, GraphRunPausedEvent, GraphRunSucceededEvent -from models.engine import db from models.enums import WorkflowTriggerStatus from repositories.sqlalchemy_workflow_trigger_log_repository import SQLAlchemyWorkflowTriggerLogRepository from tasks.workflow_cfs_scheduler.cfs_scheduler import AsyncWorkflowCFSPlanEntity @@ -32,10 +31,12 @@ class TriggerPostLayer(GraphEngineLayer): cfs_plan_scheduler_entity: AsyncWorkflowCFSPlanEntity, start_time: datetime, trigger_log_id: str, + session_maker: sessionmaker[Session], ): self.trigger_log_id = trigger_log_id self.start_time = start_time self.cfs_plan_scheduler_entity = cfs_plan_scheduler_entity + self.session_maker = session_maker def on_graph_start(self): pass @@ -45,7 +46,7 @@ class TriggerPostLayer(GraphEngineLayer): Update trigger log with success or failure. """ if isinstance(event, tuple(self._STATUS_MAP.keys())): - with Session(db.engine) as session: + with self.session_maker() as session: repo = SQLAlchemyWorkflowTriggerLogRepository(session) trigger_log = repo.get_by_id(self.trigger_log_id) if not trigger_log: @@ -62,7 +63,10 @@ class TriggerPostLayer(GraphEngineLayer): outputs = self.graph_runtime_state.outputs - workflow_run_id = outputs.get("workflow_run_id") + # BASICLY, workflow_execution_id is the same as workflow_run_id + workflow_run_id = self.graph_runtime_state.system_variable.workflow_execution_id + assert workflow_run_id, "Workflow run id is not set" + total_tokens = self.graph_runtime_state.total_tokens # Update trigger log with success diff --git a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py index 67abb569e3..da2ebac3bd 100644 --- a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py +++ b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py @@ -121,7 +121,7 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline): if self._application_generate_entity.app_config.app_mode != AppMode.COMPLETION: # start generate conversation name thread self._conversation_name_generate_thread = self._message_cycle_manager.generate_conversation_name( - conversation_id=self._conversation_id, query=self._application_generate_entity.query or "" + conversation_id=self._conversation_id, query=self._application_generate_entity.query ) generator = self._wrapper_process_stream_response(trace_manager=self._application_generate_entity.trace_manager) diff --git a/api/core/app/task_pipeline/message_cycle_manager.py b/api/core/app/task_pipeline/message_cycle_manager.py index 7a384e5c92..e7daeb4a32 100644 --- a/api/core/app/task_pipeline/message_cycle_manager.py +++ b/api/core/app/task_pipeline/message_cycle_manager.py @@ -140,7 +140,27 @@ class MessageCycleManager: if not self._application_generate_entity.app_config.additional_features: raise ValueError("Additional features not found") if self._application_generate_entity.app_config.additional_features.show_retrieve_source: - self._task_state.metadata.retriever_resources = event.retriever_resources + merged_resources = [r for r in self._task_state.metadata.retriever_resources or [] if r] + existing_ids = {(r.dataset_id, r.document_id) for r in merged_resources if r.dataset_id and r.document_id} + + # Add new unique resources from the event + for resource in event.retriever_resources or []: + if not resource: + continue + + is_duplicate = ( + resource.dataset_id + and resource.document_id + and (resource.dataset_id, resource.document_id) in existing_ids + ) + + if not is_duplicate: + merged_resources.append(resource) + + for i, resource in enumerate(merged_resources, 1): + resource.position = i + + self._task_state.metadata.retriever_resources = merged_resources def message_file_to_stream_response(self, event: QueueMessageFileEvent) -> MessageFileStreamResponse | None: """ diff --git a/api/core/datasource/__base/datasource_runtime.py b/api/core/datasource/__base/datasource_runtime.py index c5d6c1d771..e021ed74a7 100644 --- a/api/core/datasource/__base/datasource_runtime.py +++ b/api/core/datasource/__base/datasource_runtime.py @@ -1,14 +1,10 @@ -from typing import TYPE_CHECKING, Any, Optional +from typing import Any from pydantic import BaseModel, Field -# Import InvokeFrom locally to avoid circular import from core.app.entities.app_invoke_entities import InvokeFrom from core.datasource.entities.datasource_entities import DatasourceInvokeFrom -if TYPE_CHECKING: - from core.app.entities.app_invoke_entities import InvokeFrom - class DatasourceRuntime(BaseModel): """ @@ -17,7 +13,7 @@ class DatasourceRuntime(BaseModel): tenant_id: str datasource_id: str | None = None - invoke_from: Optional["InvokeFrom"] = None + invoke_from: InvokeFrom | None = None datasource_invoke_from: DatasourceInvokeFrom | None = None credentials: dict[str, Any] = Field(default_factory=dict) runtime_parameters: dict[str, Any] = Field(default_factory=dict) diff --git a/api/core/entities/document_task.py b/api/core/entities/document_task.py new file mode 100644 index 0000000000..27ab5c84f7 --- /dev/null +++ b/api/core/entities/document_task.py @@ -0,0 +1,15 @@ +from collections.abc import Sequence +from dataclasses import dataclass + + +@dataclass +class DocumentTask: + """Document task entity for document indexing operations. + + This class represents a document indexing task that can be queued + and processed by the document indexing system. + """ + + tenant_id: str + dataset_id: str + document_ids: Sequence[str] diff --git a/api/core/entities/mcp_provider.py b/api/core/entities/mcp_provider.py new file mode 100644 index 0000000000..7484cea04a --- /dev/null +++ b/api/core/entities/mcp_provider.py @@ -0,0 +1,329 @@ +import json +from datetime import datetime +from enum import StrEnum +from typing import TYPE_CHECKING, Any +from urllib.parse import urlparse + +from pydantic import BaseModel + +from configs import dify_config +from core.entities.provider_entities import BasicProviderConfig +from core.file import helpers as file_helpers +from core.helper import encrypter +from core.helper.provider_cache import NoOpProviderCredentialCache +from core.mcp.types import OAuthClientInformation, OAuthClientMetadata, OAuthTokens +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ToolProviderType + +if TYPE_CHECKING: + from models.tools import MCPToolProvider + +# Constants +CLIENT_NAME = "Dify" +CLIENT_URI = "https://github.com/langgenius/dify" +DEFAULT_TOKEN_TYPE = "Bearer" +DEFAULT_EXPIRES_IN = 3600 +MASK_CHAR = "*" +MIN_UNMASK_LENGTH = 6 + + +class MCPSupportGrantType(StrEnum): + """The supported grant types for MCP""" + + AUTHORIZATION_CODE = "authorization_code" + CLIENT_CREDENTIALS = "client_credentials" + REFRESH_TOKEN = "refresh_token" + + +class MCPAuthentication(BaseModel): + client_id: str + client_secret: str | None = None + + +class MCPConfiguration(BaseModel): + timeout: float = 30 + sse_read_timeout: float = 300 + + +class MCPProviderEntity(BaseModel): + """MCP Provider domain entity for business logic operations""" + + # Basic identification + id: str + provider_id: str # server_identifier + name: str + tenant_id: str + user_id: str + + # Server connection info + server_url: str # encrypted URL + headers: dict[str, str] # encrypted headers + timeout: float + sse_read_timeout: float + + # Authentication related + authed: bool + credentials: dict[str, Any] # encrypted credentials + code_verifier: str | None = None # for OAuth + + # Tools and display info + tools: list[dict[str, Any]] # parsed tools list + icon: str | dict[str, str] # parsed icon + + # Timestamps + created_at: datetime + updated_at: datetime + + @classmethod + def from_db_model(cls, db_provider: "MCPToolProvider") -> "MCPProviderEntity": + """Create entity from database model with decryption""" + + return cls( + id=db_provider.id, + provider_id=db_provider.server_identifier, + name=db_provider.name, + tenant_id=db_provider.tenant_id, + user_id=db_provider.user_id, + server_url=db_provider.server_url, + headers=db_provider.headers, + timeout=db_provider.timeout, + sse_read_timeout=db_provider.sse_read_timeout, + authed=db_provider.authed, + credentials=db_provider.credentials, + tools=db_provider.tool_dict, + icon=db_provider.icon or "", + created_at=db_provider.created_at, + updated_at=db_provider.updated_at, + ) + + @property + def redirect_url(self) -> str: + """OAuth redirect URL""" + return dify_config.CONSOLE_API_URL + "/console/api/mcp/oauth/callback" + + @property + def client_metadata(self) -> OAuthClientMetadata: + """Metadata about this OAuth client.""" + # Get grant type from credentials + credentials = self.decrypt_credentials() + + # Try to get grant_type from different locations + grant_type = credentials.get("grant_type", MCPSupportGrantType.AUTHORIZATION_CODE) + + # For nested structure, check if client_information has grant_types + if "client_information" in credentials and isinstance(credentials["client_information"], dict): + client_info = credentials["client_information"] + # If grant_types is specified in client_information, use it to determine grant_type + if "grant_types" in client_info and isinstance(client_info["grant_types"], list): + if "client_credentials" in client_info["grant_types"]: + grant_type = MCPSupportGrantType.CLIENT_CREDENTIALS + elif "authorization_code" in client_info["grant_types"]: + grant_type = MCPSupportGrantType.AUTHORIZATION_CODE + + # Configure based on grant type + is_client_credentials = grant_type == MCPSupportGrantType.CLIENT_CREDENTIALS + + grant_types = ["refresh_token"] + grant_types.append("client_credentials" if is_client_credentials else "authorization_code") + + response_types = [] if is_client_credentials else ["code"] + redirect_uris = [] if is_client_credentials else [self.redirect_url] + + return OAuthClientMetadata( + redirect_uris=redirect_uris, + token_endpoint_auth_method="none", + grant_types=grant_types, + response_types=response_types, + client_name=CLIENT_NAME, + client_uri=CLIENT_URI, + ) + + @property + def provider_icon(self) -> dict[str, str] | str: + """Get provider icon, handling both dict and string formats""" + if isinstance(self.icon, dict): + return self.icon + try: + return json.loads(self.icon) + except (json.JSONDecodeError, TypeError): + # If not JSON, assume it's a file path + return file_helpers.get_signed_file_url(self.icon) + + def to_api_response(self, user_name: str | None = None, include_sensitive: bool = True) -> dict[str, Any]: + """Convert to API response format + + Args: + user_name: User name to display + include_sensitive: If False, skip expensive decryption operations (for list view optimization) + """ + response = { + "id": self.id, + "author": user_name or "Anonymous", + "name": self.name, + "icon": self.provider_icon, + "type": ToolProviderType.MCP.value, + "is_team_authorization": self.authed, + "server_url": self.masked_server_url(), + "server_identifier": self.provider_id, + "updated_at": int(self.updated_at.timestamp()), + "label": I18nObject(en_US=self.name, zh_Hans=self.name).to_dict(), + "description": I18nObject(en_US="", zh_Hans="").to_dict(), + } + + # Add configuration + response["configuration"] = { + "timeout": str(self.timeout), + "sse_read_timeout": str(self.sse_read_timeout), + } + + # Skip expensive operations when sensitive data is not needed (e.g., list view) + if not include_sensitive: + response["masked_headers"] = {} + response["is_dynamic_registration"] = True + else: + # Add masked headers + response["masked_headers"] = self.masked_headers() + + # Add authentication info if available + masked_creds = self.masked_credentials() + if masked_creds: + response["authentication"] = masked_creds + response["is_dynamic_registration"] = self.credentials.get("client_information", {}).get( + "is_dynamic_registration", True + ) + + return response + + def retrieve_client_information(self) -> OAuthClientInformation | None: + """OAuth client information if available""" + credentials = self.decrypt_credentials() + if not credentials: + return None + + # Check if we have nested client_information structure + if "client_information" not in credentials: + return None + client_info_data = credentials["client_information"] + if isinstance(client_info_data, dict): + if "encrypted_client_secret" in client_info_data: + client_info_data["client_secret"] = encrypter.decrypt_token( + self.tenant_id, client_info_data["encrypted_client_secret"] + ) + return OAuthClientInformation.model_validate(client_info_data) + return None + + def retrieve_tokens(self) -> OAuthTokens | None: + """OAuth tokens if available""" + if not self.credentials: + return None + credentials = self.decrypt_credentials() + return OAuthTokens( + access_token=credentials.get("access_token", ""), + token_type=credentials.get("token_type", DEFAULT_TOKEN_TYPE), + expires_in=int(credentials.get("expires_in", str(DEFAULT_EXPIRES_IN)) or DEFAULT_EXPIRES_IN), + refresh_token=credentials.get("refresh_token", ""), + ) + + def masked_server_url(self) -> str: + """Masked server URL for display""" + parsed = urlparse(self.decrypt_server_url()) + if parsed.path and parsed.path != "/": + masked = parsed._replace(path="/******") + return masked.geturl() + return parsed.geturl() + + def _mask_value(self, value: str) -> str: + """Mask a sensitive value for display""" + if len(value) > MIN_UNMASK_LENGTH: + return value[:2] + MASK_CHAR * (len(value) - 4) + value[-2:] + else: + return MASK_CHAR * len(value) + + def masked_headers(self) -> dict[str, str]: + """Masked headers for display""" + return {key: self._mask_value(value) for key, value in self.decrypt_headers().items()} + + def masked_credentials(self) -> dict[str, str]: + """Masked credentials for display""" + credentials = self.decrypt_credentials() + if not credentials: + return {} + + masked = {} + + if "client_information" not in credentials or not isinstance(credentials["client_information"], dict): + return {} + client_info = credentials["client_information"] + # Mask sensitive fields from nested structure + if client_info.get("client_id"): + masked["client_id"] = self._mask_value(client_info["client_id"]) + if client_info.get("encrypted_client_secret"): + masked["client_secret"] = self._mask_value( + encrypter.decrypt_token(self.tenant_id, client_info["encrypted_client_secret"]) + ) + if client_info.get("client_secret"): + masked["client_secret"] = self._mask_value(client_info["client_secret"]) + return masked + + def decrypt_server_url(self) -> str: + """Decrypt server URL""" + return encrypter.decrypt_token(self.tenant_id, self.server_url) + + def _decrypt_dict(self, data: dict[str, Any]) -> dict[str, Any]: + """Generic method to decrypt dictionary fields""" + from core.tools.utils.encryption import create_provider_encrypter + + if not data: + return {} + + # Only decrypt fields that are actually encrypted + # For nested structures, client_information is not encrypted as a whole + encrypted_fields = [] + for key, value in data.items(): + # Skip nested objects - they are not encrypted + if isinstance(value, dict): + continue + # Only process string values that might be encrypted + if isinstance(value, str) and value: + encrypted_fields.append(key) + + if not encrypted_fields: + return data + + # Create dynamic config only for encrypted fields + config = [BasicProviderConfig(type=BasicProviderConfig.Type.SECRET_INPUT, name=key) for key in encrypted_fields] + + encrypter_instance, _ = create_provider_encrypter( + tenant_id=self.tenant_id, + config=config, + cache=NoOpProviderCredentialCache(), + ) + + # Decrypt only the encrypted fields + decrypted_data = encrypter_instance.decrypt({k: data[k] for k in encrypted_fields}) + + # Merge decrypted data with original data (preserving non-encrypted fields) + result = data.copy() + result.update(decrypted_data) + + return result + + def decrypt_headers(self) -> dict[str, Any]: + """Decrypt headers""" + return self._decrypt_dict(self.headers) + + def decrypt_credentials(self) -> dict[str, Any]: + """Decrypt credentials""" + return self._decrypt_dict(self.credentials) + + def decrypt_authentication(self) -> dict[str, Any]: + """Decrypt authentication""" + # Option 1: if headers is provided, use it and don't need to get token + headers = self.decrypt_headers() + + # Option 2: Add OAuth token if authed and no headers provided + if not self.headers and self.authed: + token = self.retrieve_tokens() + if token: + headers["Authorization"] = f"{token.token_type.capitalize()} {token.access_token}" + return headers diff --git a/api/core/entities/provider_configuration.py b/api/core/entities/provider_configuration.py index b10838f8c9..56c133e598 100644 --- a/api/core/entities/provider_configuration.py +++ b/api/core/entities/provider_configuration.py @@ -1533,6 +1533,9 @@ class ProviderConfiguration(BaseModel): # Return composite sort key: (model_type value, model position index) return (model.model_type.value, position_index) + # Deduplicate + provider_models = list({(m.model, m.model_type, m.fetch_from): m for m in provider_models}.values()) + # Sort using the composite sort key return sorted(provider_models, key=get_sort_key) diff --git a/api/core/entities/provider_entities.py b/api/core/entities/provider_entities.py index deac8847e6..8a8067332d 100644 --- a/api/core/entities/provider_entities.py +++ b/api/core/entities/provider_entities.py @@ -1,5 +1,5 @@ from enum import StrEnum, auto -from typing import Optional, Union +from typing import Union from pydantic import BaseModel, ConfigDict, Field @@ -84,9 +84,9 @@ class SystemConfiguration(BaseModel): """ enabled: bool - current_quota_type: Optional[ProviderQuotaType] = None + current_quota_type: ProviderQuotaType | None = None quota_configurations: list[QuotaConfiguration] = [] - credentials: Optional[dict] = None + credentials: dict | None = None class CustomProviderConfiguration(BaseModel): @@ -95,8 +95,8 @@ class CustomProviderConfiguration(BaseModel): """ credentials: dict - current_credential_id: Optional[str] = None - current_credential_name: Optional[str] = None + current_credential_id: str | None = None + current_credential_name: str | None = None available_credentials: list[CredentialConfiguration] = [] @@ -108,8 +108,8 @@ class CustomModelConfiguration(BaseModel): model: str model_type: ModelType credentials: dict | None - current_credential_id: Optional[str] = None - current_credential_name: Optional[str] = None + current_credential_id: str | None = None + current_credential_name: str | None = None available_model_credentials: list[CredentialConfiguration] = [] unadded_to_model_list: bool | None = False @@ -131,7 +131,7 @@ class CustomConfiguration(BaseModel): Model class for provider custom configuration. """ - provider: Optional[CustomProviderConfiguration] = None + provider: CustomProviderConfiguration | None = None models: list[CustomModelConfiguration] = [] can_added_models: list[UnaddedModelConfiguration] = [] diff --git a/api/core/file/models.py b/api/core/file/models.py index 7089b7ce7a..d149205d77 100644 --- a/api/core/file/models.py +++ b/api/core/file/models.py @@ -74,6 +74,10 @@ class File(BaseModel): storage_key: str | None = None, dify_model_identity: str | None = FILE_MODEL_IDENTITY, url: str | None = None, + # Legacy compatibility fields - explicitly handle known extra fields + tool_file_id: str | None = None, + upload_file_id: str | None = None, + datasource_file_id: str | None = None, ): super().__init__( id=id, diff --git a/api/core/helper/code_executor/javascript/javascript_transformer.py b/api/core/helper/code_executor/javascript/javascript_transformer.py index 62489cdf29..e28f027a3a 100644 --- a/api/core/helper/code_executor/javascript/javascript_transformer.py +++ b/api/core/helper/code_executor/javascript/javascript_transformer.py @@ -6,10 +6,7 @@ from core.helper.code_executor.template_transformer import TemplateTransformer class NodeJsTemplateTransformer(TemplateTransformer): @classmethod def get_runner_script(cls) -> str: - runner_script = dedent( - f""" - // declare main function - {cls._code_placeholder} + runner_script = dedent(f""" {cls._code_placeholder} // decode and prepare input object var inputs_obj = JSON.parse(Buffer.from('{cls._inputs_placeholder}', 'base64').toString('utf-8')) @@ -21,6 +18,5 @@ class NodeJsTemplateTransformer(TemplateTransformer): var output_json = JSON.stringify(output_obj) var result = `<>${{output_json}}<>` console.log(result) - """ - ) + """) return runner_script diff --git a/api/core/helper/code_executor/python3/python3_transformer.py b/api/core/helper/code_executor/python3/python3_transformer.py index 836fd273ae..ee866eeb81 100644 --- a/api/core/helper/code_executor/python3/python3_transformer.py +++ b/api/core/helper/code_executor/python3/python3_transformer.py @@ -6,9 +6,7 @@ from core.helper.code_executor.template_transformer import TemplateTransformer class Python3TemplateTransformer(TemplateTransformer): @classmethod def get_runner_script(cls) -> str: - runner_script = dedent(f""" - # declare main function - {cls._code_placeholder} + runner_script = dedent(f""" {cls._code_placeholder} import json from base64 import b64decode diff --git a/api/core/helper/marketplace.py b/api/core/helper/marketplace.py index bddb864a95..b2286d39ed 100644 --- a/api/core/helper/marketplace.py +++ b/api/core/helper/marketplace.py @@ -29,6 +29,18 @@ def batch_fetch_plugin_manifests(plugin_ids: list[str]) -> Sequence[MarketplaceP return [MarketplacePluginDeclaration.model_validate(plugin) for plugin in response.json()["data"]["plugins"]] +def batch_fetch_plugin_by_ids(plugin_ids: list[str]) -> list[dict]: + if not plugin_ids: + return [] + + url = str(marketplace_api_url / "api/v1/plugins/batch") + response = httpx.post(url, json={"plugin_ids": plugin_ids}, headers={"X-Dify-Version": dify_config.project.version}) + response.raise_for_status() + + data = response.json() + return data.get("data", {}).get("plugins", []) + + def batch_fetch_plugin_manifests_ignore_deserialization_error( plugin_ids: list[str], ) -> Sequence[MarketplacePluginDeclaration]: diff --git a/api/core/helper/provider_encryption.py b/api/core/helper/provider_encryption.py index 4f3ed75d86..8484a28c05 100644 --- a/api/core/helper/provider_encryption.py +++ b/api/core/helper/provider_encryption.py @@ -1,7 +1,7 @@ import contextlib from collections.abc import Mapping from copy import deepcopy -from typing import Any, Optional, Protocol +from typing import Any, Protocol from core.entities.provider_entities import BasicProviderConfig from core.helper import encrypter @@ -12,7 +12,7 @@ class ProviderConfigCache(Protocol): Interface for provider configuration cache operations """ - def get(self) -> Optional[dict[str, Any]]: + def get(self) -> dict[str, Any] | None: """Get cached provider configuration""" ... diff --git a/api/core/mcp/auth/auth_flow.py b/api/core/mcp/auth/auth_flow.py index 7d938a8a7d..92787b39dd 100644 --- a/api/core/mcp/auth/auth_flow.py +++ b/api/core/mcp/auth/auth_flow.py @@ -7,33 +7,28 @@ import urllib.parse from urllib.parse import urljoin, urlparse import httpx -from pydantic import BaseModel, ValidationError +from httpx import RequestError +from pydantic import ValidationError -from core.mcp.auth.auth_provider import OAuthClientProvider +from core.entities.mcp_provider import MCPProviderEntity, MCPSupportGrantType +from core.helper import ssrf_proxy +from core.mcp.entities import AuthAction, AuthActionType, AuthResult, OAuthCallbackState +from core.mcp.error import MCPRefreshTokenError from core.mcp.types import ( + LATEST_PROTOCOL_VERSION, OAuthClientInformation, OAuthClientInformationFull, OAuthClientMetadata, OAuthMetadata, OAuthTokens, + ProtectedResourceMetadata, ) from extensions.ext_redis import redis_client -LATEST_PROTOCOL_VERSION = "1.0" OAUTH_STATE_EXPIRY_SECONDS = 5 * 60 # 5 minutes expiry OAUTH_STATE_REDIS_KEY_PREFIX = "oauth_state:" -class OAuthCallbackState(BaseModel): - provider_id: str - tenant_id: str - server_url: str - metadata: OAuthMetadata | None = None - client_information: OAuthClientInformation - code_verifier: str - redirect_uri: str - - def generate_pkce_challenge() -> tuple[str, str]: """Generate PKCE challenge and verifier.""" code_verifier = base64.urlsafe_b64encode(os.urandom(40)).decode("utf-8") @@ -46,6 +41,131 @@ def generate_pkce_challenge() -> tuple[str, str]: return code_verifier, code_challenge +def build_protected_resource_metadata_discovery_urls( + www_auth_resource_metadata_url: str | None, server_url: str +) -> list[str]: + """ + Build a list of URLs to try for Protected Resource Metadata discovery. + + Per SEP-985, supports fallback when discovery fails at one URL. + """ + urls = [] + + # First priority: URL from WWW-Authenticate header + if www_auth_resource_metadata_url: + urls.append(www_auth_resource_metadata_url) + + # Fallback: construct from server URL + parsed = urlparse(server_url) + base_url = f"{parsed.scheme}://{parsed.netloc}" + fallback_url = urljoin(base_url, "/.well-known/oauth-protected-resource") + if fallback_url not in urls: + urls.append(fallback_url) + + return urls + + +def build_oauth_authorization_server_metadata_discovery_urls(auth_server_url: str | None, server_url: str) -> list[str]: + """ + Build a list of URLs to try for OAuth Authorization Server Metadata discovery. + + Supports both OAuth 2.0 (RFC 8414) and OpenID Connect discovery. + + Per RFC 8414 section 3: + - If issuer has no path: https://example.com/.well-known/oauth-authorization-server + - If issuer has path: https://example.com/.well-known/oauth-authorization-server{path} + + Example: + - issuer: https://example.com/oauth + - metadata: https://example.com/.well-known/oauth-authorization-server/oauth + """ + urls = [] + base_url = auth_server_url or server_url + + parsed = urlparse(base_url) + base = f"{parsed.scheme}://{parsed.netloc}" + path = parsed.path.rstrip("/") # Remove trailing slash + + # Try OpenID Connect discovery first (more common) + urls.append(urljoin(base + "/", ".well-known/openid-configuration")) + + # OAuth 2.0 Authorization Server Metadata (RFC 8414) + # Include the path component if present in the issuer URL + if path: + urls.append(urljoin(base, f".well-known/oauth-authorization-server{path}")) + else: + urls.append(urljoin(base, ".well-known/oauth-authorization-server")) + + return urls + + +def discover_protected_resource_metadata( + prm_url: str | None, server_url: str, protocol_version: str | None = None +) -> ProtectedResourceMetadata | None: + """Discover OAuth 2.0 Protected Resource Metadata (RFC 9470).""" + urls = build_protected_resource_metadata_discovery_urls(prm_url, server_url) + headers = {"MCP-Protocol-Version": protocol_version or LATEST_PROTOCOL_VERSION, "User-Agent": "Dify"} + + for url in urls: + try: + response = ssrf_proxy.get(url, headers=headers) + if response.status_code == 200: + return ProtectedResourceMetadata.model_validate(response.json()) + elif response.status_code == 404: + continue # Try next URL + except (RequestError, ValidationError): + continue # Try next URL + + return None + + +def discover_oauth_authorization_server_metadata( + auth_server_url: str | None, server_url: str, protocol_version: str | None = None +) -> OAuthMetadata | None: + """Discover OAuth 2.0 Authorization Server Metadata (RFC 8414).""" + urls = build_oauth_authorization_server_metadata_discovery_urls(auth_server_url, server_url) + headers = {"MCP-Protocol-Version": protocol_version or LATEST_PROTOCOL_VERSION, "User-Agent": "Dify"} + + for url in urls: + try: + response = ssrf_proxy.get(url, headers=headers) + if response.status_code == 200: + return OAuthMetadata.model_validate(response.json()) + elif response.status_code == 404: + continue # Try next URL + except (RequestError, ValidationError): + continue # Try next URL + + return None + + +def get_effective_scope( + scope_from_www_auth: str | None, + prm: ProtectedResourceMetadata | None, + asm: OAuthMetadata | None, + client_scope: str | None, +) -> str | None: + """ + Determine effective scope using priority-based selection strategy. + + Priority order: + 1. WWW-Authenticate header scope (server explicit requirement) + 2. Protected Resource Metadata scopes + 3. OAuth Authorization Server Metadata scopes + 4. Client configured scope + """ + if scope_from_www_auth: + return scope_from_www_auth + + if prm and prm.scopes_supported: + return " ".join(prm.scopes_supported) + + if asm and asm.scopes_supported: + return " ".join(asm.scopes_supported) + + return client_scope + + def _create_secure_redis_state(state_data: OAuthCallbackState) -> str: """Create a secure state parameter by storing state data in Redis and returning a random state key.""" # Generate a secure random state key @@ -80,8 +200,13 @@ def _retrieve_redis_state(state_key: str) -> OAuthCallbackState: raise ValueError(f"Invalid state parameter: {str(e)}") -def handle_callback(state_key: str, authorization_code: str) -> OAuthCallbackState: - """Handle the callback from the OAuth provider.""" +def handle_callback(state_key: str, authorization_code: str) -> tuple[OAuthCallbackState, OAuthTokens]: + """ + Handle the callback from the OAuth provider. + + Returns: + A tuple of (callback_state, tokens) that can be used by the caller to save data. + """ # Retrieve state data from Redis (state is automatically deleted after retrieval) full_state_data = _retrieve_redis_state(state_key) @@ -93,60 +218,66 @@ def handle_callback(state_key: str, authorization_code: str) -> OAuthCallbackSta full_state_data.code_verifier, full_state_data.redirect_uri, ) - provider = OAuthClientProvider(full_state_data.provider_id, full_state_data.tenant_id, for_list=True) - provider.save_tokens(tokens) - return full_state_data + + return full_state_data, tokens def check_support_resource_discovery(server_url: str) -> tuple[bool, str]: """Check if the server supports OAuth 2.0 Resource Discovery.""" - b_scheme, b_netloc, b_path, _, b_query, b_fragment = urlparse(server_url, "", True) - url_for_resource_discovery = f"{b_scheme}://{b_netloc}/.well-known/oauth-protected-resource{b_path}" + b_scheme, b_netloc, _, _, b_query, b_fragment = urlparse(server_url, "", True) + url_for_resource_discovery = f"{b_scheme}://{b_netloc}/.well-known/oauth-protected-resource" if b_query: url_for_resource_discovery += f"?{b_query}" if b_fragment: url_for_resource_discovery += f"#{b_fragment}" try: headers = {"MCP-Protocol-Version": LATEST_PROTOCOL_VERSION, "User-Agent": "Dify"} - response = httpx.get(url_for_resource_discovery, headers=headers) + response = ssrf_proxy.get(url_for_resource_discovery, headers=headers) if 200 <= response.status_code < 300: body = response.json() - if "authorization_server_url" in body: + # Support both singular and plural forms + if body.get("authorization_servers"): + return True, body["authorization_servers"][0] + elif body.get("authorization_server_url"): return True, body["authorization_server_url"][0] else: return False, "" return False, "" - except httpx.RequestError: + except RequestError: # Not support resource discovery, fall back to well-known OAuth metadata return False, "" -def discover_oauth_metadata(server_url: str, protocol_version: str | None = None) -> OAuthMetadata | None: - """Looks up RFC 8414 OAuth 2.0 Authorization Server Metadata.""" - # First check if the server supports OAuth 2.0 Resource Discovery - support_resource_discovery, oauth_discovery_url = check_support_resource_discovery(server_url) - if support_resource_discovery: - url = oauth_discovery_url - else: - url = urljoin(server_url, "/.well-known/oauth-authorization-server") +def discover_oauth_metadata( + server_url: str, + resource_metadata_url: str | None = None, + scope_hint: str | None = None, + protocol_version: str | None = None, +) -> tuple[OAuthMetadata | None, ProtectedResourceMetadata | None, str | None]: + """ + Discover OAuth metadata using RFC 8414/9470 standards. - try: - headers = {"MCP-Protocol-Version": protocol_version or LATEST_PROTOCOL_VERSION} - response = httpx.get(url, headers=headers) - if response.status_code == 404: - return None - if not response.is_success: - raise ValueError(f"HTTP {response.status_code} trying to load well-known OAuth metadata") - return OAuthMetadata.model_validate(response.json()) - except httpx.RequestError as e: - if isinstance(e, httpx.ConnectError): - response = httpx.get(url) - if response.status_code == 404: - return None - if not response.is_success: - raise ValueError(f"HTTP {response.status_code} trying to load well-known OAuth metadata") - return OAuthMetadata.model_validate(response.json()) - raise + Args: + server_url: The MCP server URL + resource_metadata_url: Protected Resource Metadata URL from WWW-Authenticate header + scope_hint: Scope hint from WWW-Authenticate header + protocol_version: MCP protocol version + + Returns: + (oauth_metadata, protected_resource_metadata, scope_hint) + """ + # Discover Protected Resource Metadata + prm = discover_protected_resource_metadata(resource_metadata_url, server_url, protocol_version) + + # Get authorization server URL from PRM or use server URL + auth_server_url = None + if prm and prm.authorization_servers: + auth_server_url = prm.authorization_servers[0] + + # Discover OAuth Authorization Server Metadata + asm = discover_oauth_authorization_server_metadata(auth_server_url, server_url, protocol_version) + + return asm, prm, scope_hint def start_authorization( @@ -156,6 +287,7 @@ def start_authorization( redirect_url: str, provider_id: str, tenant_id: str, + scope: str | None = None, ) -> tuple[str, str]: """Begins the authorization flow with secure Redis state storage.""" response_type = "code" @@ -165,13 +297,6 @@ def start_authorization( authorization_url = metadata.authorization_endpoint if response_type not in metadata.response_types_supported: raise ValueError(f"Incompatible auth server: does not support response type {response_type}") - if ( - not metadata.code_challenge_methods_supported - or code_challenge_method not in metadata.code_challenge_methods_supported - ): - raise ValueError( - f"Incompatible auth server: does not support code challenge method {code_challenge_method}" - ) else: authorization_url = urljoin(server_url, "/authorize") @@ -200,10 +325,49 @@ def start_authorization( "state": state_key, } + # Add scope if provided + if scope: + params["scope"] = scope + authorization_url = f"{authorization_url}?{urllib.parse.urlencode(params)}" return authorization_url, code_verifier +def _parse_token_response(response: httpx.Response) -> OAuthTokens: + """ + Parse OAuth token response supporting both JSON and form-urlencoded formats. + + Per RFC 6749 Section 5.1, the standard format is JSON. + However, some legacy OAuth providers (e.g., early GitHub OAuth Apps) return + application/x-www-form-urlencoded format for backwards compatibility. + + Args: + response: The HTTP response from token endpoint + + Returns: + Parsed OAuth tokens + + Raises: + ValueError: If response cannot be parsed + """ + content_type = response.headers.get("content-type", "").lower() + + if "application/json" in content_type: + # Standard OAuth 2.0 JSON response (RFC 6749) + return OAuthTokens.model_validate(response.json()) + elif "application/x-www-form-urlencoded" in content_type: + # Legacy form-urlencoded response (non-standard but used by some providers) + token_data = dict(urllib.parse.parse_qsl(response.text)) + return OAuthTokens.model_validate(token_data) + else: + # No content-type or unknown - try JSON first, fallback to form-urlencoded + try: + return OAuthTokens.model_validate(response.json()) + except (ValidationError, json.JSONDecodeError): + token_data = dict(urllib.parse.parse_qsl(response.text)) + return OAuthTokens.model_validate(token_data) + + def exchange_authorization( server_url: str, metadata: OAuthMetadata | None, @@ -213,7 +377,7 @@ def exchange_authorization( redirect_uri: str, ) -> OAuthTokens: """Exchanges an authorization code for an access token.""" - grant_type = "authorization_code" + grant_type = MCPSupportGrantType.AUTHORIZATION_CODE.value if metadata: token_url = metadata.token_endpoint @@ -233,10 +397,10 @@ def exchange_authorization( if client_information.client_secret: params["client_secret"] = client_information.client_secret - response = httpx.post(token_url, data=params) + response = ssrf_proxy.post(token_url, data=params) if not response.is_success: raise ValueError(f"Token exchange failed: HTTP {response.status_code}") - return OAuthTokens.model_validate(response.json()) + return _parse_token_response(response) def refresh_authorization( @@ -246,7 +410,7 @@ def refresh_authorization( refresh_token: str, ) -> OAuthTokens: """Exchange a refresh token for an updated access token.""" - grant_type = "refresh_token" + grant_type = MCPSupportGrantType.REFRESH_TOKEN.value if metadata: token_url = metadata.token_endpoint @@ -263,11 +427,56 @@ def refresh_authorization( if client_information.client_secret: params["client_secret"] = client_information.client_secret - - response = httpx.post(token_url, data=params) + try: + response = ssrf_proxy.post(token_url, data=params) + except ssrf_proxy.MaxRetriesExceededError as e: + raise MCPRefreshTokenError(e) from e if not response.is_success: - raise ValueError(f"Token refresh failed: HTTP {response.status_code}") - return OAuthTokens.model_validate(response.json()) + raise MCPRefreshTokenError(response.text) + return _parse_token_response(response) + + +def client_credentials_flow( + server_url: str, + metadata: OAuthMetadata | None, + client_information: OAuthClientInformation, + scope: str | None = None, +) -> OAuthTokens: + """Execute Client Credentials Flow to get access token.""" + grant_type = MCPSupportGrantType.CLIENT_CREDENTIALS.value + + if metadata: + token_url = metadata.token_endpoint + if metadata.grant_types_supported and grant_type not in metadata.grant_types_supported: + raise ValueError(f"Incompatible auth server: does not support grant type {grant_type}") + else: + token_url = urljoin(server_url, "/token") + + # Support both Basic Auth and body parameters for client authentication + headers = {"Content-Type": "application/x-www-form-urlencoded"} + data = {"grant_type": grant_type} + + if scope: + data["scope"] = scope + + # If client_secret is provided, use Basic Auth (preferred method) + if client_information.client_secret: + credentials = f"{client_information.client_id}:{client_information.client_secret}" + encoded_credentials = base64.b64encode(credentials.encode()).decode() + headers["Authorization"] = f"Basic {encoded_credentials}" + else: + # Fall back to including credentials in the body + data["client_id"] = client_information.client_id + if client_information.client_secret: + data["client_secret"] = client_information.client_secret + + response = ssrf_proxy.post(token_url, headers=headers, data=data) + if not response.is_success: + raise ValueError( + f"Client credentials token request failed: HTTP {response.status_code}, Response: {response.text}" + ) + + return _parse_token_response(response) def register_client( @@ -283,7 +492,7 @@ def register_client( else: registration_url = urljoin(server_url, "/register") - response = httpx.post( + response = ssrf_proxy.post( registration_url, json=client_metadata.model_dump(), headers={"Content-Type": "application/json"}, @@ -294,28 +503,120 @@ def register_client( def auth( - provider: OAuthClientProvider, - server_url: str, + provider: MCPProviderEntity, authorization_code: str | None = None, state_param: str | None = None, - for_list: bool = False, -) -> dict[str, str]: - """Orchestrates the full auth flow with a server using secure Redis state storage.""" - metadata = discover_oauth_metadata(server_url) + resource_metadata_url: str | None = None, + scope_hint: str | None = None, +) -> AuthResult: + """ + Orchestrates the full auth flow with a server using secure Redis state storage. + + This function performs only network operations and returns actions that need + to be performed by the caller (such as saving data to database). + + Args: + provider: The MCP provider entity + authorization_code: Optional authorization code from OAuth callback + state_param: Optional state parameter from OAuth callback + resource_metadata_url: Optional Protected Resource Metadata URL from WWW-Authenticate + scope_hint: Optional scope hint from WWW-Authenticate header + + Returns: + AuthResult containing actions to be performed and response data + """ + actions: list[AuthAction] = [] + server_url = provider.decrypt_server_url() + + # Discover OAuth metadata using RFC 8414/9470 standards + server_metadata, prm, scope_from_www_auth = discover_oauth_metadata( + server_url, resource_metadata_url, scope_hint, LATEST_PROTOCOL_VERSION + ) + + client_metadata = provider.client_metadata + provider_id = provider.id + tenant_id = provider.tenant_id + client_information = provider.retrieve_client_information() + redirect_url = provider.redirect_url + credentials = provider.decrypt_credentials() + + # Determine grant type based on server metadata + if not server_metadata: + raise ValueError("Failed to discover OAuth metadata from server") + + supported_grant_types = server_metadata.grant_types_supported or [] + + # Convert to lowercase for comparison + supported_grant_types_lower = [gt.lower() for gt in supported_grant_types] + + # Determine which grant type to use + effective_grant_type = None + if MCPSupportGrantType.AUTHORIZATION_CODE.value in supported_grant_types_lower: + effective_grant_type = MCPSupportGrantType.AUTHORIZATION_CODE.value + else: + effective_grant_type = MCPSupportGrantType.CLIENT_CREDENTIALS.value + + # Determine effective scope using priority-based strategy + effective_scope = get_effective_scope(scope_from_www_auth, prm, server_metadata, credentials.get("scope")) - # Handle client registration if needed - client_information = provider.client_information() if not client_information: if authorization_code is not None: raise ValueError("Existing OAuth client information is required when exchanging an authorization code") + + # For client credentials flow, we don't need to register client dynamically + if effective_grant_type == MCPSupportGrantType.CLIENT_CREDENTIALS.value: + # Client should provide client_id and client_secret directly + raise ValueError("Client credentials flow requires client_id and client_secret to be provided") + try: - full_information = register_client(server_url, metadata, provider.client_metadata) - except httpx.RequestError as e: + full_information = register_client(server_url, server_metadata, client_metadata) + except RequestError as e: raise ValueError(f"Could not register OAuth client: {e}") - provider.save_client_information(full_information) + + # Return action to save client information + actions.append( + AuthAction( + action_type=AuthActionType.SAVE_CLIENT_INFO, + data={"client_information": full_information.model_dump()}, + provider_id=provider_id, + tenant_id=tenant_id, + ) + ) + client_information = full_information - # Exchange authorization code for tokens + # Handle client credentials flow + if effective_grant_type == MCPSupportGrantType.CLIENT_CREDENTIALS.value: + # Direct token request without user interaction + try: + tokens = client_credentials_flow( + server_url, + server_metadata, + client_information, + effective_scope, + ) + + # Return action to save tokens and grant type + token_data = tokens.model_dump() + token_data["grant_type"] = MCPSupportGrantType.CLIENT_CREDENTIALS.value + + actions.append( + AuthAction( + action_type=AuthActionType.SAVE_TOKENS, + data=token_data, + provider_id=provider_id, + tenant_id=tenant_id, + ) + ) + + return AuthResult(actions=actions, response={"result": "success"}) + except (RequestError, ValueError, KeyError) as e: + # RequestError: HTTP request failed + # ValueError: Invalid response data + # KeyError: Missing required fields in response + raise ValueError(f"Client credentials flow failed: {e}") + + # Exchange authorization code for tokens (Authorization Code flow) if authorization_code is not None: if not state_param: raise ValueError("State parameter is required when exchanging authorization code") @@ -335,35 +636,70 @@ def auth( tokens = exchange_authorization( server_url, - metadata, + server_metadata, client_information, authorization_code, code_verifier, redirect_uri, ) - provider.save_tokens(tokens) - return {"result": "success"} - provider_tokens = provider.tokens() + # Return action to save tokens + actions.append( + AuthAction( + action_type=AuthActionType.SAVE_TOKENS, + data=tokens.model_dump(), + provider_id=provider_id, + tenant_id=tenant_id, + ) + ) + + return AuthResult(actions=actions, response={"result": "success"}) + + provider_tokens = provider.retrieve_tokens() # Handle token refresh or new authorization if provider_tokens and provider_tokens.refresh_token: try: - new_tokens = refresh_authorization(server_url, metadata, client_information, provider_tokens.refresh_token) - provider.save_tokens(new_tokens) - return {"result": "success"} - except Exception as e: + new_tokens = refresh_authorization( + server_url, server_metadata, client_information, provider_tokens.refresh_token + ) + + # Return action to save new tokens + actions.append( + AuthAction( + action_type=AuthActionType.SAVE_TOKENS, + data=new_tokens.model_dump(), + provider_id=provider_id, + tenant_id=tenant_id, + ) + ) + + return AuthResult(actions=actions, response={"result": "success"}) + except (RequestError, ValueError, KeyError) as e: + # RequestError: HTTP request failed + # ValueError: Invalid response data + # KeyError: Missing required fields in response raise ValueError(f"Could not refresh OAuth tokens: {e}") - # Start new authorization flow + # Start new authorization flow (only for authorization code flow) authorization_url, code_verifier = start_authorization( server_url, - metadata, + server_metadata, client_information, - provider.redirect_url, - provider.mcp_provider.id, - provider.mcp_provider.tenant_id, + redirect_url, + provider_id, + tenant_id, + effective_scope, ) - provider.save_code_verifier(code_verifier) - return {"authorization_url": authorization_url} + # Return action to save code verifier + actions.append( + AuthAction( + action_type=AuthActionType.SAVE_CODE_VERIFIER, + data={"code_verifier": code_verifier}, + provider_id=provider_id, + tenant_id=tenant_id, + ) + ) + + return AuthResult(actions=actions, response={"authorization_url": authorization_url}) diff --git a/api/core/mcp/auth/auth_provider.py b/api/core/mcp/auth/auth_provider.py deleted file mode 100644 index 3a550eb1b6..0000000000 --- a/api/core/mcp/auth/auth_provider.py +++ /dev/null @@ -1,77 +0,0 @@ -from configs import dify_config -from core.mcp.types import ( - OAuthClientInformation, - OAuthClientInformationFull, - OAuthClientMetadata, - OAuthTokens, -) -from models.tools import MCPToolProvider -from services.tools.mcp_tools_manage_service import MCPToolManageService - - -class OAuthClientProvider: - mcp_provider: MCPToolProvider - - def __init__(self, provider_id: str, tenant_id: str, for_list: bool = False): - if for_list: - self.mcp_provider = MCPToolManageService.get_mcp_provider_by_provider_id(provider_id, tenant_id) - else: - self.mcp_provider = MCPToolManageService.get_mcp_provider_by_server_identifier(provider_id, tenant_id) - - @property - def redirect_url(self) -> str: - """The URL to redirect the user agent to after authorization.""" - return dify_config.CONSOLE_API_URL + "/console/api/mcp/oauth/callback" - - @property - def client_metadata(self) -> OAuthClientMetadata: - """Metadata about this OAuth client.""" - return OAuthClientMetadata( - redirect_uris=[self.redirect_url], - token_endpoint_auth_method="none", - grant_types=["authorization_code", "refresh_token"], - response_types=["code"], - client_name="Dify", - client_uri="https://github.com/langgenius/dify", - ) - - def client_information(self) -> OAuthClientInformation | None: - """Loads information about this OAuth client.""" - client_information = self.mcp_provider.decrypted_credentials.get("client_information", {}) - if not client_information: - return None - return OAuthClientInformation.model_validate(client_information) - - def save_client_information(self, client_information: OAuthClientInformationFull): - """Saves client information after dynamic registration.""" - MCPToolManageService.update_mcp_provider_credentials( - self.mcp_provider, - {"client_information": client_information.model_dump()}, - ) - - def tokens(self) -> OAuthTokens | None: - """Loads any existing OAuth tokens for the current session.""" - credentials = self.mcp_provider.decrypted_credentials - if not credentials: - return None - return OAuthTokens( - access_token=credentials.get("access_token", ""), - token_type=credentials.get("token_type", "Bearer"), - expires_in=int(credentials.get("expires_in", "3600") or 3600), - refresh_token=credentials.get("refresh_token", ""), - ) - - def save_tokens(self, tokens: OAuthTokens): - """Stores new OAuth tokens for the current session.""" - # update mcp provider credentials - token_dict = tokens.model_dump() - MCPToolManageService.update_mcp_provider_credentials(self.mcp_provider, token_dict, authed=True) - - def save_code_verifier(self, code_verifier: str): - """Saves a PKCE code verifier for the current session.""" - MCPToolManageService.update_mcp_provider_credentials(self.mcp_provider, {"code_verifier": code_verifier}) - - def code_verifier(self) -> str: - """Loads the PKCE code verifier for the current session.""" - # get code verifier from mcp provider credentials - return str(self.mcp_provider.decrypted_credentials.get("code_verifier", "")) diff --git a/api/core/mcp/auth_client.py b/api/core/mcp/auth_client.py new file mode 100644 index 0000000000..d8724b8de5 --- /dev/null +++ b/api/core/mcp/auth_client.py @@ -0,0 +1,197 @@ +""" +MCP Client with Authentication Retry Support + +This module provides an enhanced MCPClient that automatically handles +authentication failures and retries operations after refreshing tokens. +""" + +import logging +from collections.abc import Callable +from typing import Any + +from sqlalchemy.orm import Session + +from core.entities.mcp_provider import MCPProviderEntity +from core.mcp.error import MCPAuthError +from core.mcp.mcp_client import MCPClient +from core.mcp.types import CallToolResult, Tool +from extensions.ext_database import db + +logger = logging.getLogger(__name__) + + +class MCPClientWithAuthRetry(MCPClient): + """ + An enhanced MCPClient that provides automatic authentication retry. + + This class extends MCPClient and intercepts MCPAuthError exceptions + to refresh authentication before retrying failed operations. + + Note: This class uses lazy session creation - database sessions are only + created when authentication retry is actually needed, not on every request. + """ + + def __init__( + self, + server_url: str, + headers: dict[str, str] | None = None, + timeout: float | None = None, + sse_read_timeout: float | None = None, + provider_entity: MCPProviderEntity | None = None, + authorization_code: str | None = None, + by_server_id: bool = False, + ): + """ + Initialize the MCP client with auth retry capability. + + Args: + server_url: The MCP server URL + headers: Optional headers for requests + timeout: Request timeout + sse_read_timeout: SSE read timeout + provider_entity: Provider entity for authentication + authorization_code: Optional authorization code for initial auth + by_server_id: Whether to look up provider by server ID + """ + super().__init__(server_url, headers, timeout, sse_read_timeout) + + self.provider_entity = provider_entity + self.authorization_code = authorization_code + self.by_server_id = by_server_id + self._has_retried = False + + def _handle_auth_error(self, error: MCPAuthError) -> None: + """ + Handle authentication error by refreshing tokens. + + This method creates a short-lived database session only when authentication + retry is needed, minimizing database connection hold time. + + Args: + error: The authentication error + + Raises: + MCPAuthError: If authentication fails or max retries reached + """ + if not self.provider_entity: + raise error + if self._has_retried: + raise error + + self._has_retried = True + + try: + # Create a temporary session only for auth retry + # This session is short-lived and only exists during the auth operation + + from services.tools.mcp_tools_manage_service import MCPToolManageService + + with Session(db.engine) as session, session.begin(): + mcp_service = MCPToolManageService(session=session) + + # Perform authentication using the service's auth method + # Extract OAuth metadata hints from the error + mcp_service.auth_with_actions( + self.provider_entity, + self.authorization_code, + resource_metadata_url=error.resource_metadata_url, + scope_hint=error.scope_hint, + ) + + # Retrieve new tokens + self.provider_entity = mcp_service.get_provider_entity( + self.provider_entity.id, self.provider_entity.tenant_id, by_server_id=self.by_server_id + ) + + # Session is closed here, before we update headers + token = self.provider_entity.retrieve_tokens() + if not token: + raise MCPAuthError("Authentication failed - no token received") + + # Update headers with new token + self.headers["Authorization"] = f"{token.token_type.capitalize()} {token.access_token}" + + # Clear authorization code after first use + self.authorization_code = None + + except MCPAuthError: + # Re-raise MCPAuthError as is + raise + except Exception as e: + # Catch all exceptions during auth retry + logger.exception("Authentication retry failed") + raise MCPAuthError(f"Authentication retry failed: {e}") from e + + def _execute_with_retry(self, func: Callable[..., Any], *args, **kwargs) -> Any: + """ + Execute a function with authentication retry logic. + + Args: + func: The function to execute + *args: Positional arguments for the function + **kwargs: Keyword arguments for the function + + Returns: + The result of the function call + + Raises: + MCPAuthError: If authentication fails after retries + Any other exceptions from the function + """ + try: + return func(*args, **kwargs) + except MCPAuthError as e: + self._handle_auth_error(e) + + # Re-initialize the connection with new headers + if self._initialized: + # Clean up existing connection + self._exit_stack.close() + self._session = None + self._initialized = False + + # Re-initialize with new headers + self._initialize() + self._initialized = True + + return func(*args, **kwargs) + finally: + # Reset retry flag after operation completes + self._has_retried = False + + def __enter__(self): + """Enter the context manager with retry support.""" + + def initialize_with_retry(): + super(MCPClientWithAuthRetry, self).__enter__() + return self + + return self._execute_with_retry(initialize_with_retry) + + def list_tools(self) -> list[Tool]: + """ + List available tools from the MCP server with auth retry. + + Returns: + List of available tools + + Raises: + MCPAuthError: If authentication fails after retries + """ + return self._execute_with_retry(super().list_tools) + + def invoke_tool(self, tool_name: str, tool_args: dict[str, Any]) -> CallToolResult: + """ + Invoke a tool on the MCP server with auth retry. + + Args: + tool_name: Name of the tool to invoke + tool_args: Arguments for the tool + + Returns: + Result of the tool invocation + + Raises: + MCPAuthError: If authentication fails after retries + """ + return self._execute_with_retry(super().invoke_tool, tool_name, tool_args) diff --git a/web/app/components/app/configuration/base/icons/remove-icon/style.module.css b/api/core/mcp/auth_client_comparison.md similarity index 100% rename from web/app/components/app/configuration/base/icons/remove-icon/style.module.css rename to api/core/mcp/auth_client_comparison.md diff --git a/api/core/mcp/client/sse_client.py b/api/core/mcp/client/sse_client.py index 6db22a09e0..24ca59ee45 100644 --- a/api/core/mcp/client/sse_client.py +++ b/api/core/mcp/client/sse_client.py @@ -46,7 +46,7 @@ class SSETransport: url: str, headers: dict[str, Any] | None = None, timeout: float = 5.0, - sse_read_timeout: float = 5 * 60, + sse_read_timeout: float = 1 * 60, ): """Initialize the SSE transport. @@ -255,7 +255,7 @@ def sse_client( url: str, headers: dict[str, Any] | None = None, timeout: float = 5.0, - sse_read_timeout: float = 5 * 60, + sse_read_timeout: float = 1 * 60, ) -> Generator[tuple[ReadQueue, WriteQueue], None, None]: """ Client transport for SSE. @@ -276,31 +276,34 @@ def sse_client( read_queue: ReadQueue | None = None write_queue: WriteQueue | None = None - with ThreadPoolExecutor() as executor: - try: - with create_ssrf_proxy_mcp_http_client(headers=transport.headers) as client: - with ssrf_proxy_sse_connect( - url, timeout=httpx.Timeout(timeout, read=sse_read_timeout), client=client - ) as event_source: - event_source.response.raise_for_status() + executor = ThreadPoolExecutor() + try: + with create_ssrf_proxy_mcp_http_client(headers=transport.headers) as client: + with ssrf_proxy_sse_connect( + url, timeout=httpx.Timeout(timeout, read=sse_read_timeout), client=client + ) as event_source: + event_source.response.raise_for_status() - read_queue, write_queue = transport.connect(executor, client, event_source) + read_queue, write_queue = transport.connect(executor, client, event_source) - yield read_queue, write_queue + yield read_queue, write_queue - except httpx.HTTPStatusError as exc: - if exc.response.status_code == 401: - raise MCPAuthError() - raise MCPConnectionError() - except Exception: - logger.exception("Error connecting to SSE endpoint") - raise - finally: - # Clean up queues - if read_queue: - read_queue.put(None) - if write_queue: - write_queue.put(None) + except httpx.HTTPStatusError as exc: + if exc.response.status_code == 401: + raise MCPAuthError(response=exc.response) + raise MCPConnectionError() + except Exception: + logger.exception("Error connecting to SSE endpoint") + raise + finally: + # Clean up queues + if read_queue: + read_queue.put(None) + if write_queue: + write_queue.put(None) + + # Shutdown executor without waiting to prevent hanging + executor.shutdown(wait=False) def send_message(http_client: httpx.Client, endpoint_url: str, session_message: SessionMessage): diff --git a/api/core/mcp/client/streamable_client.py b/api/core/mcp/client/streamable_client.py index 7eafa79837..805c16c838 100644 --- a/api/core/mcp/client/streamable_client.py +++ b/api/core/mcp/client/streamable_client.py @@ -138,6 +138,10 @@ class StreamableHTTPTransport: ) -> bool: """Handle an SSE event, returning True if the response is complete.""" if sse.event == "message": + # ping event send by server will be recognized as a message event with empty data by httpx-sse's SSEDecoder + if not sse.data.strip(): + return False + try: message = JSONRPCMessage.model_validate_json(sse.data) logger.debug("SSE message: %s", message) @@ -434,45 +438,48 @@ def streamablehttp_client( server_to_client_queue: ServerToClientQueue = queue.Queue() # For messages FROM server TO client client_to_server_queue: ClientToServerQueue = queue.Queue() # For messages FROM client TO server - with ThreadPoolExecutor(max_workers=2) as executor: - try: - with create_ssrf_proxy_mcp_http_client( - headers=transport.request_headers, - timeout=httpx.Timeout(transport.timeout, read=transport.sse_read_timeout), - ) as client: - # Define callbacks that need access to thread pool - def start_get_stream(): - """Start a worker thread to handle server-initiated messages.""" - executor.submit(transport.handle_get_stream, client, server_to_client_queue) + executor = ThreadPoolExecutor(max_workers=2) + try: + with create_ssrf_proxy_mcp_http_client( + headers=transport.request_headers, + timeout=httpx.Timeout(transport.timeout, read=transport.sse_read_timeout), + ) as client: + # Define callbacks that need access to thread pool + def start_get_stream(): + """Start a worker thread to handle server-initiated messages.""" + executor.submit(transport.handle_get_stream, client, server_to_client_queue) - # Start the post_writer worker thread - executor.submit( - transport.post_writer, - client, - client_to_server_queue, # Queue for messages FROM client TO server - server_to_client_queue, # Queue for messages FROM server TO client - start_get_stream, - ) + # Start the post_writer worker thread + executor.submit( + transport.post_writer, + client, + client_to_server_queue, # Queue for messages FROM client TO server + server_to_client_queue, # Queue for messages FROM server TO client + start_get_stream, + ) - try: - yield ( - server_to_client_queue, # Queue for receiving messages FROM server - client_to_server_queue, # Queue for sending messages TO server - transport.get_session_id, - ) - finally: - if transport.session_id and terminate_on_close: - transport.terminate_session(client) - - # Signal threads to stop - client_to_server_queue.put(None) - finally: - # Clear any remaining items and add None sentinel to unblock any waiting threads try: - while not client_to_server_queue.empty(): - client_to_server_queue.get_nowait() - except queue.Empty: - pass + yield ( + server_to_client_queue, # Queue for receiving messages FROM server + client_to_server_queue, # Queue for sending messages TO server + transport.get_session_id, + ) + finally: + if transport.session_id and terminate_on_close: + transport.terminate_session(client) - client_to_server_queue.put(None) - server_to_client_queue.put(None) + # Signal threads to stop + client_to_server_queue.put(None) + finally: + # Clear any remaining items and add None sentinel to unblock any waiting threads + try: + while not client_to_server_queue.empty(): + client_to_server_queue.get_nowait() + except queue.Empty: + pass + + client_to_server_queue.put(None) + server_to_client_queue.put(None) + + # Shutdown executor without waiting to prevent hanging + executor.shutdown(wait=False) diff --git a/api/core/mcp/entities.py b/api/core/mcp/entities.py index 7553c10a2e..08823daab1 100644 --- a/api/core/mcp/entities.py +++ b/api/core/mcp/entities.py @@ -1,10 +1,13 @@ from dataclasses import dataclass +from enum import StrEnum from typing import Any, Generic, TypeVar -from core.mcp.session.base_session import BaseSession -from core.mcp.types import LATEST_PROTOCOL_VERSION, RequestId, RequestParams +from pydantic import BaseModel -SUPPORTED_PROTOCOL_VERSIONS: list[str] = ["2024-11-05", LATEST_PROTOCOL_VERSION] +from core.mcp.session.base_session import BaseSession +from core.mcp.types import LATEST_PROTOCOL_VERSION, OAuthClientInformation, OAuthMetadata, RequestId, RequestParams + +SUPPORTED_PROTOCOL_VERSIONS: list[str] = ["2024-11-05", "2025-03-26", LATEST_PROTOCOL_VERSION] SessionT = TypeVar("SessionT", bound=BaseSession[Any, Any, Any, Any, Any]) @@ -17,3 +20,41 @@ class RequestContext(Generic[SessionT, LifespanContextT]): meta: RequestParams.Meta | None session: SessionT lifespan_context: LifespanContextT + + +class AuthActionType(StrEnum): + """Types of actions that can be performed during auth flow.""" + + SAVE_CLIENT_INFO = "save_client_info" + SAVE_TOKENS = "save_tokens" + SAVE_CODE_VERIFIER = "save_code_verifier" + START_AUTHORIZATION = "start_authorization" + SUCCESS = "success" + + +class AuthAction(BaseModel): + """Represents an action that needs to be performed as a result of auth flow.""" + + action_type: AuthActionType + data: dict[str, Any] + provider_id: str | None = None + tenant_id: str | None = None + + +class AuthResult(BaseModel): + """Result of auth function containing actions to be performed and response data.""" + + actions: list[AuthAction] + response: dict[str, str] + + +class OAuthCallbackState(BaseModel): + """State data stored in Redis during OAuth callback flow.""" + + provider_id: str + tenant_id: str + server_url: str + metadata: OAuthMetadata | None = None + client_information: OAuthClientInformation + code_verifier: str + redirect_uri: str diff --git a/api/core/mcp/error.py b/api/core/mcp/error.py index 92ea7bde09..1128369ac5 100644 --- a/api/core/mcp/error.py +++ b/api/core/mcp/error.py @@ -1,3 +1,10 @@ +import re +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import httpx + + class MCPError(Exception): pass @@ -7,4 +14,50 @@ class MCPConnectionError(MCPError): class MCPAuthError(MCPConnectionError): + def __init__( + self, + message: str | None = None, + response: "httpx.Response | None" = None, + www_authenticate_header: str | None = None, + ): + """ + MCP Authentication Error. + + Args: + message: Error message + response: HTTP response object (will extract WWW-Authenticate header if provided) + www_authenticate_header: Pre-extracted WWW-Authenticate header value + """ + super().__init__(message or "Authentication failed") + + # Extract OAuth metadata hints from WWW-Authenticate header + if response is not None: + www_authenticate_header = response.headers.get("WWW-Authenticate") + + self.resource_metadata_url: str | None = None + self.scope_hint: str | None = None + + if www_authenticate_header: + self.resource_metadata_url = self._extract_field(www_authenticate_header, "resource_metadata") + self.scope_hint = self._extract_field(www_authenticate_header, "scope") + + @staticmethod + def _extract_field(www_auth: str, field_name: str) -> str | None: + """Extract a specific field from the WWW-Authenticate header.""" + # Pattern to match field="value" or field=value + pattern = rf'{field_name}="([^"]*)"' + match = re.search(pattern, www_auth) + if match: + return match.group(1) + + # Try without quotes + pattern = rf"{field_name}=([^\s,]+)" + match = re.search(pattern, www_auth) + if match: + return match.group(1) + + return None + + +class MCPRefreshTokenError(MCPError): pass diff --git a/api/core/mcp/mcp_client.py b/api/core/mcp/mcp_client.py index 86ec2c4db9..b0e0dab9be 100644 --- a/api/core/mcp/mcp_client.py +++ b/api/core/mcp/mcp_client.py @@ -7,9 +7,9 @@ from urllib.parse import urlparse from core.mcp.client.sse_client import sse_client from core.mcp.client.streamable_client import streamablehttp_client -from core.mcp.error import MCPAuthError, MCPConnectionError +from core.mcp.error import MCPConnectionError from core.mcp.session.client_session import ClientSession -from core.mcp.types import Tool +from core.mcp.types import CallToolResult, Tool logger = logging.getLogger(__name__) @@ -18,40 +18,18 @@ class MCPClient: def __init__( self, server_url: str, - provider_id: str, - tenant_id: str, - authed: bool = True, - authorization_code: str | None = None, - for_list: bool = False, headers: dict[str, str] | None = None, timeout: float | None = None, sse_read_timeout: float | None = None, ): - # Initialize info - self.provider_id = provider_id - self.tenant_id = tenant_id - self.client_type = "streamable" self.server_url = server_url self.headers = headers or {} self.timeout = timeout self.sse_read_timeout = sse_read_timeout - # Authentication info - self.authed = authed - self.authorization_code = authorization_code - if authed: - from core.mcp.auth.auth_provider import OAuthClientProvider - - self.provider = OAuthClientProvider(self.provider_id, self.tenant_id, for_list=for_list) - self.token = self.provider.tokens() - # Initialize session and client objects self._session: ClientSession | None = None - self._streams_context: AbstractContextManager[Any] | None = None - self._session_context: ClientSession | None = None self._exit_stack = ExitStack() - - # Whether the client has been initialized self._initialized = False def __enter__(self): @@ -85,61 +63,42 @@ class MCPClient: logger.debug("MCP connection failed with 'sse', falling back to 'mcp' method.") self.connect_server(streamablehttp_client, "mcp") - def connect_server( - self, client_factory: Callable[..., AbstractContextManager[Any]], method_name: str, first_try: bool = True - ): - from core.mcp.auth.auth_flow import auth + def connect_server(self, client_factory: Callable[..., AbstractContextManager[Any]], method_name: str) -> None: + """ + Connect to the MCP server using streamable http or sse. + Default to streamable http. + Args: + client_factory: The client factory to use(streamablehttp_client or sse_client). + method_name: The method name to use(mcp or sse). + """ + streams_context = client_factory( + url=self.server_url, + headers=self.headers, + timeout=self.timeout, + sse_read_timeout=self.sse_read_timeout, + ) - try: - headers = ( - {"Authorization": f"{self.token.token_type.capitalize()} {self.token.access_token}"} - if self.authed and self.token - else self.headers - ) - self._streams_context = client_factory( - url=self.server_url, - headers=headers, - timeout=self.timeout, - sse_read_timeout=self.sse_read_timeout, - ) - if not self._streams_context: - raise MCPConnectionError("Failed to create connection context") + # Use exit_stack to manage context managers properly + if method_name == "mcp": + read_stream, write_stream, _ = self._exit_stack.enter_context(streams_context) + streams = (read_stream, write_stream) + else: # sse_client + streams = self._exit_stack.enter_context(streams_context) - # Use exit_stack to manage context managers properly - if method_name == "mcp": - read_stream, write_stream, _ = self._exit_stack.enter_context(self._streams_context) - streams = (read_stream, write_stream) - else: # sse_client - streams = self._exit_stack.enter_context(self._streams_context) - - self._session_context = ClientSession(*streams) - self._session = self._exit_stack.enter_context(self._session_context) - self._session.initialize() - return - - except MCPAuthError: - if not self.authed: - raise - try: - auth(self.provider, self.server_url, self.authorization_code) - except Exception as e: - raise ValueError(f"Failed to authenticate: {e}") - self.token = self.provider.tokens() - if first_try: - return self.connect_server(client_factory, method_name, first_try=False) + session_context = ClientSession(*streams) + self._session = self._exit_stack.enter_context(session_context) + self._session.initialize() def list_tools(self) -> list[Tool]: - """Connect to an MCP server running with SSE transport""" - # List available tools to verify connection - if not self._initialized or not self._session: + """List available tools from the MCP server""" + if not self._session: raise ValueError("Session not initialized.") response = self._session.list_tools() - tools = response.tools - return tools + return response.tools - def invoke_tool(self, tool_name: str, tool_args: dict): + def invoke_tool(self, tool_name: str, tool_args: dict[str, Any]) -> CallToolResult: """Call a tool""" - if not self._initialized or not self._session: + if not self._session: raise ValueError("Session not initialized.") return self._session.call_tool(tool_name, tool_args) @@ -153,6 +112,4 @@ class MCPClient: raise ValueError(f"Error during cleanup: {e}") finally: self._session = None - self._session_context = None - self._streams_context = None self._initialized = False diff --git a/api/core/mcp/session/base_session.py b/api/core/mcp/session/base_session.py index 653b3773c0..c97ae6eac7 100644 --- a/api/core/mcp/session/base_session.py +++ b/api/core/mcp/session/base_session.py @@ -149,7 +149,7 @@ class BaseSession( messages when entered. """ - _response_streams: dict[RequestId, queue.Queue[JSONRPCResponse | JSONRPCError]] + _response_streams: dict[RequestId, queue.Queue[JSONRPCResponse | JSONRPCError | HTTPStatusError]] _request_id: int _in_flight: dict[RequestId, RequestResponder[ReceiveRequestT, SendResultT]] _receive_request_type: type[ReceiveRequestT] @@ -201,11 +201,14 @@ class BaseSession( self._receiver_future.result(timeout=5.0) # Wait up to 5 seconds except TimeoutError: # If the receiver loop is still running after timeout, we'll force shutdown - pass + # Cancel the future to interrupt the receiver loop + self._receiver_future.cancel() # Shutdown the executor if self._executor: - self._executor.shutdown(wait=True) + # Use non-blocking shutdown to prevent hanging + # The receiver thread should have already exited due to the None message in the queue + self._executor.shutdown(wait=False) def send_request( self, @@ -227,7 +230,7 @@ class BaseSession( request_id = self._request_id self._request_id = request_id + 1 - response_queue: queue.Queue[JSONRPCResponse | JSONRPCError] = queue.Queue() + response_queue: queue.Queue[JSONRPCResponse | JSONRPCError | HTTPStatusError] = queue.Queue() self._response_streams[request_id] = response_queue try: @@ -258,11 +261,17 @@ class BaseSession( message="No response received", ) ) + elif isinstance(response_or_error, HTTPStatusError): + # HTTPStatusError from streamable_client with preserved response object + if response_or_error.response.status_code == 401: + raise MCPAuthError(response=response_or_error.response) + else: + raise MCPConnectionError( + ErrorData(code=response_or_error.response.status_code, message=str(response_or_error)) + ) elif isinstance(response_or_error, JSONRPCError): if response_or_error.error.code == 401: - raise MCPAuthError( - ErrorData(code=response_or_error.error.code, message=response_or_error.error.message) - ) + raise MCPAuthError(message=response_or_error.error.message) else: raise MCPConnectionError( ErrorData(code=response_or_error.error.code, message=response_or_error.error.message) @@ -324,13 +333,17 @@ class BaseSession( if isinstance(message, HTTPStatusError): response_queue = self._response_streams.get(self._request_id - 1) if response_queue is not None: - response_queue.put( - JSONRPCError( - jsonrpc="2.0", - id=self._request_id - 1, - error=ErrorData(code=message.response.status_code, message=message.args[0]), + # For 401 errors, pass the HTTPStatusError directly to preserve response object + if message.response.status_code == 401: + response_queue.put(message) + else: + response_queue.put( + JSONRPCError( + jsonrpc="2.0", + id=self._request_id - 1, + error=ErrorData(code=message.response.status_code, message=message.args[0]), + ) ) - ) else: self._handle_incoming(RuntimeError(f"Received response with an unknown request ID: {message}")) elif isinstance(message, Exception): diff --git a/api/core/mcp/session/client_session.py b/api/core/mcp/session/client_session.py index fa1d309134..d684fe0dd7 100644 --- a/api/core/mcp/session/client_session.py +++ b/api/core/mcp/session/client_session.py @@ -109,12 +109,16 @@ class ClientSession( self._message_handler = message_handler or _default_message_handler def initialize(self) -> types.InitializeResult: - sampling = types.SamplingCapability() - roots = types.RootsCapability( - # TODO: Should this be based on whether we - # _will_ send notifications, or only whether - # they're supported? - listChanged=True, + # Only set capabilities if non-default callbacks are provided + # This prevents servers from attempting callbacks when we don't actually support them + sampling = types.SamplingCapability() if self._sampling_callback is not _default_sampling_callback else None + roots = ( + types.RootsCapability( + # Only enable listChanged if we have a custom callback + listChanged=True, + ) + if self._list_roots_callback is not _default_list_roots_callback + else None ) result = self.send_request( @@ -284,7 +288,7 @@ class ClientSession( def complete( self, - ref: types.ResourceReference | types.PromptReference, + ref: types.ResourceTemplateReference | types.PromptReference, argument: dict[str, str], ) -> types.CompleteResult: """Send a completion/complete request.""" diff --git a/api/core/mcp/types.py b/api/core/mcp/types.py index c7a046b585..335c6a5cbc 100644 --- a/api/core/mcp/types.py +++ b/api/core/mcp/types.py @@ -1,13 +1,6 @@ from collections.abc import Callable from dataclasses import dataclass -from typing import ( - Annotated, - Any, - Generic, - Literal, - TypeAlias, - TypeVar, -) +from typing import Annotated, Any, Generic, Literal, TypeAlias, TypeVar from pydantic import BaseModel, ConfigDict, Field, FileUrl, RootModel from pydantic.networks import AnyUrl, UrlConstraints @@ -30,9 +23,10 @@ for reference. not separate types in the schema. """ # Client support both version, not support 2025-06-18 yet. -LATEST_PROTOCOL_VERSION = "2025-03-26" +LATEST_PROTOCOL_VERSION = "2025-06-18" # Server support 2024-11-05 to allow claude to use. SERVER_LATEST_PROTOCOL_VERSION = "2024-11-05" +DEFAULT_NEGOTIATED_VERSION = "2025-03-26" ProgressToken = str | int Cursor = str Role = Literal["user", "assistant"] @@ -55,14 +49,22 @@ class RequestParams(BaseModel): meta: Meta | None = Field(alias="_meta", default=None) +class PaginatedRequestParams(RequestParams): + cursor: Cursor | None = None + """ + An opaque token representing the current pagination position. + If provided, the server should return results starting after this cursor. + """ + + class NotificationParams(BaseModel): class Meta(BaseModel): model_config = ConfigDict(extra="allow") meta: Meta | None = Field(alias="_meta", default=None) """ - This parameter name is reserved by MCP to allow clients and servers to attach - additional metadata to their notifications. + See [MCP specification](https://github.com/modelcontextprotocol/modelcontextprotocol/blob/47339c03c143bb4ec01a26e721a1b8fe66634ebe/docs/specification/draft/basic/index.mdx#general-fields) + for notes on _meta usage. """ @@ -79,12 +81,11 @@ class Request(BaseModel, Generic[RequestParamsT, MethodT]): model_config = ConfigDict(extra="allow") -class PaginatedRequest(Request[RequestParamsT, MethodT]): - cursor: Cursor | None = None - """ - An opaque token representing the current pagination position. - If provided, the server should return results starting after this cursor. - """ +class PaginatedRequest(Request[PaginatedRequestParams | None, MethodT], Generic[MethodT]): + """Base class for paginated requests, + matching the schema's PaginatedRequest interface.""" + + params: PaginatedRequestParams | None = None class Notification(BaseModel, Generic[NotificationParamsT, MethodT]): @@ -98,13 +99,12 @@ class Notification(BaseModel, Generic[NotificationParamsT, MethodT]): class Result(BaseModel): """Base class for JSON-RPC results.""" - model_config = ConfigDict(extra="allow") - meta: dict[str, Any] | None = Field(alias="_meta", default=None) """ - This result property is reserved by the protocol to allow clients and servers to - attach additional metadata to their responses. + See [MCP specification](https://github.com/modelcontextprotocol/modelcontextprotocol/blob/47339c03c143bb4ec01a26e721a1b8fe66634ebe/docs/specification/draft/basic/index.mdx#general-fields) + for notes on _meta usage. """ + model_config = ConfigDict(extra="allow") class PaginatedResult(Result): @@ -186,10 +186,26 @@ class EmptyResult(Result): """A response that indicates success but carries no data.""" -class Implementation(BaseModel): - """Describes the name and version of an MCP implementation.""" +class BaseMetadata(BaseModel): + """Base class for entities with name and optional title fields.""" name: str + """The programmatic name of the entity.""" + + title: str | None = None + """ + Intended for UI and end-user contexts — optimized to be human-readable and easily understood, + even by those unfamiliar with domain-specific terminology. + + If not provided, the name should be used for display (except for Tool, + where `annotations.title` should be given precedence over using `name`, + if present). + """ + + +class Implementation(BaseMetadata): + """Describes the name and version of an MCP implementation.""" + version: str model_config = ConfigDict(extra="allow") @@ -203,7 +219,7 @@ class RootsCapability(BaseModel): class SamplingCapability(BaseModel): - """Capability for logging operations.""" + """Capability for sampling operations.""" model_config = ConfigDict(extra="allow") @@ -252,6 +268,12 @@ class LoggingCapability(BaseModel): model_config = ConfigDict(extra="allow") +class CompletionsCapability(BaseModel): + """Capability for completions operations.""" + + model_config = ConfigDict(extra="allow") + + class ServerCapabilities(BaseModel): """Capabilities that a server may support.""" @@ -265,6 +287,8 @@ class ServerCapabilities(BaseModel): """Present if the server offers any resources to read.""" tools: ToolsCapability | None = None """Present if the server offers any tools to call.""" + completions: CompletionsCapability | None = None + """Present if the server offers autocompletion suggestions for prompts and resources.""" model_config = ConfigDict(extra="allow") @@ -284,7 +308,7 @@ class InitializeRequest(Request[InitializeRequestParams, Literal["initialize"]]) to begin initialization. """ - method: Literal["initialize"] + method: Literal["initialize"] = "initialize" params: InitializeRequestParams @@ -305,7 +329,7 @@ class InitializedNotification(Notification[NotificationParams | None, Literal["n finished. """ - method: Literal["notifications/initialized"] + method: Literal["notifications/initialized"] = "notifications/initialized" params: NotificationParams | None = None @@ -315,7 +339,7 @@ class PingRequest(Request[RequestParams | None, Literal["ping"]]): still alive. """ - method: Literal["ping"] + method: Literal["ping"] = "ping" params: RequestParams | None = None @@ -334,6 +358,11 @@ class ProgressNotificationParams(NotificationParams): """ total: float | None = None """Total number of items to process (or total progress required), if known.""" + message: str | None = None + """ + Message related to progress. This should provide relevant human readable + progress information. + """ model_config = ConfigDict(extra="allow") @@ -343,15 +372,14 @@ class ProgressNotification(Notification[ProgressNotificationParams, Literal["not long-running request. """ - method: Literal["notifications/progress"] + method: Literal["notifications/progress"] = "notifications/progress" params: ProgressNotificationParams -class ListResourcesRequest(PaginatedRequest[RequestParams | None, Literal["resources/list"]]): +class ListResourcesRequest(PaginatedRequest[Literal["resources/list"]]): """Sent from the client to request a list of resources the server has.""" - method: Literal["resources/list"] - params: RequestParams | None = None + method: Literal["resources/list"] = "resources/list" class Annotations(BaseModel): @@ -360,13 +388,11 @@ class Annotations(BaseModel): model_config = ConfigDict(extra="allow") -class Resource(BaseModel): +class Resource(BaseMetadata): """A known resource that the server is capable of reading.""" uri: Annotated[AnyUrl, UrlConstraints(host_required=False)] """The URI of this resource.""" - name: str - """A human-readable name for this resource.""" description: str | None = None """A description of what this resource represents.""" mimeType: str | None = None @@ -379,10 +405,15 @@ class Resource(BaseModel): This can be used by Hosts to display file sizes and estimate context window usage. """ annotations: Annotations | None = None + meta: dict[str, Any] | None = Field(alias="_meta", default=None) + """ + See [MCP specification](https://github.com/modelcontextprotocol/modelcontextprotocol/blob/47339c03c143bb4ec01a26e721a1b8fe66634ebe/docs/specification/draft/basic/index.mdx#general-fields) + for notes on _meta usage. + """ model_config = ConfigDict(extra="allow") -class ResourceTemplate(BaseModel): +class ResourceTemplate(BaseMetadata): """A template description for resources available on the server.""" uriTemplate: str @@ -390,8 +421,6 @@ class ResourceTemplate(BaseModel): A URI template (according to RFC 6570) that can be used to construct resource URIs. """ - name: str - """A human-readable name for the type of resource this template refers to.""" description: str | None = None """A human-readable description of what this template is for.""" mimeType: str | None = None @@ -400,6 +429,11 @@ class ResourceTemplate(BaseModel): included if all resources matching this template have the same type. """ annotations: Annotations | None = None + meta: dict[str, Any] | None = Field(alias="_meta", default=None) + """ + See [MCP specification](https://github.com/modelcontextprotocol/modelcontextprotocol/blob/47339c03c143bb4ec01a26e721a1b8fe66634ebe/docs/specification/draft/basic/index.mdx#general-fields) + for notes on _meta usage. + """ model_config = ConfigDict(extra="allow") @@ -409,11 +443,10 @@ class ListResourcesResult(PaginatedResult): resources: list[Resource] -class ListResourceTemplatesRequest(PaginatedRequest[RequestParams | None, Literal["resources/templates/list"]]): +class ListResourceTemplatesRequest(PaginatedRequest[Literal["resources/templates/list"]]): """Sent from the client to request a list of resource templates the server has.""" - method: Literal["resources/templates/list"] - params: RequestParams | None = None + method: Literal["resources/templates/list"] = "resources/templates/list" class ListResourceTemplatesResult(PaginatedResult): @@ -436,7 +469,7 @@ class ReadResourceRequestParams(RequestParams): class ReadResourceRequest(Request[ReadResourceRequestParams, Literal["resources/read"]]): """Sent from the client to the server, to read a specific resource URI.""" - method: Literal["resources/read"] + method: Literal["resources/read"] = "resources/read" params: ReadResourceRequestParams @@ -447,6 +480,11 @@ class ResourceContents(BaseModel): """The URI of this resource.""" mimeType: str | None = None """The MIME type of this resource, if known.""" + meta: dict[str, Any] | None = Field(alias="_meta", default=None) + """ + See [MCP specification](https://github.com/modelcontextprotocol/modelcontextprotocol/blob/47339c03c143bb4ec01a26e721a1b8fe66634ebe/docs/specification/draft/basic/index.mdx#general-fields) + for notes on _meta usage. + """ model_config = ConfigDict(extra="allow") @@ -481,7 +519,7 @@ class ResourceListChangedNotification( of resources it can read from has changed. """ - method: Literal["notifications/resources/list_changed"] + method: Literal["notifications/resources/list_changed"] = "notifications/resources/list_changed" params: NotificationParams | None = None @@ -502,7 +540,7 @@ class SubscribeRequest(Request[SubscribeRequestParams, Literal["resources/subscr whenever a particular resource changes. """ - method: Literal["resources/subscribe"] + method: Literal["resources/subscribe"] = "resources/subscribe" params: SubscribeRequestParams @@ -520,7 +558,7 @@ class UnsubscribeRequest(Request[UnsubscribeRequestParams, Literal["resources/un the server. """ - method: Literal["resources/unsubscribe"] + method: Literal["resources/unsubscribe"] = "resources/unsubscribe" params: UnsubscribeRequestParams @@ -543,15 +581,14 @@ class ResourceUpdatedNotification( changed and may need to be read again. """ - method: Literal["notifications/resources/updated"] + method: Literal["notifications/resources/updated"] = "notifications/resources/updated" params: ResourceUpdatedNotificationParams -class ListPromptsRequest(PaginatedRequest[RequestParams | None, Literal["prompts/list"]]): +class ListPromptsRequest(PaginatedRequest[Literal["prompts/list"]]): """Sent from the client to request a list of prompts and prompt templates.""" - method: Literal["prompts/list"] - params: RequestParams | None = None + method: Literal["prompts/list"] = "prompts/list" class PromptArgument(BaseModel): @@ -566,15 +603,18 @@ class PromptArgument(BaseModel): model_config = ConfigDict(extra="allow") -class Prompt(BaseModel): +class Prompt(BaseMetadata): """A prompt or prompt template that the server offers.""" - name: str - """The name of the prompt or prompt template.""" description: str | None = None """An optional description of what this prompt provides.""" arguments: list[PromptArgument] | None = None """A list of arguments to use for templating the prompt.""" + meta: dict[str, Any] | None = Field(alias="_meta", default=None) + """ + See [MCP specification](https://github.com/modelcontextprotocol/modelcontextprotocol/blob/47339c03c143bb4ec01a26e721a1b8fe66634ebe/docs/specification/draft/basic/index.mdx#general-fields) + for notes on _meta usage. + """ model_config = ConfigDict(extra="allow") @@ -597,7 +637,7 @@ class GetPromptRequestParams(RequestParams): class GetPromptRequest(Request[GetPromptRequestParams, Literal["prompts/get"]]): """Used by the client to get a prompt provided by the server.""" - method: Literal["prompts/get"] + method: Literal["prompts/get"] = "prompts/get" params: GetPromptRequestParams @@ -608,6 +648,11 @@ class TextContent(BaseModel): text: str """The text content of the message.""" annotations: Annotations | None = None + meta: dict[str, Any] | None = Field(alias="_meta", default=None) + """ + See [MCP specification](https://github.com/modelcontextprotocol/modelcontextprotocol/blob/47339c03c143bb4ec01a26e721a1b8fe66634ebe/docs/specification/draft/basic/index.mdx#general-fields) + for notes on _meta usage. + """ model_config = ConfigDict(extra="allow") @@ -623,6 +668,31 @@ class ImageContent(BaseModel): image types. """ annotations: Annotations | None = None + meta: dict[str, Any] | None = Field(alias="_meta", default=None) + """ + See [MCP specification](https://github.com/modelcontextprotocol/modelcontextprotocol/blob/47339c03c143bb4ec01a26e721a1b8fe66634ebe/docs/specification/draft/basic/index.mdx#general-fields) + for notes on _meta usage. + """ + model_config = ConfigDict(extra="allow") + + +class AudioContent(BaseModel): + """Audio content for a message.""" + + type: Literal["audio"] + data: str + """The base64-encoded audio data.""" + mimeType: str + """ + The MIME type of the audio. Different providers may support different + audio types. + """ + annotations: Annotations | None = None + meta: dict[str, Any] | None = Field(alias="_meta", default=None) + """ + See [MCP specification](https://github.com/modelcontextprotocol/modelcontextprotocol/blob/47339c03c143bb4ec01a26e721a1b8fe66634ebe/docs/specification/draft/basic/index.mdx#general-fields) + for notes on _meta usage. + """ model_config = ConfigDict(extra="allow") @@ -630,7 +700,7 @@ class SamplingMessage(BaseModel): """Describes a message issued to or received from an LLM API.""" role: Role - content: TextContent | ImageContent + content: TextContent | ImageContent | AudioContent model_config = ConfigDict(extra="allow") @@ -645,14 +715,36 @@ class EmbeddedResource(BaseModel): type: Literal["resource"] resource: TextResourceContents | BlobResourceContents annotations: Annotations | None = None + meta: dict[str, Any] | None = Field(alias="_meta", default=None) + """ + See [MCP specification](https://github.com/modelcontextprotocol/modelcontextprotocol/blob/47339c03c143bb4ec01a26e721a1b8fe66634ebe/docs/specification/draft/basic/index.mdx#general-fields) + for notes on _meta usage. + """ model_config = ConfigDict(extra="allow") +class ResourceLink(Resource): + """ + A resource that the server is capable of reading, included in a prompt or tool call result. + + Note: resource links returned by tools are not guaranteed to appear in the results of `resources/list` requests. + """ + + type: Literal["resource_link"] + + +ContentBlock = TextContent | ImageContent | AudioContent | ResourceLink | EmbeddedResource +"""A content block that can be used in prompts and tool results.""" + +Content: TypeAlias = ContentBlock +# """DEPRECATED: Content is deprecated, you should use ContentBlock directly.""" + + class PromptMessage(BaseModel): """Describes a message returned as part of a prompt.""" role: Role - content: TextContent | ImageContent | EmbeddedResource + content: ContentBlock model_config = ConfigDict(extra="allow") @@ -672,15 +764,14 @@ class PromptListChangedNotification( of prompts it offers has changed. """ - method: Literal["notifications/prompts/list_changed"] + method: Literal["notifications/prompts/list_changed"] = "notifications/prompts/list_changed" params: NotificationParams | None = None -class ListToolsRequest(PaginatedRequest[RequestParams | None, Literal["tools/list"]]): +class ListToolsRequest(PaginatedRequest[Literal["tools/list"]]): """Sent from the client to request a list of tools the server has.""" - method: Literal["tools/list"] - params: RequestParams | None = None + method: Literal["tools/list"] = "tools/list" class ToolAnnotations(BaseModel): @@ -731,17 +822,25 @@ class ToolAnnotations(BaseModel): model_config = ConfigDict(extra="allow") -class Tool(BaseModel): +class Tool(BaseMetadata): """Definition for a tool the client can call.""" - name: str - """The name of the tool.""" description: str | None = None """A human-readable description of the tool.""" inputSchema: dict[str, Any] """A JSON Schema object defining the expected parameters for the tool.""" + outputSchema: dict[str, Any] | None = None + """ + An optional JSON Schema object defining the structure of the tool's output + returned in the structuredContent field of a CallToolResult. + """ annotations: ToolAnnotations | None = None """Optional additional tool information.""" + meta: dict[str, Any] | None = Field(alias="_meta", default=None) + """ + See [MCP specification](https://github.com/modelcontextprotocol/modelcontextprotocol/blob/47339c03c143bb4ec01a26e721a1b8fe66634ebe/docs/specification/draft/basic/index.mdx#general-fields) + for notes on _meta usage. + """ model_config = ConfigDict(extra="allow") @@ -762,14 +861,16 @@ class CallToolRequestParams(RequestParams): class CallToolRequest(Request[CallToolRequestParams, Literal["tools/call"]]): """Used by the client to invoke a tool provided by the server.""" - method: Literal["tools/call"] + method: Literal["tools/call"] = "tools/call" params: CallToolRequestParams class CallToolResult(Result): """The server's response to a tool call.""" - content: list[TextContent | ImageContent | EmbeddedResource] + content: list[ContentBlock] + structuredContent: dict[str, Any] | None = None + """An optional JSON object that represents the structured result of the tool call.""" isError: bool = False @@ -779,7 +880,7 @@ class ToolListChangedNotification(Notification[NotificationParams | None, Litera of tools it offers has changed. """ - method: Literal["notifications/tools/list_changed"] + method: Literal["notifications/tools/list_changed"] = "notifications/tools/list_changed" params: NotificationParams | None = None @@ -797,7 +898,7 @@ class SetLevelRequestParams(RequestParams): class SetLevelRequest(Request[SetLevelRequestParams, Literal["logging/setLevel"]]): """A request from the client to the server, to enable or adjust logging.""" - method: Literal["logging/setLevel"] + method: Literal["logging/setLevel"] = "logging/setLevel" params: SetLevelRequestParams @@ -808,7 +909,7 @@ class LoggingMessageNotificationParams(NotificationParams): """The severity of this log message.""" logger: str | None = None """An optional name of the logger issuing this message.""" - data: Any = None + data: Any """ The data to be logged, such as a string message or an object. Any JSON serializable type is allowed here. @@ -819,7 +920,7 @@ class LoggingMessageNotificationParams(NotificationParams): class LoggingMessageNotification(Notification[LoggingMessageNotificationParams, Literal["notifications/message"]]): """Notification of a log message passed from server to client.""" - method: Literal["notifications/message"] + method: Literal["notifications/message"] = "notifications/message" params: LoggingMessageNotificationParams @@ -914,7 +1015,7 @@ class CreateMessageRequestParams(RequestParams): class CreateMessageRequest(Request[CreateMessageRequestParams, Literal["sampling/createMessage"]]): """A request from the server to sample an LLM via the client.""" - method: Literal["sampling/createMessage"] + method: Literal["sampling/createMessage"] = "sampling/createMessage" params: CreateMessageRequestParams @@ -925,14 +1026,14 @@ class CreateMessageResult(Result): """The client's response to a sampling/create_message request from the server.""" role: Role - content: TextContent | ImageContent + content: TextContent | ImageContent | AudioContent model: str """The name of the model that generated the message.""" stopReason: StopReason | None = None """The reason why sampling stopped, if known.""" -class ResourceReference(BaseModel): +class ResourceTemplateReference(BaseModel): """A reference to a resource or resource template definition.""" type: Literal["ref/resource"] @@ -960,18 +1061,28 @@ class CompletionArgument(BaseModel): model_config = ConfigDict(extra="allow") +class CompletionContext(BaseModel): + """Additional, optional context for completions.""" + + arguments: dict[str, str] | None = None + """Previously-resolved variables in a URI template or prompt.""" + model_config = ConfigDict(extra="allow") + + class CompleteRequestParams(RequestParams): """Parameters for completion requests.""" - ref: ResourceReference | PromptReference + ref: ResourceTemplateReference | PromptReference argument: CompletionArgument + context: CompletionContext | None = None + """Additional, optional context for completions""" model_config = ConfigDict(extra="allow") class CompleteRequest(Request[CompleteRequestParams, Literal["completion/complete"]]): """A request from the client to the server, to ask for completion options.""" - method: Literal["completion/complete"] + method: Literal["completion/complete"] = "completion/complete" params: CompleteRequestParams @@ -1010,7 +1121,7 @@ class ListRootsRequest(Request[RequestParams | None, Literal["roots/list"]]): structure or access specific locations that the client has permission to read from. """ - method: Literal["roots/list"] + method: Literal["roots/list"] = "roots/list" params: RequestParams | None = None @@ -1029,6 +1140,11 @@ class Root(BaseModel): identifier for the root, which may be useful for display purposes or for referencing the root in other parts of the application. """ + meta: dict[str, Any] | None = Field(alias="_meta", default=None) + """ + See [MCP specification](https://github.com/modelcontextprotocol/modelcontextprotocol/blob/47339c03c143bb4ec01a26e721a1b8fe66634ebe/docs/specification/draft/basic/index.mdx#general-fields) + for notes on _meta usage. + """ model_config = ConfigDict(extra="allow") @@ -1054,7 +1170,7 @@ class RootsListChangedNotification( using the ListRootsRequest. """ - method: Literal["notifications/roots/list_changed"] + method: Literal["notifications/roots/list_changed"] = "notifications/roots/list_changed" params: NotificationParams | None = None @@ -1074,7 +1190,7 @@ class CancelledNotification(Notification[CancelledNotificationParams, Literal["n previously-issued request. """ - method: Literal["notifications/cancelled"] + method: Literal["notifications/cancelled"] = "notifications/cancelled" params: CancelledNotificationParams @@ -1214,3 +1330,13 @@ class OAuthMetadata(BaseModel): response_types_supported: list[str] grant_types_supported: list[str] | None = None code_challenge_methods_supported: list[str] | None = None + scopes_supported: list[str] | None = None + + +class ProtectedResourceMetadata(BaseModel): + """OAuth 2.0 Protected Resource Metadata (RFC 9470).""" + + resource: str | None = None + authorization_servers: list[str] + scopes_supported: list[str] | None = None + bearer_methods_supported: list[str] | None = None diff --git a/api/core/memory/token_buffer_memory.py b/api/core/memory/token_buffer_memory.py index 35af742f2a..3ebbb60f85 100644 --- a/api/core/memory/token_buffer_memory.py +++ b/api/core/memory/token_buffer_memory.py @@ -1,6 +1,7 @@ from collections.abc import Sequence from sqlalchemy import select +from sqlalchemy.orm import sessionmaker from core.app.app_config.features.file_upload.manager import FileUploadConfigManager from core.file import file_manager @@ -18,7 +19,9 @@ from core.prompt.utils.extract_thread_messages import extract_thread_messages from extensions.ext_database import db from factories import file_factory from models.model import AppMode, Conversation, Message, MessageFile -from models.workflow import Workflow, WorkflowRun +from models.workflow import Workflow +from repositories.api_workflow_run_repository import APIWorkflowRunRepository +from repositories.factory import DifyAPIRepositoryFactory class TokenBufferMemory: @@ -29,6 +32,14 @@ class TokenBufferMemory: ): self.conversation = conversation self.model_instance = model_instance + self._workflow_run_repo: APIWorkflowRunRepository | None = None + + @property + def workflow_run_repo(self) -> APIWorkflowRunRepository: + if self._workflow_run_repo is None: + session_maker = sessionmaker(bind=db.engine, expire_on_commit=False) + self._workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker) + return self._workflow_run_repo def _build_prompt_message_with_files( self, @@ -50,7 +61,16 @@ class TokenBufferMemory: if self.conversation.mode in {AppMode.AGENT_CHAT, AppMode.COMPLETION, AppMode.CHAT}: file_extra_config = FileUploadConfigManager.convert(self.conversation.model_config) elif self.conversation.mode in {AppMode.ADVANCED_CHAT, AppMode.WORKFLOW}: - workflow_run = db.session.scalar(select(WorkflowRun).where(WorkflowRun.id == message.workflow_run_id)) + app = self.conversation.app + if not app: + raise ValueError("App not found for conversation") + + if not message.workflow_run_id: + raise ValueError("Workflow run ID not found") + + workflow_run = self.workflow_run_repo.get_workflow_run_by_id( + tenant_id=app.tenant_id, app_id=app.id, run_id=message.workflow_run_id + ) if not workflow_run: raise ValueError(f"Workflow run not found: {message.workflow_run_id}") workflow = db.session.scalar(select(Workflow).where(Workflow.id == workflow_run.workflow_id)) diff --git a/api/core/model_runtime/entities/llm_entities.py b/api/core/model_runtime/entities/llm_entities.py index 17f6000d93..2c7c421eed 100644 --- a/api/core/model_runtime/entities/llm_entities.py +++ b/api/core/model_runtime/entities/llm_entities.py @@ -38,6 +38,8 @@ class LLMUsageMetadata(TypedDict, total=False): prompt_price: Union[float, str] completion_price: Union[float, str] latency: float + time_to_first_token: float + time_to_generate: float class LLMUsage(ModelUsage): @@ -57,6 +59,8 @@ class LLMUsage(ModelUsage): total_price: Decimal currency: str latency: float + time_to_first_token: float | None = None + time_to_generate: float | None = None @classmethod def empty_usage(cls): @@ -73,6 +77,8 @@ class LLMUsage(ModelUsage): total_price=Decimal("0.0"), currency="USD", latency=0.0, + time_to_first_token=None, + time_to_generate=None, ) @classmethod @@ -108,6 +114,8 @@ class LLMUsage(ModelUsage): prompt_price=Decimal(str(metadata.get("prompt_price", 0))), completion_price=Decimal(str(metadata.get("completion_price", 0))), latency=metadata.get("latency", 0.0), + time_to_first_token=metadata.get("time_to_first_token"), + time_to_generate=metadata.get("time_to_generate"), ) def plus(self, other: LLMUsage) -> LLMUsage: @@ -133,6 +141,8 @@ class LLMUsage(ModelUsage): total_price=self.total_price + other.total_price, currency=other.currency, latency=self.latency + other.latency, + time_to_first_token=other.time_to_first_token, + time_to_generate=other.time_to_generate, ) def __add__(self, other: LLMUsage) -> LLMUsage: diff --git a/api/core/moderation/openai_moderation/openai_moderation.py b/api/core/moderation/openai_moderation/openai_moderation.py index 74ef6f7ceb..5cab4841f5 100644 --- a/api/core/moderation/openai_moderation/openai_moderation.py +++ b/api/core/moderation/openai_moderation/openai_moderation.py @@ -52,7 +52,7 @@ class OpenAIModeration(Moderation): text = "\n".join(str(inputs.values())) model_manager = ModelManager() model_instance = model_manager.get_model_instance( - tenant_id=self.tenant_id, provider="openai", model_type=ModelType.MODERATION, model="text-moderation-stable" + tenant_id=self.tenant_id, provider="openai", model_type=ModelType.MODERATION, model="omni-moderation-latest" ) openai_moderation = model_instance.invoke_moderation(text=text) diff --git a/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py b/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py index 03d2d75372..347992fa0d 100644 --- a/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py +++ b/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py @@ -1,21 +1,22 @@ -import hashlib import json import logging import os +import traceback from datetime import datetime, timedelta from typing import Any, Union, cast from urllib.parse import urlparse -from openinference.semconv.trace import OpenInferenceSpanKindValues, SpanAttributes -from opentelemetry import trace +from openinference.semconv.trace import OpenInferenceMimeTypeValues, OpenInferenceSpanKindValues, SpanAttributes from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GrpcOTLPSpanExporter from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HttpOTLPSpanExporter from opentelemetry.sdk import trace as trace_sdk from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace.export import SimpleSpanProcessor -from opentelemetry.sdk.trace.id_generator import RandomIdGenerator -from opentelemetry.trace import SpanContext, TraceFlags, TraceState -from sqlalchemy import select +from opentelemetry.semconv.trace import SpanAttributes as OTELSpanAttributes +from opentelemetry.trace import Span, Status, StatusCode, set_span_in_context, use_span +from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator +from opentelemetry.util.types import AttributeValue +from sqlalchemy.orm import sessionmaker from core.ops.base_trace_instance import BaseTraceInstance from core.ops.entities.config_entity import ArizeConfig, PhoenixConfig @@ -30,9 +31,10 @@ from core.ops.entities.trace_entity import ( TraceTaskName, WorkflowTraceInfo, ) +from core.repositories import DifyCoreRepositoryFactory from extensions.ext_database import db from models.model import EndUser, MessageFile -from models.workflow import WorkflowNodeExecutionModel +from models.workflow import WorkflowNodeExecutionTriggeredFrom logger = logging.getLogger(__name__) @@ -99,22 +101,45 @@ def datetime_to_nanos(dt: datetime | None) -> int: return int(dt.timestamp() * 1_000_000_000) -def string_to_trace_id128(string: str | None) -> int: - """ - Convert any input string into a stable 128-bit integer trace ID. +def error_to_string(error: Exception | str | None) -> str: + """Convert an error to a string with traceback information.""" + error_message = "Empty Stack Trace" + if error: + if isinstance(error, Exception): + string_stacktrace = "".join(traceback.format_exception(error)) + error_message = f"{error.__class__.__name__}: {error}\n\n{string_stacktrace}" + else: + error_message = str(error) + return error_message - This uses SHA-256 hashing and takes the first 16 bytes (128 bits) of the digest. - It's suitable for generating consistent, unique identifiers from strings. - """ - if string is None: - string = "" - hash_object = hashlib.sha256(string.encode()) - # Take the first 16 bytes (128 bits) of the hash digest - digest = hash_object.digest()[:16] +def set_span_status(current_span: Span, error: Exception | str | None = None): + """Set the status of the current span based on the presence of an error.""" + if error: + error_string = error_to_string(error) + current_span.set_status(Status(StatusCode.ERROR, error_string)) - # Convert to a 128-bit integer - return int.from_bytes(digest, byteorder="big") + if isinstance(error, Exception): + current_span.record_exception(error) + else: + exception_type = error.__class__.__name__ + exception_message = str(error) + if not exception_message: + exception_message = repr(error) + attributes: dict[str, AttributeValue] = { + OTELSpanAttributes.EXCEPTION_TYPE: exception_type, + OTELSpanAttributes.EXCEPTION_MESSAGE: exception_message, + OTELSpanAttributes.EXCEPTION_ESCAPED: False, + OTELSpanAttributes.EXCEPTION_STACKTRACE: error_string, + } + current_span.add_event(name="exception", attributes=attributes) + else: + current_span.set_status(Status(StatusCode.OK)) + + +def safe_json_dumps(obj: Any) -> str: + """A convenience wrapper around `json.dumps` that ensures that any object can be safely encoded.""" + return json.dumps(obj, default=str, ensure_ascii=False) class ArizePhoenixDataTrace(BaseTraceInstance): @@ -131,9 +156,12 @@ class ArizePhoenixDataTrace(BaseTraceInstance): self.tracer, self.processor = setup_tracer(arize_phoenix_config) self.project = arize_phoenix_config.project self.file_base_url = os.getenv("FILES_URL", "http://127.0.0.1:5001") + self.propagator = TraceContextTextMapPropagator() + self.dify_trace_ids: set[str] = set() def trace(self, trace_info: BaseTraceInfo): - logger.info("[Arize/Phoenix] Trace: %s", trace_info) + logger.info("[Arize/Phoenix] Trace Entity Info: %s", trace_info) + logger.info("[Arize/Phoenix] Trace Entity Type: %s", type(trace_info)) try: if isinstance(trace_info, WorkflowTraceInfo): self.workflow_trace(trace_info) @@ -151,7 +179,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance): self.generate_name_trace(trace_info) except Exception as e: - logger.error("[Arize/Phoenix] Error in the trace: %s", str(e), exc_info=True) + logger.error("[Arize/Phoenix] Trace Entity Error: %s", str(e), exc_info=True) raise def workflow_trace(self, trace_info: WorkflowTraceInfo): @@ -166,15 +194,9 @@ class ArizePhoenixDataTrace(BaseTraceInstance): } workflow_metadata.update(trace_info.metadata) - trace_id = string_to_trace_id128(trace_info.trace_id or trace_info.workflow_run_id) - span_id = RandomIdGenerator().generate_span_id() - context = SpanContext( - trace_id=trace_id, - span_id=span_id, - is_remote=False, - trace_flags=TraceFlags(TraceFlags.SAMPLED), - trace_state=TraceState(), - ) + dify_trace_id = trace_info.trace_id or trace_info.message_id or trace_info.workflow_run_id + self.ensure_root_span(dify_trace_id) + root_span_context = self.propagator.extract(carrier=self.carrier) workflow_span = self.tracer.start_span( name=TraceTaskName.WORKFLOW_TRACE.value, @@ -186,31 +208,58 @@ class ArizePhoenixDataTrace(BaseTraceInstance): SpanAttributes.SESSION_ID: trace_info.conversation_id or "", }, start_time=datetime_to_nanos(trace_info.start_time), - context=trace.set_span_in_context(trace.NonRecordingSpan(context)), + context=root_span_context, + ) + + # Through workflow_run_id, get all_nodes_execution using repository + session_factory = sessionmaker(bind=db.engine) + + # Find the app's creator account + app_id = trace_info.metadata.get("app_id") + if not app_id: + raise ValueError("No app_id found in trace_info metadata") + + service_account = self.get_service_account_with_tenant(app_id) + + workflow_node_execution_repository = DifyCoreRepositoryFactory.create_workflow_node_execution_repository( + session_factory=session_factory, + user=service_account, + app_id=app_id, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + # Get all executions for this workflow run + workflow_node_executions = workflow_node_execution_repository.get_by_workflow_run( + workflow_run_id=trace_info.workflow_run_id ) try: - # Process workflow nodes - for node_execution in self._get_workflow_nodes(trace_info.workflow_run_id): + for node_execution in workflow_node_executions: + tenant_id = trace_info.tenant_id # Use from trace_info instead + app_id = trace_info.metadata.get("app_id") # Use from trace_info instead + inputs_value = node_execution.inputs or {} + outputs_value = node_execution.outputs or {} + created_at = node_execution.created_at or datetime.now() elapsed_time = node_execution.elapsed_time finished_at = created_at + timedelta(seconds=elapsed_time) - process_data = json.loads(node_execution.process_data) if node_execution.process_data else {} + process_data = node_execution.process_data or {} + execution_metadata = node_execution.metadata or {} + node_metadata = {str(k): v for k, v in execution_metadata.items()} - node_metadata = { - "node_id": node_execution.id, - "node_type": node_execution.node_type, - "node_status": node_execution.status, - "tenant_id": node_execution.tenant_id, - "app_id": node_execution.app_id, - "app_name": node_execution.title, - "status": node_execution.status, - "level": "ERROR" if node_execution.status != "succeeded" else "DEFAULT", - } - - if node_execution.execution_metadata: - node_metadata.update(json.loads(node_execution.execution_metadata)) + node_metadata.update( + { + "node_id": node_execution.id, + "node_type": node_execution.node_type, + "node_status": node_execution.status, + "tenant_id": tenant_id, + "app_id": app_id, + "app_name": node_execution.title, + "status": node_execution.status, + "level": "ERROR" if node_execution.status == "failed" else "DEFAULT", + } + ) # Determine the correct span kind based on node type span_kind = OpenInferenceSpanKindValues.CHAIN @@ -223,8 +272,9 @@ class ArizePhoenixDataTrace(BaseTraceInstance): if model: node_metadata["ls_model_name"] = model - outputs = json.loads(node_execution.outputs).get("usage", {}) if "outputs" in node_execution else {} - usage_data = process_data.get("usage", {}) if "usage" in process_data else outputs.get("usage", {}) + usage_data = ( + process_data.get("usage", {}) if "usage" in process_data else outputs_value.get("usage", {}) + ) if usage_data: node_metadata["total_tokens"] = usage_data.get("total_tokens", 0) node_metadata["prompt_tokens"] = usage_data.get("prompt_tokens", 0) @@ -236,17 +286,20 @@ class ArizePhoenixDataTrace(BaseTraceInstance): else: span_kind = OpenInferenceSpanKindValues.CHAIN + workflow_span_context = set_span_in_context(workflow_span) node_span = self.tracer.start_span( name=node_execution.node_type, attributes={ - SpanAttributes.INPUT_VALUE: node_execution.inputs or "{}", - SpanAttributes.OUTPUT_VALUE: node_execution.outputs or "{}", + SpanAttributes.INPUT_VALUE: safe_json_dumps(inputs_value), + SpanAttributes.INPUT_MIME_TYPE: OpenInferenceMimeTypeValues.JSON.value, + SpanAttributes.OUTPUT_VALUE: safe_json_dumps(outputs_value), + SpanAttributes.OUTPUT_MIME_TYPE: OpenInferenceMimeTypeValues.JSON.value, SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind.value, - SpanAttributes.METADATA: json.dumps(node_metadata, ensure_ascii=False), + SpanAttributes.METADATA: safe_json_dumps(node_metadata), SpanAttributes.SESSION_ID: trace_info.conversation_id or "", }, start_time=datetime_to_nanos(created_at), - context=trace.set_span_in_context(trace.NonRecordingSpan(context)), + context=workflow_span_context, ) try: @@ -260,11 +313,8 @@ class ArizePhoenixDataTrace(BaseTraceInstance): llm_attributes[SpanAttributes.LLM_PROVIDER] = provider if model: llm_attributes[SpanAttributes.LLM_MODEL_NAME] = model - outputs = ( - json.loads(node_execution.outputs).get("usage", {}) if "outputs" in node_execution else {} - ) usage_data = ( - process_data.get("usage", {}) if "usage" in process_data else outputs.get("usage", {}) + process_data.get("usage", {}) if "usage" in process_data else outputs_value.get("usage", {}) ) if usage_data: llm_attributes[SpanAttributes.LLM_TOKEN_COUNT_TOTAL] = usage_data.get("total_tokens", 0) @@ -275,8 +325,16 @@ class ArizePhoenixDataTrace(BaseTraceInstance): llm_attributes.update(self._construct_llm_attributes(process_data.get("prompts", []))) node_span.set_attributes(llm_attributes) finally: + if node_execution.status == "failed": + set_span_status(node_span, node_execution.error) + else: + set_span_status(node_span) node_span.end(end_time=datetime_to_nanos(finished_at)) finally: + if trace_info.error: + set_span_status(workflow_span, trace_info.error) + else: + set_span_status(workflow_span) workflow_span.end(end_time=datetime_to_nanos(trace_info.end_time)) def message_trace(self, trace_info: MessageTraceInfo): @@ -322,34 +380,18 @@ class ArizePhoenixDataTrace(BaseTraceInstance): SpanAttributes.SESSION_ID: trace_info.message_data.conversation_id, } - trace_id = string_to_trace_id128(trace_info.trace_id or trace_info.message_id) - message_span_id = RandomIdGenerator().generate_span_id() - span_context = SpanContext( - trace_id=trace_id, - span_id=message_span_id, - is_remote=False, - trace_flags=TraceFlags(TraceFlags.SAMPLED), - trace_state=TraceState(), - ) + dify_trace_id = trace_info.trace_id or trace_info.message_id + self.ensure_root_span(dify_trace_id) + root_span_context = self.propagator.extract(carrier=self.carrier) message_span = self.tracer.start_span( name=TraceTaskName.MESSAGE_TRACE.value, attributes=attributes, start_time=datetime_to_nanos(trace_info.start_time), - context=trace.set_span_in_context(trace.NonRecordingSpan(span_context)), + context=root_span_context, ) try: - if trace_info.error: - message_span.add_event( - "exception", - attributes={ - "exception.message": trace_info.error, - "exception.type": "Error", - "exception.stacktrace": trace_info.error, - }, - ) - # Convert outputs to string based on type if isinstance(trace_info.outputs, dict | list): outputs_str = json.dumps(trace_info.outputs, ensure_ascii=False) @@ -383,26 +425,26 @@ class ArizePhoenixDataTrace(BaseTraceInstance): if model_params := metadata_dict.get("model_parameters"): llm_attributes[SpanAttributes.LLM_INVOCATION_PARAMETERS] = json.dumps(model_params) + message_span_context = set_span_in_context(message_span) llm_span = self.tracer.start_span( name="llm", attributes=llm_attributes, start_time=datetime_to_nanos(trace_info.start_time), - context=trace.set_span_in_context(trace.NonRecordingSpan(span_context)), + context=message_span_context, ) try: - if trace_info.error: - llm_span.add_event( - "exception", - attributes={ - "exception.message": trace_info.error, - "exception.type": "Error", - "exception.stacktrace": trace_info.error, - }, - ) + if trace_info.message_data.error: + set_span_status(llm_span, trace_info.message_data.error) + else: + set_span_status(llm_span) finally: llm_span.end(end_time=datetime_to_nanos(trace_info.end_time)) finally: + if trace_info.error: + set_span_status(message_span, trace_info.error) + else: + set_span_status(message_span) message_span.end(end_time=datetime_to_nanos(trace_info.end_time)) def moderation_trace(self, trace_info: ModerationTraceInfo): @@ -418,15 +460,9 @@ class ArizePhoenixDataTrace(BaseTraceInstance): } metadata.update(trace_info.metadata) - trace_id = string_to_trace_id128(trace_info.message_id) - span_id = RandomIdGenerator().generate_span_id() - context = SpanContext( - trace_id=trace_id, - span_id=span_id, - is_remote=False, - trace_flags=TraceFlags(TraceFlags.SAMPLED), - trace_state=TraceState(), - ) + dify_trace_id = trace_info.trace_id or trace_info.message_id + self.ensure_root_span(dify_trace_id) + root_span_context = self.propagator.extract(carrier=self.carrier) span = self.tracer.start_span( name=TraceTaskName.MODERATION_TRACE.value, @@ -445,19 +481,14 @@ class ArizePhoenixDataTrace(BaseTraceInstance): SpanAttributes.METADATA: json.dumps(metadata, ensure_ascii=False), }, start_time=datetime_to_nanos(trace_info.start_time), - context=trace.set_span_in_context(trace.NonRecordingSpan(context)), + context=root_span_context, ) try: if trace_info.message_data.error: - span.add_event( - "exception", - attributes={ - "exception.message": trace_info.message_data.error, - "exception.type": "Error", - "exception.stacktrace": trace_info.message_data.error, - }, - ) + set_span_status(span, trace_info.message_data.error) + else: + set_span_status(span) finally: span.end(end_time=datetime_to_nanos(trace_info.end_time)) @@ -480,15 +511,9 @@ class ArizePhoenixDataTrace(BaseTraceInstance): } metadata.update(trace_info.metadata) - trace_id = string_to_trace_id128(trace_info.message_id) - span_id = RandomIdGenerator().generate_span_id() - context = SpanContext( - trace_id=trace_id, - span_id=span_id, - is_remote=False, - trace_flags=TraceFlags(TraceFlags.SAMPLED), - trace_state=TraceState(), - ) + dify_trace_id = trace_info.trace_id or trace_info.message_id + self.ensure_root_span(dify_trace_id) + root_span_context = self.propagator.extract(carrier=self.carrier) span = self.tracer.start_span( name=TraceTaskName.SUGGESTED_QUESTION_TRACE.value, @@ -499,19 +524,14 @@ class ArizePhoenixDataTrace(BaseTraceInstance): SpanAttributes.METADATA: json.dumps(metadata, ensure_ascii=False), }, start_time=datetime_to_nanos(start_time), - context=trace.set_span_in_context(trace.NonRecordingSpan(context)), + context=root_span_context, ) try: if trace_info.error: - span.add_event( - "exception", - attributes={ - "exception.message": trace_info.error, - "exception.type": "Error", - "exception.stacktrace": trace_info.error, - }, - ) + set_span_status(span, trace_info.error) + else: + set_span_status(span) finally: span.end(end_time=datetime_to_nanos(end_time)) @@ -533,15 +553,9 @@ class ArizePhoenixDataTrace(BaseTraceInstance): } metadata.update(trace_info.metadata) - trace_id = string_to_trace_id128(trace_info.message_id) - span_id = RandomIdGenerator().generate_span_id() - context = SpanContext( - trace_id=trace_id, - span_id=span_id, - is_remote=False, - trace_flags=TraceFlags(TraceFlags.SAMPLED), - trace_state=TraceState(), - ) + dify_trace_id = trace_info.trace_id or trace_info.message_id + self.ensure_root_span(dify_trace_id) + root_span_context = self.propagator.extract(carrier=self.carrier) span = self.tracer.start_span( name=TraceTaskName.DATASET_RETRIEVAL_TRACE.value, @@ -554,19 +568,14 @@ class ArizePhoenixDataTrace(BaseTraceInstance): "end_time": end_time.isoformat() if end_time else "", }, start_time=datetime_to_nanos(start_time), - context=trace.set_span_in_context(trace.NonRecordingSpan(context)), + context=root_span_context, ) try: if trace_info.message_data.error: - span.add_event( - "exception", - attributes={ - "exception.message": trace_info.message_data.error, - "exception.type": "Error", - "exception.stacktrace": trace_info.message_data.error, - }, - ) + set_span_status(span, trace_info.message_data.error) + else: + set_span_status(span) finally: span.end(end_time=datetime_to_nanos(end_time)) @@ -580,20 +589,9 @@ class ArizePhoenixDataTrace(BaseTraceInstance): "tool_config": json.dumps(trace_info.tool_config, ensure_ascii=False), } - trace_id = string_to_trace_id128(trace_info.message_id) - tool_span_id = RandomIdGenerator().generate_span_id() - logger.info("[Arize/Phoenix] Creating tool trace with trace_id: %s, span_id: %s", trace_id, tool_span_id) - - # Create span context with the same trace_id as the parent - # todo: Create with the appropriate parent span context, so that the tool span is - # a child of the appropriate span (e.g. message span) - span_context = SpanContext( - trace_id=trace_id, - span_id=tool_span_id, - is_remote=False, - trace_flags=TraceFlags(TraceFlags.SAMPLED), - trace_state=TraceState(), - ) + dify_trace_id = trace_info.trace_id or trace_info.message_id + self.ensure_root_span(dify_trace_id) + root_span_context = self.propagator.extract(carrier=self.carrier) tool_params_str = ( json.dumps(trace_info.tool_parameters, ensure_ascii=False) @@ -612,19 +610,14 @@ class ArizePhoenixDataTrace(BaseTraceInstance): SpanAttributes.TOOL_PARAMETERS: tool_params_str, }, start_time=datetime_to_nanos(trace_info.start_time), - context=trace.set_span_in_context(trace.NonRecordingSpan(span_context)), + context=root_span_context, ) try: if trace_info.error: - span.add_event( - "exception", - attributes={ - "exception.message": trace_info.error, - "exception.type": "Error", - "exception.stacktrace": trace_info.error, - }, - ) + set_span_status(span, trace_info.error) + else: + set_span_status(span) finally: span.end(end_time=datetime_to_nanos(trace_info.end_time)) @@ -641,15 +634,9 @@ class ArizePhoenixDataTrace(BaseTraceInstance): } metadata.update(trace_info.metadata) - trace_id = string_to_trace_id128(trace_info.message_id) - span_id = RandomIdGenerator().generate_span_id() - context = SpanContext( - trace_id=trace_id, - span_id=span_id, - is_remote=False, - trace_flags=TraceFlags(TraceFlags.SAMPLED), - trace_state=TraceState(), - ) + dify_trace_id = trace_info.trace_id or trace_info.message_id or trace_info.conversation_id + self.ensure_root_span(dify_trace_id) + root_span_context = self.propagator.extract(carrier=self.carrier) span = self.tracer.start_span( name=TraceTaskName.GENERATE_NAME_TRACE.value, @@ -663,22 +650,34 @@ class ArizePhoenixDataTrace(BaseTraceInstance): "end_time": trace_info.end_time.isoformat() if trace_info.end_time else "", }, start_time=datetime_to_nanos(trace_info.start_time), - context=trace.set_span_in_context(trace.NonRecordingSpan(context)), + context=root_span_context, ) try: if trace_info.message_data.error: - span.add_event( - "exception", - attributes={ - "exception.message": trace_info.message_data.error, - "exception.type": "Error", - "exception.stacktrace": trace_info.message_data.error, - }, - ) + set_span_status(span, trace_info.message_data.error) + else: + set_span_status(span) finally: span.end(end_time=datetime_to_nanos(trace_info.end_time)) + def ensure_root_span(self, dify_trace_id: str | None): + """Ensure a unique root span exists for the given Dify trace ID.""" + if str(dify_trace_id) not in self.dify_trace_ids: + self.carrier: dict[str, str] = {} + + root_span = self.tracer.start_span(name="Dify") + root_span.set_attribute(SpanAttributes.OPENINFERENCE_SPAN_KIND, OpenInferenceSpanKindValues.CHAIN.value) + root_span.set_attribute("dify_project_name", str(self.project)) + root_span.set_attribute("dify_trace_id", str(dify_trace_id)) + + with use_span(root_span, end_on_exit=False): + self.propagator.inject(carrier=self.carrier) + + set_span_status(root_span) + root_span.end() + self.dify_trace_ids.add(str(dify_trace_id)) + def api_check(self): try: with self.tracer.start_span("api_check") as span: @@ -698,26 +697,6 @@ class ArizePhoenixDataTrace(BaseTraceInstance): logger.info("[Arize/Phoenix] Get run url failed: %s", str(e), exc_info=True) raise ValueError(f"[Arize/Phoenix] Get run url failed: {str(e)}") - def _get_workflow_nodes(self, workflow_run_id: str): - """Helper method to get workflow nodes""" - workflow_nodes = db.session.scalars( - select( - WorkflowNodeExecutionModel.id, - WorkflowNodeExecutionModel.tenant_id, - WorkflowNodeExecutionModel.app_id, - WorkflowNodeExecutionModel.title, - WorkflowNodeExecutionModel.node_type, - WorkflowNodeExecutionModel.status, - WorkflowNodeExecutionModel.inputs, - WorkflowNodeExecutionModel.outputs, - WorkflowNodeExecutionModel.created_at, - WorkflowNodeExecutionModel.elapsed_time, - WorkflowNodeExecutionModel.process_data, - WorkflowNodeExecutionModel.execution_metadata, - ).where(WorkflowNodeExecutionModel.workflow_run_id == workflow_run_id) - ).all() - return workflow_nodes - def _construct_llm_attributes(self, prompts: dict | list | str | None) -> dict[str, str]: """Helper method to construct LLM attributes with passed prompts.""" attributes = {} diff --git a/api/core/ops/entities/trace_entity.py b/api/core/ops/entities/trace_entity.py index 5b81c09a2d..50a2cdea63 100644 --- a/api/core/ops/entities/trace_entity.py +++ b/api/core/ops/entities/trace_entity.py @@ -62,6 +62,9 @@ class MessageTraceInfo(BaseTraceInfo): file_list: Union[str, dict[str, Any], list] | None = None message_file_data: Any | None = None conversation_mode: str + gen_ai_server_time_to_first_token: float | None = None + llm_streaming_time_to_generate: float | None = None + is_streaming_request: bool = False class ModerationTraceInfo(BaseTraceInfo): diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index 7db9b076d2..e8ba2d7aab 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -12,9 +12,9 @@ from uuid import UUID, uuid4 from cachetools import LRUCache from flask import current_app from sqlalchemy import select -from sqlalchemy.orm import Session +from sqlalchemy.orm import Session, sessionmaker -from core.helper.encrypter import decrypt_token, encrypt_token, obfuscated_token +from core.helper.encrypter import batch_decrypt_token, encrypt_token, obfuscated_token from core.ops.entities.config_entity import ( OPS_FILE_PATH, TracingProviderEnum, @@ -34,7 +34,8 @@ from core.ops.utils import get_message_data from extensions.ext_database import db from extensions.ext_storage import storage from models.model import App, AppModelConfig, Conversation, Message, MessageFile, TraceAppConfig -from models.workflow import WorkflowAppLog, WorkflowRun +from models.workflow import WorkflowAppLog +from repositories.factory import DifyAPIRepositoryFactory from tasks.ops_trace_task import process_trace_tasks if TYPE_CHECKING: @@ -140,6 +141,8 @@ provider_config_map = OpsTraceProviderConfigMap() class OpsTraceManager: ops_trace_instances_cache: LRUCache = LRUCache(maxsize=128) + decrypted_configs_cache: LRUCache = LRUCache(maxsize=128) + _decryption_cache_lock = threading.RLock() @classmethod def encrypt_tracing_config( @@ -160,7 +163,7 @@ class OpsTraceManager: provider_config_map[tracing_provider]["other_keys"], ) - new_config = {} + new_config: dict[str, Any] = {} # Encrypt necessary keys for key in secret_keys: if key in tracing_config: @@ -190,20 +193,41 @@ class OpsTraceManager: :param tracing_config: tracing config :return: """ - config_class, secret_keys, other_keys = ( - provider_config_map[tracing_provider]["config_class"], - provider_config_map[tracing_provider]["secret_keys"], - provider_config_map[tracing_provider]["other_keys"], + config_json = json.dumps(tracing_config, sort_keys=True) + decrypted_config_key = ( + tenant_id, + tracing_provider, + config_json, ) - new_config = {} - for key in secret_keys: - if key in tracing_config: - new_config[key] = decrypt_token(tenant_id, tracing_config[key]) - for key in other_keys: - new_config[key] = tracing_config.get(key, "") + # First check without lock for performance + cached_config = cls.decrypted_configs_cache.get(decrypted_config_key) + if cached_config is not None: + return dict(cached_config) - return config_class(**new_config).model_dump() + with cls._decryption_cache_lock: + # Second check (double-checked locking) to prevent race conditions + cached_config = cls.decrypted_configs_cache.get(decrypted_config_key) + if cached_config is not None: + return dict(cached_config) + + config_class, secret_keys, other_keys = ( + provider_config_map[tracing_provider]["config_class"], + provider_config_map[tracing_provider]["secret_keys"], + provider_config_map[tracing_provider]["other_keys"], + ) + new_config: dict[str, Any] = {} + keys_to_decrypt = [key for key in secret_keys if key in tracing_config] + if keys_to_decrypt: + decrypted_values = batch_decrypt_token(tenant_id, [tracing_config[key] for key in keys_to_decrypt]) + new_config.update(zip(keys_to_decrypt, decrypted_values)) + + for key in other_keys: + new_config[key] = tracing_config.get(key, "") + + decrypted_config = config_class(**new_config).model_dump() + cls.decrypted_configs_cache[decrypted_config_key] = decrypted_config + return dict(decrypted_config) @classmethod def obfuscated_decrypt_token(cls, tracing_provider: str, decrypt_tracing_config: dict): @@ -218,7 +242,7 @@ class OpsTraceManager: provider_config_map[tracing_provider]["secret_keys"], provider_config_map[tracing_provider]["other_keys"], ) - new_config = {} + new_config: dict[str, Any] = {} for key in secret_keys: if key in decrypt_tracing_config: new_config[key] = obfuscated_token(decrypt_tracing_config[key]) @@ -250,6 +274,8 @@ class OpsTraceManager: raise ValueError("App not found") tenant_id = app.tenant_id + if trace_config_data.tracing_config is None: + raise ValueError("Tracing config cannot be None.") decrypt_tracing_config = cls.decrypt_tracing_config( tenant_id, tracing_provider, trace_config_data.tracing_config ) @@ -419,6 +445,18 @@ class OpsTraceManager: class TraceTask: + _workflow_run_repo = None + _repo_lock = threading.Lock() + + @classmethod + def _get_workflow_run_repo(cls): + if cls._workflow_run_repo is None: + with cls._repo_lock: + if cls._workflow_run_repo is None: + session_maker = sessionmaker(bind=db.engine, expire_on_commit=False) + cls._workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker) + return cls._workflow_run_repo + def __init__( self, trace_type: Any, @@ -486,27 +524,27 @@ class TraceTask: if not workflow_run_id: return {} + workflow_run_repo = self._get_workflow_run_repo() + workflow_run = workflow_run_repo.get_workflow_run_by_id_without_tenant(run_id=workflow_run_id) + if not workflow_run: + raise ValueError("Workflow run not found") + + workflow_id = workflow_run.workflow_id + tenant_id = workflow_run.tenant_id + workflow_run_id = workflow_run.id + workflow_run_elapsed_time = workflow_run.elapsed_time + workflow_run_status = workflow_run.status + workflow_run_inputs = workflow_run.inputs_dict + workflow_run_outputs = workflow_run.outputs_dict + workflow_run_version = workflow_run.version + error = workflow_run.error or "" + + total_tokens = workflow_run.total_tokens + + file_list = workflow_run_inputs.get("sys.file") or [] + query = workflow_run_inputs.get("query") or workflow_run_inputs.get("sys.query") or "" + with Session(db.engine) as session: - workflow_run_stmt = select(WorkflowRun).where(WorkflowRun.id == workflow_run_id) - workflow_run = session.scalars(workflow_run_stmt).first() - if not workflow_run: - raise ValueError("Workflow run not found") - - workflow_id = workflow_run.workflow_id - tenant_id = workflow_run.tenant_id - workflow_run_id = workflow_run.id - workflow_run_elapsed_time = workflow_run.elapsed_time - workflow_run_status = workflow_run.status - workflow_run_inputs = workflow_run.inputs_dict - workflow_run_outputs = workflow_run.outputs_dict - workflow_run_version = workflow_run.version - error = workflow_run.error or "" - - total_tokens = workflow_run.total_tokens - - file_list = workflow_run_inputs.get("sys.file") or [] - query = workflow_run_inputs.get("query") or workflow_run_inputs.get("sys.query") or "" - # get workflow_app_log_id workflow_app_log_data_stmt = select(WorkflowAppLog.id).where( WorkflowAppLog.tenant_id == tenant_id, @@ -523,43 +561,43 @@ class TraceTask: ) message_id = session.scalar(message_data_stmt) - metadata = { - "workflow_id": workflow_id, - "conversation_id": conversation_id, - "workflow_run_id": workflow_run_id, - "tenant_id": tenant_id, - "elapsed_time": workflow_run_elapsed_time, - "status": workflow_run_status, - "version": workflow_run_version, - "total_tokens": total_tokens, - "file_list": file_list, - "triggered_from": workflow_run.triggered_from, - "user_id": user_id, - "app_id": workflow_run.app_id, - } + metadata = { + "workflow_id": workflow_id, + "conversation_id": conversation_id, + "workflow_run_id": workflow_run_id, + "tenant_id": tenant_id, + "elapsed_time": workflow_run_elapsed_time, + "status": workflow_run_status, + "version": workflow_run_version, + "total_tokens": total_tokens, + "file_list": file_list, + "triggered_from": workflow_run.triggered_from, + "user_id": user_id, + "app_id": workflow_run.app_id, + } - workflow_trace_info = WorkflowTraceInfo( - trace_id=self.trace_id, - workflow_data=workflow_run.to_dict(), - conversation_id=conversation_id, - workflow_id=workflow_id, - tenant_id=tenant_id, - workflow_run_id=workflow_run_id, - workflow_run_elapsed_time=workflow_run_elapsed_time, - workflow_run_status=workflow_run_status, - workflow_run_inputs=workflow_run_inputs, - workflow_run_outputs=workflow_run_outputs, - workflow_run_version=workflow_run_version, - error=error, - total_tokens=total_tokens, - file_list=file_list, - query=query, - metadata=metadata, - workflow_app_log_id=workflow_app_log_id, - message_id=message_id, - start_time=workflow_run.created_at, - end_time=workflow_run.finished_at, - ) + workflow_trace_info = WorkflowTraceInfo( + trace_id=self.trace_id, + workflow_data=workflow_run.to_dict(), + conversation_id=conversation_id, + workflow_id=workflow_id, + tenant_id=tenant_id, + workflow_run_id=workflow_run_id, + workflow_run_elapsed_time=workflow_run_elapsed_time, + workflow_run_status=workflow_run_status, + workflow_run_inputs=workflow_run_inputs, + workflow_run_outputs=workflow_run_outputs, + workflow_run_version=workflow_run_version, + error=error, + total_tokens=total_tokens, + file_list=file_list, + query=query, + metadata=metadata, + workflow_app_log_id=workflow_app_log_id, + message_id=message_id, + start_time=workflow_run.created_at, + end_time=workflow_run.finished_at, + ) return workflow_trace_info def message_trace(self, message_id: str | None): @@ -583,6 +621,8 @@ class TraceTask: file_url = f"{self.file_base_url}/{message_file_data.url}" if message_file_data else "" file_list.append(file_url) + streaming_metrics = self._extract_streaming_metrics(message_data) + metadata = { "conversation_id": message_data.conversation_id, "ls_provider": message_data.model_provider, @@ -615,6 +655,9 @@ class TraceTask: metadata=metadata, message_file_data=message_file_data, conversation_mode=conversation_mode, + gen_ai_server_time_to_first_token=streaming_metrics.get("gen_ai_server_time_to_first_token"), + llm_streaming_time_to_generate=streaming_metrics.get("llm_streaming_time_to_generate"), + is_streaming_request=streaming_metrics.get("is_streaming_request", False), ) return message_trace_info @@ -840,6 +883,24 @@ class TraceTask: return generate_name_trace_info + def _extract_streaming_metrics(self, message_data) -> dict: + if not message_data.message_metadata: + return {} + + try: + metadata = json.loads(message_data.message_metadata) + usage = metadata.get("usage", {}) + time_to_first_token = usage.get("time_to_first_token") + time_to_generate = usage.get("time_to_generate") + + return { + "gen_ai_server_time_to_first_token": time_to_first_token, + "llm_streaming_time_to_generate": time_to_generate, + "is_streaming_request": time_to_first_token is not None, + } + except (json.JSONDecodeError, AttributeError): + return {} + trace_manager_timer: threading.Timer | None = None trace_manager_queue: queue.Queue = queue.Queue() diff --git a/api/core/ops/tencent_trace/client.py b/api/core/ops/tencent_trace/client.py index 270732aa02..bf1ab5e7e6 100644 --- a/api/core/ops/tencent_trace/client.py +++ b/api/core/ops/tencent_trace/client.py @@ -5,12 +5,18 @@ Tencent APM Trace Client - handles network operations, metrics, and API communic from __future__ import annotations import importlib +import json import logging import os import socket from typing import TYPE_CHECKING from urllib.parse import urlparse +try: + from importlib.metadata import version +except ImportError: + from importlib_metadata import version # type: ignore[import-not-found] + if TYPE_CHECKING: from opentelemetry.metrics import Meter from opentelemetry.metrics._internal.instrument import Histogram @@ -27,12 +33,27 @@ from opentelemetry.util.types import AttributeValue from configs import dify_config -from .entities.tencent_semconv import LLM_OPERATION_DURATION +from .entities.semconv import ( + GEN_AI_SERVER_TIME_TO_FIRST_TOKEN, + GEN_AI_STREAMING_TIME_TO_GENERATE, + GEN_AI_TOKEN_USAGE, + GEN_AI_TRACE_DURATION, + LLM_OPERATION_DURATION, +) from .entities.tencent_trace_entity import SpanData logger = logging.getLogger(__name__) +def _get_opentelemetry_sdk_version() -> str: + """Get OpenTelemetry SDK version dynamically.""" + try: + return version("opentelemetry-sdk") + except Exception: + logger.debug("Failed to get opentelemetry-sdk version, using default") + return "1.27.0" # fallback version + + class TencentTraceClient: """Tencent APM trace client using OpenTelemetry OTLP exporter""" @@ -57,6 +78,9 @@ class TencentTraceClient: ResourceAttributes.SERVICE_VERSION: f"dify-{dify_config.project.version}-{dify_config.COMMIT_SHA}", ResourceAttributes.DEPLOYMENT_ENVIRONMENT: f"{dify_config.DEPLOY_ENV}-{dify_config.EDITION}", ResourceAttributes.HOST_NAME: socket.gethostname(), + ResourceAttributes.TELEMETRY_SDK_LANGUAGE: "python", + ResourceAttributes.TELEMETRY_SDK_NAME: "opentelemetry", + ResourceAttributes.TELEMETRY_SDK_VERSION: _get_opentelemetry_sdk_version(), } ) # Prepare gRPC endpoint/metadata @@ -80,18 +104,23 @@ class TencentTraceClient: ) self.tracer_provider.add_span_processor(self.span_processor) - self.tracer = self.tracer_provider.get_tracer("dify.tencent_apm") + # use dify api version as tracer version + self.tracer = self.tracer_provider.get_tracer("dify-sdk", dify_config.project.version) # Store span contexts for parent-child relationships self.span_contexts: dict[int, trace_api.SpanContext] = {} self.meter: Meter | None = None + self.meter_provider: MeterProvider | None = None self.hist_llm_duration: Histogram | None = None + self.hist_token_usage: Histogram | None = None + self.hist_time_to_first_token: Histogram | None = None + self.hist_time_to_generate: Histogram | None = None + self.hist_trace_duration: Histogram | None = None self.metric_reader: MetricReader | None = None # Metrics exporter and instruments try: - from opentelemetry import metrics from opentelemetry.sdk.metrics import Histogram, MeterProvider from opentelemetry.sdk.metrics.export import AggregationTemporality, PeriodicExportingMetricReader @@ -99,7 +128,7 @@ class TencentTraceClient: use_http_protobuf = protocol in {"http/protobuf", "http-protobuf"} use_http_json = protocol in {"http/json", "http-json"} - # Set preferred temporality for histograms to DELTA + # Tencent APM works best with delta aggregation temporality preferred_temporality: dict[type, AggregationTemporality] = {Histogram: AggregationTemporality.DELTA} def _create_metric_exporter(exporter_cls, **kwargs): @@ -174,23 +203,66 @@ class TencentTraceClient: ) if metric_reader is not None: + # Use instance-level MeterProvider instead of global to support config changes + # without worker restart. Each TencentTraceClient manages its own MeterProvider. provider = MeterProvider(resource=self.resource, metric_readers=[metric_reader]) - metrics.set_meter_provider(provider) - self.meter = metrics.get_meter("dify-sdk", dify_config.project.version) + self.meter_provider = provider + self.meter = provider.get_meter("dify-sdk", dify_config.project.version) + + # LLM operation duration histogram self.hist_llm_duration = self.meter.create_histogram( name=LLM_OPERATION_DURATION, unit="s", description="LLM operation duration (seconds)", ) + + # Token usage histogram with exponential buckets + self.hist_token_usage = self.meter.create_histogram( + name=GEN_AI_TOKEN_USAGE, + unit="token", + description="Number of tokens used in prompt and completions", + ) + + # Time to first token histogram + self.hist_time_to_first_token = self.meter.create_histogram( + name=GEN_AI_SERVER_TIME_TO_FIRST_TOKEN, + unit="s", + description="Time to first token for streaming LLM responses (seconds)", + ) + + # Time to generate histogram + self.hist_time_to_generate = self.meter.create_histogram( + name=GEN_AI_STREAMING_TIME_TO_GENERATE, + unit="s", + description="Total time to generate streaming LLM responses (seconds)", + ) + + # Trace duration histogram + self.hist_trace_duration = self.meter.create_histogram( + name=GEN_AI_TRACE_DURATION, + unit="s", + description="End-to-end GenAI trace duration (seconds)", + ) + self.metric_reader = metric_reader else: self.meter = None + self.meter_provider = None self.hist_llm_duration = None + self.hist_token_usage = None + self.hist_time_to_first_token = None + self.hist_time_to_generate = None + self.hist_trace_duration = None self.metric_reader = None except Exception: logger.exception("[Tencent APM] Metrics initialization failed; metrics disabled") self.meter = None + self.meter_provider = None self.hist_llm_duration = None + self.hist_token_usage = None + self.hist_time_to_first_token = None + self.hist_time_to_generate = None + self.hist_trace_duration = None self.metric_reader = None def add_span(self, span_data: SpanData) -> None: @@ -212,10 +284,158 @@ class TencentTraceClient: if attributes: for k, v in attributes.items(): attrs[k] = str(v) if not isinstance(v, (str, int, float, bool)) else v # type: ignore[assignment] + + logger.info( + "[Tencent Metrics] Metric: %s | Value: %.4f | Attributes: %s", + LLM_OPERATION_DURATION, + latency_seconds, + json.dumps(attrs, ensure_ascii=False), + ) + self.hist_llm_duration.record(latency_seconds, attrs) # type: ignore[attr-defined] except Exception: logger.debug("[Tencent APM] Failed to record LLM duration", exc_info=True) + def record_token_usage( + self, + token_count: int, + token_type: str, + operation_name: str, + request_model: str, + response_model: str, + server_address: str, + provider: str, + ) -> None: + """Record token usage histogram. + + Args: + token_count: Number of tokens used + token_type: "input" or "output" + operation_name: Operation name (e.g., "chat") + request_model: Model used in request + response_model: Model used in response + server_address: Server address + provider: Model provider name + """ + try: + if not hasattr(self, "hist_token_usage") or self.hist_token_usage is None: + return + + attributes = { + "gen_ai.operation.name": operation_name, + "gen_ai.request.model": request_model, + "gen_ai.response.model": response_model, + "gen_ai.system": provider, + "gen_ai.token.type": token_type, + "server.address": server_address, + } + + logger.info( + "[Tencent Metrics] Metric: %s | Value: %d | Attributes: %s", + GEN_AI_TOKEN_USAGE, + token_count, + json.dumps(attributes, ensure_ascii=False), + ) + + self.hist_token_usage.record(token_count, attributes) # type: ignore[attr-defined] + except Exception: + logger.debug("[Tencent APM] Failed to record token usage", exc_info=True) + + def record_time_to_first_token( + self, ttft_seconds: float, provider: str, model: str, operation_name: str = "chat" + ) -> None: + """Record time to first token histogram. + + Args: + ttft_seconds: Time to first token in seconds + provider: Model provider name + model: Model name + operation_name: Operation name (default: "chat") + """ + try: + if not hasattr(self, "hist_time_to_first_token") or self.hist_time_to_first_token is None: + return + + attributes = { + "gen_ai.operation.name": operation_name, + "gen_ai.system": provider, + "gen_ai.request.model": model, + "gen_ai.response.model": model, + "stream": "true", + } + + logger.info( + "[Tencent Metrics] Metric: %s | Value: %.4f | Attributes: %s", + GEN_AI_SERVER_TIME_TO_FIRST_TOKEN, + ttft_seconds, + json.dumps(attributes, ensure_ascii=False), + ) + + self.hist_time_to_first_token.record(ttft_seconds, attributes) # type: ignore[attr-defined] + except Exception: + logger.debug("[Tencent APM] Failed to record time to first token", exc_info=True) + + def record_time_to_generate( + self, ttg_seconds: float, provider: str, model: str, operation_name: str = "chat" + ) -> None: + """Record time to generate histogram. + + Args: + ttg_seconds: Time to generate in seconds + provider: Model provider name + model: Model name + operation_name: Operation name (default: "chat") + """ + try: + if not hasattr(self, "hist_time_to_generate") or self.hist_time_to_generate is None: + return + + attributes = { + "gen_ai.operation.name": operation_name, + "gen_ai.system": provider, + "gen_ai.request.model": model, + "gen_ai.response.model": model, + "stream": "true", + } + + logger.info( + "[Tencent Metrics] Metric: %s | Value: %.4f | Attributes: %s", + GEN_AI_STREAMING_TIME_TO_GENERATE, + ttg_seconds, + json.dumps(attributes, ensure_ascii=False), + ) + + self.hist_time_to_generate.record(ttg_seconds, attributes) # type: ignore[attr-defined] + except Exception: + logger.debug("[Tencent APM] Failed to record time to generate", exc_info=True) + + def record_trace_duration(self, duration_seconds: float, attributes: dict[str, str] | None = None) -> None: + """Record end-to-end trace duration histogram in seconds. + + Args: + duration_seconds: Trace duration in seconds + attributes: Optional attributes (e.g., conversation_mode, app_id) + """ + try: + if not hasattr(self, "hist_trace_duration") or self.hist_trace_duration is None: + return + + attrs: dict[str, str] = {} + if attributes: + for k, v in attributes.items(): + attrs[k] = str(v) if not isinstance(v, (str, int, float, bool)) else v # type: ignore[assignment] + + logger.info( + "[Tencent Metrics] Metric: %s | Value: %.4f | Attributes: %s", + GEN_AI_TRACE_DURATION, + duration_seconds, + json.dumps(attrs, ensure_ascii=False), + ) + + self.hist_trace_duration.record(duration_seconds, attrs) # type: ignore[attr-defined] + except Exception: + logger.debug("[Tencent APM] Failed to record trace duration", exc_info=True) + def _create_and_export_span(self, span_data: SpanData) -> None: """Create span using OpenTelemetry Tracer API""" try: @@ -296,11 +516,19 @@ class TencentTraceClient: if self.tracer_provider: self.tracer_provider.shutdown() + + # Shutdown instance-level meter provider + if self.meter_provider is not None: + try: + self.meter_provider.shutdown() # type: ignore[attr-defined] + except Exception: + logger.debug("[Tencent APM] Error shutting down meter provider", exc_info=True) + if self.metric_reader is not None: try: self.metric_reader.shutdown() # type: ignore[attr-defined] except Exception: - pass + logger.debug("[Tencent APM] Error shutting down metric reader", exc_info=True) except Exception: logger.exception("[Tencent APM] Error during client shutdown") diff --git a/api/core/ops/tencent_trace/entities/tencent_semconv.py b/api/core/ops/tencent_trace/entities/semconv.py similarity index 69% rename from api/core/ops/tencent_trace/entities/tencent_semconv.py rename to api/core/ops/tencent_trace/entities/semconv.py index 5ea6eeacef..cd2dbade8b 100644 --- a/api/core/ops/tencent_trace/entities/tencent_semconv.py +++ b/api/core/ops/tencent_trace/entities/semconv.py @@ -47,6 +47,9 @@ GEN_AI_COMPLETION = "gen_ai.completion" GEN_AI_RESPONSE_FINISH_REASON = "gen_ai.response.finish_reason" +# Streaming Span Attributes +GEN_AI_IS_STREAMING_REQUEST = "llm.is_streaming" # Same as OpenLLMetry semconv + # Tool TOOL_NAME = "tool.name" @@ -62,6 +65,19 @@ INSTRUMENTATION_LANGUAGE = "python" # Metrics LLM_OPERATION_DURATION = "gen_ai.client.operation.duration" +GEN_AI_TOKEN_USAGE = "gen_ai.client.token.usage" +GEN_AI_SERVER_TIME_TO_FIRST_TOKEN = "gen_ai.server.time_to_first_token" +GEN_AI_STREAMING_TIME_TO_GENERATE = "gen_ai.streaming.time_to_generate" +# The LLM trace duration which is exclusive to tencent apm +GEN_AI_TRACE_DURATION = "gen_ai.trace.duration" + +# Token Usage Attributes +GEN_AI_OPERATION_NAME = "gen_ai.operation.name" +GEN_AI_REQUEST_MODEL = "gen_ai.request.model" +GEN_AI_RESPONSE_MODEL = "gen_ai.response.model" +GEN_AI_SYSTEM = "gen_ai.system" +GEN_AI_TOKEN_TYPE = "gen_ai.token.type" +SERVER_ADDRESS = "server.address" class GenAISpanKind(Enum): diff --git a/api/core/ops/tencent_trace/span_builder.py b/api/core/ops/tencent_trace/span_builder.py index 5ba592290d..26e8779e3e 100644 --- a/api/core/ops/tencent_trace/span_builder.py +++ b/api/core/ops/tencent_trace/span_builder.py @@ -14,10 +14,11 @@ from core.ops.entities.trace_entity import ( ToolTraceInfo, WorkflowTraceInfo, ) -from core.ops.tencent_trace.entities.tencent_semconv import ( +from core.ops.tencent_trace.entities.semconv import ( GEN_AI_COMPLETION, GEN_AI_FRAMEWORK, GEN_AI_IS_ENTRY, + GEN_AI_IS_STREAMING_REQUEST, GEN_AI_MODEL_NAME, GEN_AI_PROMPT, GEN_AI_PROVIDER, @@ -156,6 +157,25 @@ class TencentSpanBuilder: outputs = node_execution.outputs or {} usage_data = process_data.get("usage", {}) if "usage" in process_data else outputs.get("usage", {}) + attributes = { + GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id", ""), + GEN_AI_SPAN_KIND: GenAISpanKind.GENERATION.value, + GEN_AI_FRAMEWORK: "dify", + GEN_AI_MODEL_NAME: process_data.get("model_name", ""), + GEN_AI_PROVIDER: process_data.get("model_provider", ""), + GEN_AI_USAGE_INPUT_TOKENS: str(usage_data.get("prompt_tokens", 0)), + GEN_AI_USAGE_OUTPUT_TOKENS: str(usage_data.get("completion_tokens", 0)), + GEN_AI_USAGE_TOTAL_TOKENS: str(usage_data.get("total_tokens", 0)), + GEN_AI_PROMPT: json.dumps(process_data.get("prompts", []), ensure_ascii=False), + GEN_AI_COMPLETION: str(outputs.get("text", "")), + GEN_AI_RESPONSE_FINISH_REASON: outputs.get("finish_reason", ""), + INPUT_VALUE: json.dumps(process_data.get("prompts", []), ensure_ascii=False), + OUTPUT_VALUE: str(outputs.get("text", "")), + } + + if usage_data.get("time_to_first_token") is not None: + attributes[GEN_AI_IS_STREAMING_REQUEST] = "true" + return SpanData( trace_id=trace_id, parent_span_id=workflow_span_id, @@ -163,21 +183,7 @@ class TencentSpanBuilder: name="GENERATION", start_time=TencentSpanBuilder._get_time_nanoseconds(node_execution.created_at), end_time=TencentSpanBuilder._get_time_nanoseconds(node_execution.finished_at), - attributes={ - GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id", ""), - GEN_AI_SPAN_KIND: GenAISpanKind.GENERATION.value, - GEN_AI_FRAMEWORK: "dify", - GEN_AI_MODEL_NAME: process_data.get("model_name", ""), - GEN_AI_PROVIDER: process_data.get("model_provider", ""), - GEN_AI_USAGE_INPUT_TOKENS: str(usage_data.get("prompt_tokens", 0)), - GEN_AI_USAGE_OUTPUT_TOKENS: str(usage_data.get("completion_tokens", 0)), - GEN_AI_USAGE_TOTAL_TOKENS: str(usage_data.get("total_tokens", 0)), - GEN_AI_PROMPT: json.dumps(process_data.get("prompts", []), ensure_ascii=False), - GEN_AI_COMPLETION: str(outputs.get("text", "")), - GEN_AI_RESPONSE_FINISH_REASON: outputs.get("finish_reason", ""), - INPUT_VALUE: json.dumps(process_data.get("prompts", []), ensure_ascii=False), - OUTPUT_VALUE: str(outputs.get("text", "")), - }, + attributes=attributes, status=TencentSpanBuilder._get_workflow_node_status(node_execution), ) @@ -191,6 +197,19 @@ class TencentSpanBuilder: if trace_info.error: status = Status(StatusCode.ERROR, trace_info.error) + attributes = { + GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id", ""), + GEN_AI_USER_ID: str(user_id), + GEN_AI_SPAN_KIND: GenAISpanKind.WORKFLOW.value, + GEN_AI_FRAMEWORK: "dify", + GEN_AI_IS_ENTRY: "true", + INPUT_VALUE: str(trace_info.inputs or ""), + OUTPUT_VALUE: str(trace_info.outputs or ""), + } + + if trace_info.is_streaming_request: + attributes[GEN_AI_IS_STREAMING_REQUEST] = "true" + return SpanData( trace_id=trace_id, parent_span_id=None, @@ -198,15 +217,7 @@ class TencentSpanBuilder: name="message", start_time=TencentSpanBuilder._get_time_nanoseconds(trace_info.start_time), end_time=TencentSpanBuilder._get_time_nanoseconds(trace_info.end_time), - attributes={ - GEN_AI_SESSION_ID: trace_info.metadata.get("conversation_id", ""), - GEN_AI_USER_ID: str(user_id), - GEN_AI_SPAN_KIND: GenAISpanKind.WORKFLOW.value, - GEN_AI_FRAMEWORK: "dify", - GEN_AI_IS_ENTRY: "true", - INPUT_VALUE: str(trace_info.inputs or ""), - OUTPUT_VALUE: str(trace_info.outputs or ""), - }, + attributes=attributes, status=status, links=links, ) diff --git a/api/core/ops/tencent_trace/tencent_trace.py b/api/core/ops/tencent_trace/tencent_trace.py index 5ef1c61b24..9b3df86e16 100644 --- a/api/core/ops/tencent_trace/tencent_trace.py +++ b/api/core/ops/tencent_trace/tencent_trace.py @@ -90,6 +90,9 @@ class TencentDataTrace(BaseTraceInstance): self._process_workflow_nodes(trace_info, trace_id) + # Record trace duration for entry span + self._record_workflow_trace_duration(trace_info) + except Exception: logger.exception("[Tencent APM] Failed to process workflow trace") @@ -107,6 +110,11 @@ class TencentDataTrace(BaseTraceInstance): self.trace_client.add_span(message_span) + self._record_message_llm_metrics(trace_info) + + # Record trace duration for entry span + self._record_message_trace_duration(trace_info) + except Exception: logger.exception("[Tencent APM] Failed to process message trace") @@ -290,24 +298,219 @@ class TencentDataTrace(BaseTraceInstance): def _record_llm_metrics(self, node_execution: WorkflowNodeExecution) -> None: """Record LLM performance metrics""" try: - if not hasattr(self.trace_client, "record_llm_duration"): - return - process_data = node_execution.process_data or {} - usage = process_data.get("usage", {}) - latency_s = float(usage.get("latency", 0.0)) + outputs = node_execution.outputs or {} + usage = process_data.get("usage", {}) if "usage" in process_data else outputs.get("usage", {}) - if latency_s > 0: - attributes = { - "provider": process_data.get("model_provider", ""), - "model": process_data.get("model_name", ""), - "span_kind": "GENERATION", - } - self.trace_client.record_llm_duration(latency_s, attributes) + model_provider = process_data.get("model_provider", "unknown") + model_name = process_data.get("model_name", "unknown") + model_mode = process_data.get("model_mode", "chat") + + # Record LLM duration + if hasattr(self.trace_client, "record_llm_duration"): + latency_s = float(usage.get("latency", 0.0)) + + if latency_s > 0: + # Determine if streaming from usage metrics + is_streaming = usage.get("time_to_first_token") is not None + + attributes = { + "gen_ai.system": model_provider, + "gen_ai.response.model": model_name, + "gen_ai.operation.name": model_mode, + "stream": "true" if is_streaming else "false", + } + self.trace_client.record_llm_duration(latency_s, attributes) + + # Record streaming metrics from usage + time_to_first_token = usage.get("time_to_first_token") + if time_to_first_token is not None and hasattr(self.trace_client, "record_time_to_first_token"): + ttft_seconds = float(time_to_first_token) + if ttft_seconds > 0: + self.trace_client.record_time_to_first_token( + ttft_seconds=ttft_seconds, provider=model_provider, model=model_name, operation_name=model_mode + ) + + time_to_generate = usage.get("time_to_generate") + if time_to_generate is not None and hasattr(self.trace_client, "record_time_to_generate"): + ttg_seconds = float(time_to_generate) + if ttg_seconds > 0: + self.trace_client.record_time_to_generate( + ttg_seconds=ttg_seconds, provider=model_provider, model=model_name, operation_name=model_mode + ) + + # Record token usage + if hasattr(self.trace_client, "record_token_usage"): + # Extract token counts + input_tokens = int(usage.get("prompt_tokens", 0)) + output_tokens = int(usage.get("completion_tokens", 0)) + + if input_tokens > 0 or output_tokens > 0: + server_address = f"{model_provider}" + + # Record input tokens + if input_tokens > 0: + self.trace_client.record_token_usage( + token_count=input_tokens, + token_type="input", + operation_name=model_mode, + request_model=model_name, + response_model=model_name, + server_address=server_address, + provider=model_provider, + ) + + # Record output tokens + if output_tokens > 0: + self.trace_client.record_token_usage( + token_count=output_tokens, + token_type="output", + operation_name=model_mode, + request_model=model_name, + response_model=model_name, + server_address=server_address, + provider=model_provider, + ) except Exception: logger.debug("[Tencent APM] Failed to record LLM metrics") + def _record_message_llm_metrics(self, trace_info: MessageTraceInfo) -> None: + """Record LLM metrics for message traces""" + try: + trace_metadata = trace_info.metadata or {} + message_data = trace_info.message_data or {} + provider_latency = 0.0 + if isinstance(message_data, dict): + provider_latency = float(message_data.get("provider_response_latency", 0.0) or 0.0) + else: + provider_latency = float(getattr(message_data, "provider_response_latency", 0.0) or 0.0) + + model_provider = trace_metadata.get("ls_provider") or ( + message_data.get("model_provider", "") if isinstance(message_data, dict) else "" + ) + model_name = trace_metadata.get("ls_model_name") or ( + message_data.get("model_id", "") if isinstance(message_data, dict) else "" + ) + + # Record LLM duration + if provider_latency > 0 and hasattr(self.trace_client, "record_llm_duration"): + is_streaming = trace_info.is_streaming_request + + duration_attributes = { + "gen_ai.system": model_provider, + "gen_ai.response.model": model_name, + "gen_ai.operation.name": "chat", # Message traces are always chat + "stream": "true" if is_streaming else "false", + } + self.trace_client.record_llm_duration(provider_latency, duration_attributes) + + # Record streaming metrics for message traces + if trace_info.is_streaming_request: + # Record time to first token + if trace_info.gen_ai_server_time_to_first_token is not None and hasattr( + self.trace_client, "record_time_to_first_token" + ): + ttft_seconds = float(trace_info.gen_ai_server_time_to_first_token) + if ttft_seconds > 0: + self.trace_client.record_time_to_first_token( + ttft_seconds=ttft_seconds, provider=str(model_provider or ""), model=str(model_name or "") + ) + + # Record time to generate + if trace_info.llm_streaming_time_to_generate is not None and hasattr( + self.trace_client, "record_time_to_generate" + ): + ttg_seconds = float(trace_info.llm_streaming_time_to_generate) + if ttg_seconds > 0: + self.trace_client.record_time_to_generate( + ttg_seconds=ttg_seconds, provider=str(model_provider or ""), model=str(model_name or "") + ) + + # Record token usage + if hasattr(self.trace_client, "record_token_usage"): + input_tokens = int(trace_info.message_tokens or 0) + output_tokens = int(trace_info.answer_tokens or 0) + + if input_tokens > 0: + self.trace_client.record_token_usage( + token_count=input_tokens, + token_type="input", + operation_name="chat", + request_model=str(model_name or ""), + response_model=str(model_name or ""), + server_address=str(model_provider or ""), + provider=str(model_provider or ""), + ) + + if output_tokens > 0: + self.trace_client.record_token_usage( + token_count=output_tokens, + token_type="output", + operation_name="chat", + request_model=str(model_name or ""), + response_model=str(model_name or ""), + server_address=str(model_provider or ""), + provider=str(model_provider or ""), + ) + + except Exception: + logger.debug("[Tencent APM] Failed to record message LLM metrics") + + def _record_workflow_trace_duration(self, trace_info: WorkflowTraceInfo) -> None: + """Record end-to-end workflow trace duration.""" + try: + if not hasattr(self.trace_client, "record_trace_duration"): + return + + # Calculate duration from start_time and end_time to match span duration + if trace_info.start_time and trace_info.end_time: + duration_s = (trace_info.end_time - trace_info.start_time).total_seconds() + else: + # Fallback to workflow_run_elapsed_time if timestamps not available + duration_s = float(trace_info.workflow_run_elapsed_time) + + if duration_s > 0: + attributes = { + "conversation_mode": "workflow", + "workflow_status": trace_info.workflow_run_status, + } + + # Add conversation_id if available + if trace_info.conversation_id: + attributes["has_conversation"] = "true" + else: + attributes["has_conversation"] = "false" + + self.trace_client.record_trace_duration(duration_s, attributes) + + except Exception: + logger.debug("[Tencent APM] Failed to record workflow trace duration") + + def _record_message_trace_duration(self, trace_info: MessageTraceInfo) -> None: + """Record end-to-end message trace duration.""" + try: + if not hasattr(self.trace_client, "record_trace_duration"): + return + + # Calculate duration from start_time and end_time + if trace_info.start_time and trace_info.end_time: + duration = (trace_info.end_time - trace_info.start_time).total_seconds() + + if duration > 0: + attributes = { + "conversation_mode": trace_info.conversation_mode, + } + + # Add streaming flag if available + if hasattr(trace_info, "is_streaming_request"): + attributes["stream"] = "true" if trace_info.is_streaming_request else "false" + + self.trace_client.record_trace_duration(duration, attributes) + + except Exception: + logger.debug("[Tencent APM] Failed to record message trace duration") + def __del__(self): """Ensure proper cleanup on garbage collection.""" try: diff --git a/api/core/ops/weave_trace/weave_trace.py b/api/core/ops/weave_trace/weave_trace.py index 9b3d7a8192..2134be0bce 100644 --- a/api/core/ops/weave_trace/weave_trace.py +++ b/api/core/ops/weave_trace/weave_trace.py @@ -1,12 +1,20 @@ import logging import os import uuid -from datetime import datetime, timedelta +from datetime import UTC, datetime, timedelta from typing import Any, cast import wandb import weave from sqlalchemy.orm import sessionmaker +from weave.trace_server.trace_server_interface import ( + CallEndReq, + CallStartReq, + EndedCallSchemaForInsert, + StartedCallSchemaForInsert, + SummaryInsertMap, + TraceStatus, +) from core.ops.base_trace_instance import BaseTraceInstance from core.ops.entities.config_entity import WeaveConfig @@ -57,6 +65,7 @@ class WeaveDataTrace(BaseTraceInstance): ) self.file_base_url = os.getenv("FILES_URL", "http://127.0.0.1:5001") self.calls: dict[str, Any] = {} + self.project_id = f"{self.weave_client.entity}/{self.weave_client.project}" def get_project_url( self, @@ -424,6 +433,13 @@ class WeaveDataTrace(BaseTraceInstance): logger.debug("Weave API check failed: %s", str(e)) raise ValueError(f"Weave API check failed: {str(e)}") + def _normalize_time(self, dt: datetime | None) -> datetime: + if dt is None: + return datetime.now(UTC) + if dt.tzinfo is None: + return dt.replace(tzinfo=UTC) + return dt + def start_call(self, run_data: WeaveTraceModel, parent_run_id: str | None = None): inputs = run_data.inputs if inputs is None: @@ -437,19 +453,71 @@ class WeaveDataTrace(BaseTraceInstance): elif not isinstance(attributes, dict): attributes = {"attributes": str(attributes)} - call = self.weave_client.create_call( - op=run_data.op, - inputs=inputs, - attributes=attributes, + start_time = attributes.get("start_time") if isinstance(attributes, dict) else None + started_at = self._normalize_time(start_time if isinstance(start_time, datetime) else None) + trace_id = attributes.get("trace_id") if isinstance(attributes, dict) else None + if trace_id is None: + trace_id = run_data.id + + call_start_req = CallStartReq( + start=StartedCallSchemaForInsert( + project_id=self.project_id, + id=run_data.id, + op_name=str(run_data.op), + trace_id=trace_id, + parent_id=parent_run_id, + started_at=started_at, + attributes=attributes, + inputs=inputs, + wb_user_id=None, + ) ) - self.calls[run_data.id] = call - if parent_run_id: - self.calls[run_data.id].parent_id = parent_run_id + self.weave_client.server.call_start(call_start_req) + self.calls[run_data.id] = {"trace_id": trace_id, "parent_id": parent_run_id} def finish_call(self, run_data: WeaveTraceModel): - call = self.calls.get(run_data.id) - if call: - exception = Exception(run_data.exception) if run_data.exception else None - self.weave_client.finish_call(call=call, output=run_data.outputs, exception=exception) - else: + call_meta = self.calls.get(run_data.id) + if not call_meta: raise ValueError(f"Call with id {run_data.id} not found") + + attributes = run_data.attributes + if attributes is None: + attributes = {} + elif not isinstance(attributes, dict): + attributes = {"attributes": str(attributes)} + + start_time = attributes.get("start_time") if isinstance(attributes, dict) else None + end_time = attributes.get("end_time") if isinstance(attributes, dict) else None + started_at = self._normalize_time(start_time if isinstance(start_time, datetime) else None) + ended_at = self._normalize_time(end_time if isinstance(end_time, datetime) else None) + elapsed_ms = int((ended_at - started_at).total_seconds() * 1000) + if elapsed_ms < 0: + elapsed_ms = 0 + + status_counts = { + TraceStatus.SUCCESS: 0, + TraceStatus.ERROR: 0, + } + if run_data.exception: + status_counts[TraceStatus.ERROR] = 1 + else: + status_counts[TraceStatus.SUCCESS] = 1 + + summary: dict[str, Any] = { + "status_counts": status_counts, + "weave": {"latency_ms": elapsed_ms}, + } + + exception_str = str(run_data.exception) if run_data.exception else None + + call_end_req = CallEndReq( + end=EndedCallSchemaForInsert( + project_id=self.project_id, + id=run_data.id, + ended_at=ended_at, + exception=exception_str, + output=run_data.outputs, + summary=cast(SummaryInsertMap, summary), + ) + ) + self.weave_client.server.call_end(call_end_req) diff --git a/api/core/plugin/entities/plugin.py b/api/core/plugin/entities/plugin.py index 1b521d0ff7..9e1a9edf82 100644 --- a/api/core/plugin/entities/plugin.py +++ b/api/core/plugin/entities/plugin.py @@ -1,7 +1,7 @@ import datetime from collections.abc import Mapping from enum import StrEnum, auto -from typing import Any, Optional +from typing import Any from packaging.version import InvalidVersion, Version from pydantic import BaseModel, Field, field_validator, model_validator @@ -73,7 +73,7 @@ class PluginDeclaration(BaseModel): models: list[str] | None = Field(default_factory=list[str]) endpoints: list[str] | None = Field(default_factory=list[str]) datasources: list[str] | None = Field(default_factory=list[str]) - triggers: Optional[list[str]] = Field(default_factory=list[str]) + triggers: list[str] | None = Field(default_factory=list[str]) class Meta(BaseModel): minimum_dify_version: str | None = Field(default=None) diff --git a/api/core/plugin/entities/request.py b/api/core/plugin/entities/request.py index 995d52ac6e..73d3b8c89c 100644 --- a/api/core/plugin/entities/request.py +++ b/api/core/plugin/entities/request.py @@ -1,7 +1,7 @@ import binascii import json from collections.abc import Mapping -from typing import Any, Literal, Optional +from typing import Any, Literal from flask import Response from pydantic import BaseModel, ConfigDict, Field, field_validator @@ -246,7 +246,7 @@ class RequestFetchAppInfo(BaseModel): class TriggerInvokeEventResponse(BaseModel): variables: Mapping[str, Any] = Field(default_factory=dict) - cancelled: Optional[bool] = False + cancelled: bool = Field(default=False) model_config = ConfigDict(protected_namespaces=(), arbitrary_types_allowed=True) diff --git a/api/core/provider_manager.py b/api/core/provider_manager.py index 6cf6620d8d..6c818bdc8b 100644 --- a/api/core/provider_manager.py +++ b/api/core/provider_manager.py @@ -309,11 +309,12 @@ class ProviderManager: (model for model in available_models if model.model == "gpt-4"), available_models[0] ) - default_model = TenantDefaultModel() - default_model.tenant_id = tenant_id - default_model.model_type = model_type.to_origin_model_type() - default_model.provider_name = available_model.provider.provider - default_model.model_name = available_model.model + default_model = TenantDefaultModel( + tenant_id=tenant_id, + model_type=model_type.to_origin_model_type(), + provider_name=available_model.provider.provider, + model_name=available_model.model, + ) db.session.add(default_model) db.session.commit() diff --git a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py index 0ff8c915e6..1470713b88 100644 --- a/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py +++ b/api/core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py @@ -147,7 +147,8 @@ class ElasticSearchVector(BaseVector): def _get_version(self) -> str: info = self._client.info() - return cast(str, info["version"]["number"]) + # remove any suffix like "-SNAPSHOT" from the version string + return cast(str, info["version"]["number"]).split("-")[0] def _check_version(self): if parse_version(self._version) < parse_version("8.0.0"): diff --git a/api/core/rag/datasource/vdb/matrixone/matrixone_vector.py b/api/core/rag/datasource/vdb/matrixone/matrixone_vector.py index 6fe396dc1e..14955c8d7c 100644 --- a/api/core/rag/datasource/vdb/matrixone/matrixone_vector.py +++ b/api/core/rag/datasource/vdb/matrixone/matrixone_vector.py @@ -22,6 +22,18 @@ logger = logging.getLogger(__name__) P = ParamSpec("P") R = TypeVar("R") +T = TypeVar("T", bound="MatrixoneVector") + + +def ensure_client(func: Callable[Concatenate[T, P], R]): + @wraps(func) + def wrapper(self: T, *args: P.args, **kwargs: P.kwargs): + if self.client is None: + self.client = self._get_client(None, False) + return func(self, *args, **kwargs) + + return wrapper + class MatrixoneConfig(BaseModel): host: str = "localhost" @@ -206,19 +218,6 @@ class MatrixoneVector(BaseVector): self.client.delete() -T = TypeVar("T", bound=MatrixoneVector) - - -def ensure_client(func: Callable[Concatenate[T, P], R]): - @wraps(func) - def wrapper(self: T, *args: P.args, **kwargs: P.kwargs): - if self.client is None: - self.client = self._get_client(None, False) - return func(self, *args, **kwargs) - - return wrapper - - class MatrixoneVectorFactory(AbstractVectorFactory): def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> MatrixoneVector: if dataset.index_struct_dict: diff --git a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py index d2d8fcf964..591de01669 100644 --- a/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py +++ b/api/core/rag/datasource/vdb/weaviate/weaviate_vector.py @@ -39,11 +39,13 @@ class WeaviateConfig(BaseModel): Attributes: endpoint: Weaviate server endpoint URL + grpc_endpoint: Optional Weaviate gRPC server endpoint URL api_key: Optional API key for authentication batch_size: Number of objects to batch per insert operation """ endpoint: str + grpc_endpoint: str | None = None api_key: str | None = None batch_size: int = 100 @@ -88,9 +90,22 @@ class WeaviateVector(BaseVector): http_secure = p.scheme == "https" http_port = p.port or (443 if http_secure else 80) - grpc_host = host - grpc_secure = http_secure - grpc_port = 443 if grpc_secure else 50051 + # Parse gRPC configuration + if config.grpc_endpoint: + # Urls without scheme won't be parsed correctly in some python versions, + # see https://bugs.python.org/issue27657 + grpc_endpoint_with_scheme = ( + config.grpc_endpoint if "://" in config.grpc_endpoint else f"grpc://{config.grpc_endpoint}" + ) + grpc_p = urlparse(grpc_endpoint_with_scheme) + grpc_host = grpc_p.hostname or "localhost" + grpc_port = grpc_p.port or (443 if grpc_p.scheme == "grpcs" else 50051) + grpc_secure = grpc_p.scheme == "grpcs" + else: + # Infer from HTTP endpoint as fallback + grpc_host = host + grpc_secure = http_secure + grpc_port = 443 if grpc_secure else 50051 client = weaviate.connect_to_custom( http_host=host, @@ -100,6 +115,7 @@ class WeaviateVector(BaseVector): grpc_port=grpc_port, grpc_secure=grpc_secure, auth_credentials=Auth.api_key(config.api_key) if config.api_key else None, + skip_init_checks=True, # Skip PyPI version check to avoid unnecessary HTTP requests ) if not client.is_ready(): @@ -431,6 +447,7 @@ class WeaviateVectorFactory(AbstractVectorFactory): collection_name=collection_name, config=WeaviateConfig( endpoint=dify_config.WEAVIATE_ENDPOINT or "", + grpc_endpoint=dify_config.WEAVIATE_GRPC_ENDPOINT or "", api_key=dify_config.WEAVIATE_API_KEY, batch_size=dify_config.WEAVIATE_BATCH_SIZE, ), diff --git a/api/core/rag/extractor/word_extractor.py b/api/core/rag/extractor/word_extractor.py index 1a9704688a..c7a5568866 100644 --- a/api/core/rag/extractor/word_extractor.py +++ b/api/core/rag/extractor/word_extractor.py @@ -152,13 +152,15 @@ class WordExtractor(BaseExtractor): # Initialize a row, all of which are empty by default row_cells = [""] * total_cols col_index = 0 - for cell in row.cells: + while col_index < len(row.cells): # make sure the col_index is not out of range - while col_index < total_cols and row_cells[col_index] != "": + while col_index < len(row.cells) and row_cells[col_index] != "": col_index += 1 # if col_index is out of range the loop is jumped - if col_index >= total_cols: + if col_index >= len(row.cells): break + # get the correct cell + cell = row.cells[col_index] cell_content = self._parse_cell(cell, image_map).strip() cell_colspan = cell.grid_span or 1 for i in range(cell_colspan): diff --git a/api/core/rag/pipeline/__init__.py b/api/core/rag/pipeline/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/rag/pipeline/queue.py b/api/core/rag/pipeline/queue.py new file mode 100644 index 0000000000..7472598a7f --- /dev/null +++ b/api/core/rag/pipeline/queue.py @@ -0,0 +1,82 @@ +import json +from collections.abc import Sequence +from typing import Any + +from pydantic import BaseModel, ValidationError + +from extensions.ext_redis import redis_client + +_DEFAULT_TASK_TTL = 60 * 60 # 1 hour + + +class TaskWrapper(BaseModel): + data: Any + + def serialize(self) -> str: + return self.model_dump_json() + + @classmethod + def deserialize(cls, serialized_data: str) -> "TaskWrapper": + return cls.model_validate_json(serialized_data) + + +class TenantIsolatedTaskQueue: + """ + Simple queue for tenant isolated tasks, used for rag related tenant tasks isolation. + It uses Redis list to store tasks, and Redis key to store task waiting flag. + Support tasks that can be serialized by json. + """ + + def __init__(self, tenant_id: str, unique_key: str): + self._tenant_id = tenant_id + self._unique_key = unique_key + self._queue = f"tenant_self_{unique_key}_task_queue:{tenant_id}" + self._task_key = f"tenant_{unique_key}_task:{tenant_id}" + + def get_task_key(self): + return redis_client.get(self._task_key) + + def set_task_waiting_time(self, ttl: int = _DEFAULT_TASK_TTL): + redis_client.setex(self._task_key, ttl, 1) + + def delete_task_key(self): + redis_client.delete(self._task_key) + + def push_tasks(self, tasks: Sequence[Any]): + serialized_tasks = [] + for task in tasks: + # Store str list directly, maintaining full compatibility for pipeline scenarios + if isinstance(task, str): + serialized_tasks.append(task) + else: + # Use TaskWrapper to do JSON serialization for non-string tasks + wrapper = TaskWrapper(data=task) + serialized_data = wrapper.serialize() + serialized_tasks.append(serialized_data) + + if not serialized_tasks: + return + + redis_client.lpush(self._queue, *serialized_tasks) + + def pull_tasks(self, count: int = 1) -> Sequence[Any]: + if count <= 0: + return [] + + tasks = [] + for _ in range(count): + serialized_task = redis_client.rpop(self._queue) + if not serialized_task: + break + + if isinstance(serialized_task, bytes): + serialized_task = serialized_task.decode("utf-8") + + try: + wrapper = TaskWrapper.deserialize(serialized_task) + tasks.append(wrapper.data) + except (json.JSONDecodeError, ValidationError, TypeError, ValueError): + # Fall back to raw string for legacy format or invalid JSON + tasks.append(serialized_task) + + return tasks diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 45b19f25a0..3db67efb0e 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -7,8 +7,7 @@ from collections.abc import Generator, Mapping from typing import Any, Union, cast from flask import Flask, current_app -from sqlalchemy import Float, and_, or_, select, text -from sqlalchemy import cast as sqlalchemy_cast +from sqlalchemy import and_, or_, select from core.app.app_config.entities import ( DatasetEntity, @@ -1023,60 +1022,55 @@ class DatasetRetrieval: self, sequence: int, condition: str, metadata_name: str, value: Any | None, filters: list ): if value is None and condition not in ("empty", "not empty"): - return + return filters + + json_field = DatasetDocument.doc_metadata[metadata_name].as_string() - key = f"{metadata_name}_{sequence}" - key_value = f"{metadata_name}_{sequence}_value" match condition: case "contains": - filters.append( - (text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params( - **{key: metadata_name, key_value: f"%{value}%"} - ) - ) + filters.append(json_field.like(f"%{value}%")) + case "not contains": - filters.append( - (text(f"documents.doc_metadata ->> :{key} NOT LIKE :{key_value}")).params( - **{key: metadata_name, key_value: f"%{value}%"} - ) - ) + filters.append(json_field.notlike(f"%{value}%")) + case "start with": - filters.append( - (text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params( - **{key: metadata_name, key_value: f"{value}%"} - ) - ) + filters.append(json_field.like(f"{value}%")) case "end with": - filters.append( - (text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params( - **{key: metadata_name, key_value: f"%{value}"} - ) - ) + filters.append(json_field.like(f"%{value}")) + case "is" | "=": if isinstance(value, str): - filters.append(DatasetDocument.doc_metadata[metadata_name] == f'"{value}"') - else: - filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) == value) + filters.append(json_field == value) + elif isinstance(value, (int, float)): + filters.append(DatasetDocument.doc_metadata[metadata_name].as_float() == value) + case "is not" | "≠": if isinstance(value, str): - filters.append(DatasetDocument.doc_metadata[metadata_name] != f'"{value}"') - else: - filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) != value) + filters.append(json_field != value) + elif isinstance(value, (int, float)): + filters.append(DatasetDocument.doc_metadata[metadata_name].as_float() != value) + case "empty": filters.append(DatasetDocument.doc_metadata[metadata_name].is_(None)) + case "not empty": filters.append(DatasetDocument.doc_metadata[metadata_name].isnot(None)) + case "before" | "<": - filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) < value) + filters.append(DatasetDocument.doc_metadata[metadata_name].as_float() < value) + case "after" | ">": - filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) > value) + filters.append(DatasetDocument.doc_metadata[metadata_name].as_float() > value) + case "≤" | "<=": - filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) <= value) + filters.append(DatasetDocument.doc_metadata[metadata_name].as_float() <= value) + case "≥" | ">=": - filters.append(sqlalchemy_cast(DatasetDocument.doc_metadata[metadata_name].astext, Float) >= value) + filters.append(DatasetDocument.doc_metadata[metadata_name].as_float() >= value) case _: pass + return filters def _fetch_model_config( diff --git a/api/core/tools/__base/tool.py b/api/core/tools/__base/tool.py index 6e0462c530..8ca4eabb7a 100644 --- a/api/core/tools/__base/tool.py +++ b/api/core/tools/__base/tool.py @@ -210,10 +210,24 @@ class Tool(ABC): meta=meta, ) - def create_json_message(self, object: dict) -> ToolInvokeMessage: + def create_json_message(self, object: dict, suppress_output: bool = False) -> ToolInvokeMessage: """ create a json message """ return ToolInvokeMessage( - type=ToolInvokeMessage.MessageType.JSON, message=ToolInvokeMessage.JsonMessage(json_object=object) + type=ToolInvokeMessage.MessageType.JSON, + message=ToolInvokeMessage.JsonMessage(json_object=object, suppress_output=suppress_output), + ) + + def create_variable_message( + self, variable_name: str, variable_value: Any, stream: bool = False + ) -> ToolInvokeMessage: + """ + create a variable message + """ + return ToolInvokeMessage( + type=ToolInvokeMessage.MessageType.VARIABLE, + message=ToolInvokeMessage.VariableMessage( + variable_name=variable_name, variable_value=variable_value, stream=stream + ), ) diff --git a/api/core/tools/entities/api_entities.py b/api/core/tools/entities/api_entities.py index 7f77359f2e..807d0245d1 100644 --- a/api/core/tools/entities/api_entities.py +++ b/api/core/tools/entities/api_entities.py @@ -4,6 +4,7 @@ from typing import Any, Literal from pydantic import BaseModel, Field, field_validator +from core.entities.mcp_provider import MCPAuthentication, MCPConfiguration from core.model_runtime.utils.encoders import jsonable_encoder from core.plugin.entities.plugin_daemon import CredentialType from core.tools.__base.tool import ToolParameter @@ -45,10 +46,14 @@ class ToolProviderApiEntity(BaseModel): server_url: str | None = Field(default="", description="The server url of the tool") updated_at: int = Field(default_factory=lambda: int(datetime.now().timestamp())) server_identifier: str | None = Field(default="", description="The server identifier of the MCP tool") - timeout: float | None = Field(default=30.0, description="The timeout of the MCP tool") - sse_read_timeout: float | None = Field(default=300.0, description="The SSE read timeout of the MCP tool") + masked_headers: dict[str, str] | None = Field(default=None, description="The masked headers of the MCP tool") original_headers: dict[str, str] | None = Field(default=None, description="The original headers of the MCP tool") + authentication: MCPAuthentication | None = Field(default=None, description="The OAuth config of the MCP tool") + is_dynamic_registration: bool = Field(default=True, description="Whether the MCP tool is dynamically registered") + configuration: MCPConfiguration | None = Field( + default=None, description="The timeout and sse_read_timeout of the MCP tool" + ) @field_validator("tools", mode="before") @classmethod @@ -71,8 +76,15 @@ class ToolProviderApiEntity(BaseModel): if self.type == ToolProviderType.MCP: optional_fields.update(self.optional_field("updated_at", self.updated_at)) optional_fields.update(self.optional_field("server_identifier", self.server_identifier)) - optional_fields.update(self.optional_field("timeout", self.timeout)) - optional_fields.update(self.optional_field("sse_read_timeout", self.sse_read_timeout)) + optional_fields.update( + self.optional_field( + "configuration", self.configuration.model_dump() if self.configuration else MCPConfiguration() + ) + ) + optional_fields.update( + self.optional_field("authentication", self.authentication.model_dump() if self.authentication else None) + ) + optional_fields.update(self.optional_field("is_dynamic_registration", self.is_dynamic_registration)) optional_fields.update(self.optional_field("masked_headers", self.masked_headers)) optional_fields.update(self.optional_field("original_headers", self.original_headers)) return { diff --git a/api/core/tools/entities/tool_entities.py b/api/core/tools/entities/tool_entities.py index 72d319ef4d..353f3a646a 100644 --- a/api/core/tools/entities/tool_entities.py +++ b/api/core/tools/entities/tool_entities.py @@ -129,6 +129,7 @@ class ToolInvokeMessage(BaseModel): class JsonMessage(BaseModel): json_object: dict + suppress_output: bool = Field(default=False, description="Whether to suppress JSON output in result string") class BlobMessage(BaseModel): blob: bytes diff --git a/api/core/tools/mcp_tool/provider.py b/api/core/tools/mcp_tool/provider.py index f0e4dba9c3..557211c8c8 100644 --- a/api/core/tools/mcp_tool/provider.py +++ b/api/core/tools/mcp_tool/provider.py @@ -1,6 +1,6 @@ -import json from typing import Any, Self +from core.entities.mcp_provider import MCPProviderEntity from core.mcp.types import Tool as RemoteMCPTool from core.tools.__base.tool_provider import ToolProviderController from core.tools.__base.tool_runtime import ToolRuntime @@ -52,18 +52,25 @@ class MCPToolProviderController(ToolProviderController): """ from db provider """ - tools = [] - tools_data = json.loads(db_provider.tools) - remote_mcp_tools = [RemoteMCPTool.model_validate(tool) for tool in tools_data] - user = db_provider.load_user() + # Convert to entity first + provider_entity = db_provider.to_entity() + return cls.from_entity(provider_entity) + + @classmethod + def from_entity(cls, entity: MCPProviderEntity) -> Self: + """ + create a MCPToolProviderController from a MCPProviderEntity + """ + remote_mcp_tools = [RemoteMCPTool(**tool) for tool in entity.tools] + tools = [ ToolEntity( identity=ToolIdentity( - author=user.name if user else "Anonymous", + author="Anonymous", # Tool level author is not stored name=remote_mcp_tool.name, label=I18nObject(en_US=remote_mcp_tool.name, zh_Hans=remote_mcp_tool.name), - provider=db_provider.server_identifier, - icon=db_provider.icon, + provider=entity.provider_id, + icon=entity.icon if isinstance(entity.icon, str) else "", ), parameters=ToolTransformService.convert_mcp_schema_to_parameter(remote_mcp_tool.inputSchema), description=ToolDescription( @@ -72,31 +79,32 @@ class MCPToolProviderController(ToolProviderController): ), llm=remote_mcp_tool.description or "", ), + output_schema=remote_mcp_tool.outputSchema or {}, has_runtime_parameters=len(remote_mcp_tool.inputSchema) > 0, ) for remote_mcp_tool in remote_mcp_tools ] - if not db_provider.icon: + if not entity.icon: raise ValueError("Database provider icon is required") return cls( entity=ToolProviderEntityWithPlugin( identity=ToolProviderIdentity( - author=user.name if user else "Anonymous", - name=db_provider.name, - label=I18nObject(en_US=db_provider.name, zh_Hans=db_provider.name), + author="Anonymous", # Provider level author is not stored in entity + name=entity.name, + label=I18nObject(en_US=entity.name, zh_Hans=entity.name), description=I18nObject(en_US="", zh_Hans=""), - icon=db_provider.icon, + icon=entity.icon if isinstance(entity.icon, str) else "", ), plugin_id=None, credentials_schema=[], tools=tools, ), - provider_id=db_provider.server_identifier or "", - tenant_id=db_provider.tenant_id or "", - server_url=db_provider.decrypted_server_url, - headers=db_provider.decrypted_headers or {}, - timeout=db_provider.timeout, - sse_read_timeout=db_provider.sse_read_timeout, + provider_id=entity.provider_id, + tenant_id=entity.tenant_id, + server_url=entity.server_url, + headers=entity.headers, + timeout=entity.timeout, + sse_read_timeout=entity.sse_read_timeout, ) def _validate_credentials(self, user_id: str, credentials: dict[str, Any]): diff --git a/api/core/tools/mcp_tool/tool.py b/api/core/tools/mcp_tool/tool.py index 976d4dc942..fbaf31ad09 100644 --- a/api/core/tools/mcp_tool/tool.py +++ b/api/core/tools/mcp_tool/tool.py @@ -1,14 +1,18 @@ import base64 import json +import logging from collections.abc import Generator from typing import Any -from core.mcp.error import MCPAuthError, MCPConnectionError -from core.mcp.mcp_client import MCPClient -from core.mcp.types import ImageContent, TextContent +from core.mcp.auth_client import MCPClientWithAuthRetry +from core.mcp.error import MCPConnectionError +from core.mcp.types import AudioContent, CallToolResult, ImageContent, TextContent from core.tools.__base.tool import Tool from core.tools.__base.tool_runtime import ToolRuntime from core.tools.entities.tool_entities import ToolEntity, ToolInvokeMessage, ToolProviderType +from core.tools.errors import ToolInvokeError + +logger = logging.getLogger(__name__) class MCPTool(Tool): @@ -44,40 +48,37 @@ class MCPTool(Tool): app_id: str | None = None, message_id: str | None = None, ) -> Generator[ToolInvokeMessage, None, None]: - from core.tools.errors import ToolInvokeError - - try: - with MCPClient( - self.server_url, - self.provider_id, - self.tenant_id, - authed=True, - headers=self.headers, - timeout=self.timeout, - sse_read_timeout=self.sse_read_timeout, - ) as mcp_client: - tool_parameters = self._handle_none_parameter(tool_parameters) - result = mcp_client.invoke_tool(tool_name=self.entity.identity.name, tool_args=tool_parameters) - except MCPAuthError as e: - raise ToolInvokeError("Please auth the tool first") from e - except MCPConnectionError as e: - raise ToolInvokeError(f"Failed to connect to MCP server: {e}") from e - except Exception as e: - raise ToolInvokeError(f"Failed to invoke tool: {e}") from e - + result = self.invoke_remote_mcp_tool(tool_parameters) + # handle dify tool output for content in result.content: if isinstance(content, TextContent): yield from self._process_text_content(content) elif isinstance(content, ImageContent): yield self._process_image_content(content) + elif isinstance(content, AudioContent): + yield self._process_audio_content(content) + else: + logger.warning("Unsupported content type=%s", type(content)) + + # handle MCP structured output + if self.entity.output_schema and result.structuredContent: + for k, v in result.structuredContent.items(): + yield self.create_variable_message(k, v) def _process_text_content(self, content: TextContent) -> Generator[ToolInvokeMessage, None, None]: """Process text content and yield appropriate messages.""" - try: - content_json = json.loads(content.text) - yield from self._process_json_content(content_json) - except json.JSONDecodeError: - yield self.create_text_message(content.text) + # Check if content looks like JSON before attempting to parse + text = content.text.strip() + if text and text[0] in ("{", "[") and text[-1] in ("}", "]"): + try: + content_json = json.loads(text) + yield from self._process_json_content(content_json) + return + except json.JSONDecodeError: + pass + + # If not JSON or parsing failed, treat as plain text + yield self.create_text_message(content.text) def _process_json_content(self, content_json: Any) -> Generator[ToolInvokeMessage, None, None]: """Process JSON content based on its type.""" @@ -104,6 +105,10 @@ class MCPTool(Tool): """Process image content and return a blob message.""" return self.create_blob_message(blob=base64.b64decode(content.data), meta={"mime_type": content.mimeType}) + def _process_audio_content(self, content: AudioContent) -> ToolInvokeMessage: + """Process audio content and return a blob message.""" + return self.create_blob_message(blob=base64.b64decode(content.data), meta={"mime_type": content.mimeType}) + def fork_tool_runtime(self, runtime: ToolRuntime) -> "MCPTool": return MCPTool( entity=self.entity, @@ -126,3 +131,44 @@ class MCPTool(Tool): for key, value in parameter.items() if value is not None and not (isinstance(value, str) and value.strip() == "") } + + def invoke_remote_mcp_tool(self, tool_parameters: dict[str, Any]) -> CallToolResult: + headers = self.headers.copy() if self.headers else {} + tool_parameters = self._handle_none_parameter(tool_parameters) + + from sqlalchemy.orm import Session + + from extensions.ext_database import db + from services.tools.mcp_tools_manage_service import MCPToolManageService + + # Step 1: Load provider entity and credentials in a short-lived session + # This minimizes database connection hold time + with Session(db.engine, expire_on_commit=False) as session: + mcp_service = MCPToolManageService(session=session) + provider_entity = mcp_service.get_provider_entity(self.provider_id, self.tenant_id, by_server_id=True) + + # Decrypt and prepare all credentials before closing session + server_url = provider_entity.decrypt_server_url() + headers = provider_entity.decrypt_headers() + + # Try to get existing token and add to headers + if not headers: + tokens = provider_entity.retrieve_tokens() + if tokens and tokens.access_token: + headers["Authorization"] = f"{tokens.token_type.capitalize()} {tokens.access_token}" + + # Step 2: Session is now closed, perform network operations without holding database connection + # MCPClientWithAuthRetry will create a new session lazily only if auth retry is needed + try: + with MCPClientWithAuthRetry( + server_url=server_url, + headers=headers, + timeout=self.timeout, + sse_read_timeout=self.sse_read_timeout, + provider_entity=provider_entity, + ) as mcp_client: + return mcp_client.invoke_tool(tool_name=self.entity.identity.name, tool_args=tool_parameters) + except MCPConnectionError as e: + raise ToolInvokeError(f"Failed to connect to MCP server: {e}") from e + except Exception as e: + raise ToolInvokeError(f"Failed to invoke tool: {e}") from e diff --git a/api/core/tools/tool_engine.py b/api/core/tools/tool_engine.py index 9fb6062770..13fd579e20 100644 --- a/api/core/tools/tool_engine.py +++ b/api/core/tools/tool_engine.py @@ -228,29 +228,41 @@ class ToolEngine: """ Handle tool response """ - result = "" + parts: list[str] = [] + json_parts: list[str] = [] + for response in tool_response: if response.type == ToolInvokeMessage.MessageType.TEXT: - result += cast(ToolInvokeMessage.TextMessage, response.message).text + parts.append(cast(ToolInvokeMessage.TextMessage, response.message).text) elif response.type == ToolInvokeMessage.MessageType.LINK: - result += ( + parts.append( f"result link: {cast(ToolInvokeMessage.TextMessage, response.message).text}." + " please tell user to check it." ) elif response.type in {ToolInvokeMessage.MessageType.IMAGE_LINK, ToolInvokeMessage.MessageType.IMAGE}: - result += ( + parts.append( "image has been created and sent to user already, " + "you do not need to create it, just tell the user to check it now." ) elif response.type == ToolInvokeMessage.MessageType.JSON: - result += json.dumps( - safe_json_value(cast(ToolInvokeMessage.JsonMessage, response.message).json_object), - ensure_ascii=False, + json_message = cast(ToolInvokeMessage.JsonMessage, response.message) + if json_message.suppress_output: + continue + json_parts.append( + json.dumps( + safe_json_value(cast(ToolInvokeMessage.JsonMessage, response.message).json_object), + ensure_ascii=False, + ) ) else: - result += str(response.message) + parts.append(str(response.message)) - return result + # Add JSON parts, avoiding duplicates from text parts. + if json_parts: + existing_parts = set(parts) + parts.extend(p for p in json_parts if p not in existing_parts) + + return "".join(parts) @staticmethod def _extract_tool_response_binary_and_text( diff --git a/api/core/tools/tool_manager.py b/api/core/tools/tool_manager.py index 440744462e..8f5fa7cab5 100644 --- a/api/core/tools/tool_manager.py +++ b/api/core/tools/tool_manager.py @@ -8,24 +8,38 @@ from threading import Lock from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast import sqlalchemy as sa -from pydantic import TypeAdapter from sqlalchemy import select from sqlalchemy.orm import Session from yarl import URL import contexts from configs import dify_config +from core.helper.provider_cache import ToolProviderCredentialsCache +from core.plugin.impl.tool import PluginToolManager +from core.tools.__base.tool_provider import ToolProviderController +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.mcp_tool.provider import MCPToolProviderController +from core.tools.mcp_tool.tool import MCPTool +from core.tools.plugin_tool.provider import PluginToolProviderController +from core.tools.plugin_tool.tool import PluginTool +from core.tools.utils.uuid_utils import is_valid_uuid +from core.tools.workflow_as_tool.provider import WorkflowToolProviderController +from core.workflow.runtime.variable_pool import VariablePool +from extensions.ext_database import db +from models.provider_ids import ToolProviderID +from services.enterprise.plugin_manager_service import PluginCredentialType +from services.tools.mcp_tools_manage_service import MCPToolManageService + +if TYPE_CHECKING: + from core.workflow.nodes.tool.entities import ToolEntity + from core.agent.entities import AgentToolEntity from core.app.entities.app_invoke_entities import InvokeFrom from core.helper.module_import_helper import load_single_subclass_from_source from core.helper.position_helper import is_filtered -from core.helper.provider_cache import ToolProviderCredentialsCache from core.model_runtime.utils.encoders import jsonable_encoder from core.plugin.entities.plugin_daemon import CredentialType -from core.plugin.impl.tool import PluginToolManager from core.tools.__base.tool import Tool -from core.tools.__base.tool_provider import ToolProviderController -from core.tools.__base.tool_runtime import ToolRuntime from core.tools.builtin_tool.provider import BuiltinToolProviderController from core.tools.builtin_tool.providers._positions import BuiltinToolProviderSort from core.tools.builtin_tool.tool import BuiltinTool @@ -40,26 +54,15 @@ from core.tools.entities.tool_entities import ( ToolProviderType, ) from core.tools.errors import ToolProviderNotFoundError -from core.tools.mcp_tool.provider import MCPToolProviderController -from core.tools.mcp_tool.tool import MCPTool -from core.tools.plugin_tool.provider import PluginToolProviderController -from core.tools.plugin_tool.tool import PluginTool from core.tools.tool_label_manager import ToolLabelManager from core.tools.utils.configuration import ToolParameterConfigurationManager from core.tools.utils.encryption import create_provider_encrypter, create_tool_provider_encrypter -from core.tools.utils.uuid_utils import is_valid_uuid -from core.tools.workflow_as_tool.provider import WorkflowToolProviderController from core.tools.workflow_as_tool.tool import WorkflowTool -from extensions.ext_database import db -from models.provider_ids import ToolProviderID -from models.tools import ApiToolProvider, BuiltinToolProvider, MCPToolProvider, WorkflowToolProvider -from services.enterprise.plugin_manager_service import PluginCredentialType -from services.tools.mcp_tools_manage_service import MCPToolManageService +from models.tools import ApiToolProvider, BuiltinToolProvider, WorkflowToolProvider from services.tools.tools_transform_service import ToolTransformService if TYPE_CHECKING: from core.workflow.nodes.tool.entities import ToolEntity - from core.workflow.runtime import VariablePool logger = logging.getLogger(__name__) @@ -284,10 +287,8 @@ class ToolManager: credentials=decrypted_credentials, ) # update the credentials - builtin_provider.encrypted_credentials = ( - TypeAdapter(dict[str, Any]) - .dump_json(encrypter.encrypt(dict(refreshed_credentials.credentials))) - .decode("utf-8") + builtin_provider.encrypted_credentials = json.dumps( + encrypter.encrypt(refreshed_credentials.credentials) ) builtin_provider.expires_at = refreshed_credentials.expires_at db.session.commit() @@ -317,7 +318,7 @@ class ToolManager: return api_provider.get_tool(tool_name).fork_tool_runtime( runtime=ToolRuntime( tenant_id=tenant_id, - credentials=encrypter.decrypt(credentials), + credentials=dict(encrypter.decrypt(credentials)), invoke_from=invoke_from, tool_invoke_from=tool_invoke_from, ) @@ -616,12 +617,28 @@ class ToolManager: """ # according to multi credentials, select the one with is_default=True first, then created_at oldest # for compatibility with old version - sql = """ + if dify_config.SQLALCHEMY_DATABASE_URI_SCHEME == "postgresql": + # PostgreSQL: Use DISTINCT ON + sql = """ SELECT DISTINCT ON (tenant_id, provider) id FROM tool_builtin_providers WHERE tenant_id = :tenant_id ORDER BY tenant_id, provider, is_default DESC, created_at DESC """ + else: + # MySQL: Use window function to achieve same result + sql = """ + SELECT id FROM ( + SELECT id, + ROW_NUMBER() OVER ( + PARTITION BY tenant_id, provider + ORDER BY is_default DESC, created_at DESC + ) as rn + FROM tool_builtin_providers + WHERE tenant_id = :tenant_id + ) ranked WHERE rn = 1 + """ + with Session(db.engine, autoflush=False) as session: ids = [row.id for row in session.execute(sa.text(sql), {"tenant_id": tenant_id}).all()] return session.query(BuiltinToolProvider).where(BuiltinToolProvider.id.in_(ids)).all() @@ -719,7 +736,9 @@ class ToolManager: ) result_providers[f"workflow_provider.{user_provider.name}"] = user_provider if "mcp" in filters: - mcp_providers = MCPToolManageService.retrieve_mcp_tools(tenant_id, for_list=True) + with Session(db.engine) as session: + mcp_service = MCPToolManageService(session=session) + mcp_providers = mcp_service.list_providers(tenant_id=tenant_id, for_list=True) for mcp_provider in mcp_providers: result_providers[f"mcp_provider.{mcp_provider.name}"] = mcp_provider @@ -774,17 +793,12 @@ class ToolManager: :return: the provider controller, the credentials """ - provider: MCPToolProvider | None = ( - db.session.query(MCPToolProvider) - .where( - MCPToolProvider.server_identifier == provider_id, - MCPToolProvider.tenant_id == tenant_id, - ) - .first() - ) - - if provider is None: - raise ToolProviderNotFoundError(f"mcp provider {provider_id} not found") + with Session(db.engine) as session: + mcp_service = MCPToolManageService(session=session) + try: + provider = mcp_service.get_provider(server_identifier=provider_id, tenant_id=tenant_id) + except ValueError: + raise ToolProviderNotFoundError(f"mcp provider {provider_id} not found") controller = MCPToolProviderController.from_db(provider) @@ -922,16 +936,15 @@ class ToolManager: @classmethod def generate_mcp_tool_icon_url(cls, tenant_id: str, provider_id: str) -> Mapping[str, str] | str: try: - mcp_provider: MCPToolProvider | None = ( - db.session.query(MCPToolProvider) - .where(MCPToolProvider.tenant_id == tenant_id, MCPToolProvider.server_identifier == provider_id) - .first() - ) - - if mcp_provider is None: - raise ToolProviderNotFoundError(f"mcp provider {provider_id} not found") - - return mcp_provider.provider_icon + with Session(db.engine) as session: + mcp_service = MCPToolManageService(session=session) + try: + mcp_provider = mcp_service.get_provider_entity( + provider_id=provider_id, tenant_id=tenant_id, by_server_id=True + ) + return mcp_provider.provider_icon + except ValueError: + raise ToolProviderNotFoundError(f"mcp provider {provider_id} not found") except Exception: return {"background": "#252525", "content": "\ud83d\ude01"} diff --git a/api/core/tools/workflow_as_tool/tool.py b/api/core/tools/workflow_as_tool/tool.py index 2cd46647a0..5703c19c88 100644 --- a/api/core/tools/workflow_as_tool/tool.py +++ b/api/core/tools/workflow_as_tool/tool.py @@ -117,7 +117,7 @@ class WorkflowTool(Tool): self._latest_usage = self._derive_usage_from_result(data) yield self.create_text_message(json.dumps(outputs, ensure_ascii=False)) - yield self.create_json_message(outputs) + yield self.create_json_message(outputs, suppress_output=True) @property def latest_usage(self) -> LLMUsage: diff --git a/api/core/trigger/debug/event_bus.py b/api/core/trigger/debug/event_bus.py index 1c4bba0009..9d10e1a0e0 100644 --- a/api/core/trigger/debug/event_bus.py +++ b/api/core/trigger/debug/event_bus.py @@ -107,7 +107,7 @@ class TriggerDebugEventBus: Returns: Event object if available, None otherwise """ - address_id: str = hashlib.sha1(f"{user_id}|{app_id}|{node_id}".encode()).hexdigest() + address_id: str = hashlib.sha256(f"{user_id}|{app_id}|{node_id}".encode()).hexdigest() address: str = f"trigger_debug_inbox:{tenant_id}:{address_id}" try: @@ -118,7 +118,6 @@ class TriggerDebugEventBus: pool_key, address_id, ) - logger.info("event_data: %s", event_data) return event_type.model_validate_json(json_data=event_data) if event_data else None except RedisError: logger.exception("Failed to poll event from pool: %s", pool_key) diff --git a/api/core/trigger/debug/event_selectors.py b/api/core/trigger/debug/event_selectors.py index f940d7f6b0..bd1ff4ebfe 100644 --- a/api/core/trigger/debug/event_selectors.py +++ b/api/core/trigger/debug/event_selectors.py @@ -1,8 +1,11 @@ """Trigger debug service supporting plugin and webhook debugging in draft workflows.""" +import hashlib import logging +import time from abc import ABC, abstractmethod from collections.abc import Mapping +from datetime import datetime from typing import Any from pydantic import BaseModel @@ -14,11 +17,14 @@ from core.trigger.debug.events import ( ScheduleDebugEvent, WebhookDebugEvent, build_plugin_pool_key, - build_schedule_pool_key, build_webhook_pool_key, ) from core.workflow.enums import NodeType from core.workflow.nodes.trigger_plugin.entities import TriggerEventNodeData +from core.workflow.nodes.trigger_schedule.entities import ScheduleConfig +from extensions.ext_redis import redis_client +from libs.datetime_utils import ensure_naive_utc, naive_utc_now +from libs.schedule_utils import calculate_next_run_at from models.model import App from models.provider_ids import TriggerProviderID from models.workflow import Workflow @@ -125,19 +131,71 @@ class WebhookTriggerDebugEventPoller(TriggerDebugEventPoller): class ScheduleTriggerDebugEventPoller(TriggerDebugEventPoller): - def poll(self) -> TriggerDebugEvent | None: - pool_key: str = build_schedule_pool_key(tenant_id=self.tenant_id, app_id=self.app_id, node_id=self.node_id) - schedule_event: ScheduleDebugEvent | None = TriggerDebugEventBus.poll( - event_type=ScheduleDebugEvent, - pool_key=pool_key, - tenant_id=self.tenant_id, - user_id=self.user_id, - app_id=self.app_id, + """ + Poller for schedule trigger debug events. + + This poller will simulate the schedule trigger event by creating a schedule debug runtime cache + and calculating the next run at. + """ + + RUNTIME_CACHE_TTL = 60 * 5 + + class ScheduleDebugRuntime(BaseModel): + cache_key: str + timezone: str + cron_expression: str + next_run_at: datetime + + def schedule_debug_runtime_key(self, cron_hash: str) -> str: + return f"schedule_debug_runtime:{self.tenant_id}:{self.user_id}:{self.app_id}:{self.node_id}:{cron_hash}" + + def get_or_create_schedule_debug_runtime(self): + from services.trigger.schedule_service import ScheduleService + + schedule_config: ScheduleConfig = ScheduleService.to_schedule_config(self.node_config) + cron_hash = hashlib.sha256(schedule_config.cron_expression.encode()).hexdigest() + cache_key = self.schedule_debug_runtime_key(cron_hash) + runtime_cache = redis_client.get(cache_key) + if runtime_cache is None: + schedule_debug_runtime = self.ScheduleDebugRuntime( + cron_expression=schedule_config.cron_expression, + timezone=schedule_config.timezone, + cache_key=cache_key, + next_run_at=ensure_naive_utc( + calculate_next_run_at(schedule_config.cron_expression, schedule_config.timezone) + ), + ) + redis_client.setex( + name=self.schedule_debug_runtime_key(cron_hash), + time=self.RUNTIME_CACHE_TTL, + value=schedule_debug_runtime.model_dump_json(), + ) + return schedule_debug_runtime + else: + redis_client.expire(cache_key, self.RUNTIME_CACHE_TTL) + runtime = self.ScheduleDebugRuntime.model_validate_json(runtime_cache) + runtime.next_run_at = ensure_naive_utc(runtime.next_run_at) + return runtime + + def create_schedule_event(self, schedule_debug_runtime: ScheduleDebugRuntime) -> ScheduleDebugEvent: + redis_client.delete(schedule_debug_runtime.cache_key) + return ScheduleDebugEvent( + timestamp=int(time.time()), node_id=self.node_id, + inputs={}, ) - if not schedule_event: + + def poll(self) -> TriggerDebugEvent | None: + schedule_debug_runtime = self.get_or_create_schedule_debug_runtime() + if schedule_debug_runtime.next_run_at > naive_utc_now(): return None - return TriggerDebugEvent(workflow_args=schedule_event.inputs, node_id=self.node_id) + + schedule_event: ScheduleDebugEvent = self.create_schedule_event(schedule_debug_runtime) + workflow_args: Mapping[str, Any] = { + "inputs": schedule_event.inputs or {}, + "files": [], + } + return TriggerDebugEvent(workflow_args=workflow_args, node_id=self.node_id) def create_event_poller( diff --git a/api/core/trigger/debug/events.py b/api/core/trigger/debug/events.py index 4766ec4c6a..9f7bab5e49 100644 --- a/api/core/trigger/debug/events.py +++ b/api/core/trigger/debug/events.py @@ -26,16 +26,6 @@ class ScheduleDebugEvent(BaseDebugEvent): inputs: Mapping[str, Any] -def build_schedule_pool_key(tenant_id: str, app_id: str, node_id: str) -> str: - """Generate pool key for schedule events. - Args: - tenant_id: Tenant ID - app_id: App ID - node_id: Node ID - """ - return f"{TriggerDebugPoolKey.SCHEDULE}:{tenant_id}:{app_id}:{node_id}" - - class WebhookDebugEvent(BaseDebugEvent): """Debug event for webhook triggers.""" diff --git a/api/core/trigger/entities/api_entities.py b/api/core/trigger/entities/api_entities.py index 7e77e89932..ad7c816144 100644 --- a/api/core/trigger/entities/api_entities.py +++ b/api/core/trigger/entities/api_entities.py @@ -1,5 +1,5 @@ from collections.abc import Mapping -from typing import Any, Optional +from typing import Any from pydantic import BaseModel, Field @@ -31,7 +31,7 @@ class EventApiEntity(BaseModel): identity: EventIdentity = Field(description="The identity of the trigger") description: I18nObject = Field(description="The description of the trigger") parameters: list[EventParameter] = Field(description="The parameters of the trigger") - output_schema: Optional[Mapping[str, Any]] = Field(description="The output schema of the trigger") + output_schema: Mapping[str, Any] | None = Field(description="The output schema of the trigger") class TriggerProviderApiEntity(BaseModel): @@ -39,19 +39,19 @@ class TriggerProviderApiEntity(BaseModel): name: str = Field(..., description="The name of the trigger provider") label: I18nObject = Field(..., description="The label of the trigger provider") description: I18nObject = Field(..., description="The description of the trigger provider") - icon: Optional[str] = Field(default=None, description="The icon of the trigger provider") - icon_dark: Optional[str] = Field(default=None, description="The dark icon of the trigger provider") + icon: str | None = Field(default=None, description="The icon of the trigger provider") + icon_dark: str | None = Field(default=None, description="The dark icon of the trigger provider") tags: list[str] = Field(default_factory=list, description="The tags of the trigger provider") - plugin_id: Optional[str] = Field(default="", description="The plugin id of the tool") - plugin_unique_identifier: Optional[str] = Field(default="", description="The unique identifier of the tool") + plugin_id: str | None = Field(default="", description="The plugin id of the tool") + plugin_unique_identifier: str | None = Field(default="", description="The unique identifier of the tool") supported_creation_methods: list[TriggerCreationMethod] = Field( default_factory=list, description="Supported creation methods for the trigger provider. like 'OAUTH', 'APIKEY', 'MANUAL'.", ) - subscription_constructor: Optional[SubscriptionConstructor] = Field( + subscription_constructor: SubscriptionConstructor | None = Field( default=None, description="The subscription constructor of the trigger provider" ) diff --git a/api/core/trigger/entities/entities.py b/api/core/trigger/entities/entities.py index 5f70bdeae2..49e24fe8b8 100644 --- a/api/core/trigger/entities/entities.py +++ b/api/core/trigger/entities/entities.py @@ -1,7 +1,7 @@ from collections.abc import Mapping from datetime import datetime from enum import StrEnum -from typing import Any, Optional, Union +from typing import Any, Union from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator @@ -40,12 +40,12 @@ class EventParameter(BaseModel): name: str = Field(..., description="The name of the parameter") label: I18nObject = Field(..., description="The label presented to the user") type: EventParameterType = Field(..., description="The type of the parameter") - auto_generate: Optional[PluginParameterAutoGenerate] = Field( + auto_generate: PluginParameterAutoGenerate | None = Field( default=None, description="The auto generate of the parameter" ) - template: Optional[PluginParameterTemplate] = Field(default=None, description="The template of the parameter") - scope: Optional[str] = None - required: Optional[bool] = False + template: PluginParameterTemplate | None = Field(default=None, description="The template of the parameter") + scope: str | None = None + required: bool | None = False multiple: bool | None = Field( default=False, description="Whether the parameter is multiple select, only valid for select or dynamic-select type", @@ -53,9 +53,9 @@ class EventParameter(BaseModel): default: Union[int, float, str, list[Any], None] = None min: Union[float, int, None] = None max: Union[float, int, None] = None - precision: Optional[int] = None - options: Optional[list[PluginParameterOption]] = None - description: Optional[I18nObject] = None + precision: int | None = None + options: list[PluginParameterOption] | None = None + description: I18nObject | None = None class TriggerProviderIdentity(BaseModel): @@ -67,8 +67,8 @@ class TriggerProviderIdentity(BaseModel): name: str = Field(..., description="The name of the trigger provider") label: I18nObject = Field(..., description="The label of the trigger provider") description: I18nObject = Field(..., description="The description of the trigger provider") - icon: Optional[str] = Field(default=None, description="The icon of the trigger provider") - icon_dark: Optional[str] = Field(default=None, description="The dark icon of the trigger provider") + icon: str | None = Field(default=None, description="The icon of the trigger provider") + icon_dark: str | None = Field(default=None, description="The dark icon of the trigger provider") tags: list[str] = Field(default_factory=list, description="The tags of the trigger provider") @@ -80,7 +80,7 @@ class EventIdentity(BaseModel): author: str = Field(..., description="The author of the event") name: str = Field(..., description="The name of the event") label: I18nObject = Field(..., description="The label of the event") - provider: Optional[str] = Field(default=None, description="The provider of the event") + provider: str | None = Field(default=None, description="The provider of the event") class EventEntity(BaseModel): @@ -93,7 +93,7 @@ class EventEntity(BaseModel): default_factory=list[EventParameter], description="The parameters of the event" ) description: I18nObject = Field(..., description="The description of the event") - output_schema: Optional[Mapping[str, Any]] = Field( + output_schema: Mapping[str, Any] | None = Field( default=None, description="The output schema that this event produces" ) @@ -124,7 +124,7 @@ class SubscriptionConstructor(BaseModel): description="The credentials schema of the subscription constructor", ) - oauth_schema: Optional[OAuthSchema] = Field( + oauth_schema: OAuthSchema | None = Field( default=None, description="The OAuth schema of the subscription constructor if OAuth is supported", ) @@ -183,7 +183,7 @@ class UnsubscribeResult(BaseModel): success: bool = Field(..., description="Whether the unsubscription was successful") - message: Optional[str] = Field( + message: str | None = Field( None, description="Human-readable message about the operation result. " "Success message for successful operations, " @@ -208,7 +208,7 @@ class SubscriptionBuilder(BaseModel): endpoint_id: str = Field(..., description="The endpoint id of the subscription builder") parameters: Mapping[str, Any] = Field(..., description="The parameters of the subscription builder") properties: Mapping[str, Any] = Field(..., description="The properties of the subscription builder") - credentials: Mapping[str, str] = Field(..., description="The credentials of the subscription builder") + credentials: Mapping[str, Any] = Field(..., description="The credentials of the subscription builder") credential_type: str | None = Field(default=None, description="The credential type of the subscription builder") credential_expires_at: int | None = Field( default=None, description="The credential expires at of the subscription builder" @@ -227,7 +227,7 @@ class SubscriptionBuilderUpdater(BaseModel): name: str | None = Field(default=None, description="The name of the subscription builder") parameters: Mapping[str, Any] | None = Field(default=None, description="The parameters of the subscription builder") properties: Mapping[str, Any] | None = Field(default=None, description="The properties of the subscription builder") - credentials: Mapping[str, str] | None = Field( + credentials: Mapping[str, Any] | None = Field( default=None, description="The credentials of the subscription builder" ) credential_type: str | None = Field(default=None, description="The credential type of the subscription builder") diff --git a/api/core/trigger/trigger_manager.py b/api/core/trigger/trigger_manager.py index 8568d7f204..0ef968b265 100644 --- a/api/core/trigger/trigger_manager.py +++ b/api/core/trigger/trigger_manager.py @@ -13,14 +13,14 @@ import contexts from configs import dify_config from core.plugin.entities.plugin_daemon import CredentialType, PluginTriggerProviderEntity from core.plugin.entities.request import TriggerInvokeEventResponse -from core.plugin.impl.exc import PluginDaemonError, PluginInvokeError, PluginNotFoundError +from core.plugin.impl.exc import PluginDaemonError, PluginNotFoundError from core.plugin.impl.trigger import PluginTriggerClient from core.trigger.entities.entities import ( EventEntity, Subscription, UnsubscribeResult, ) -from core.trigger.errors import EventIgnoreError, TriggerPluginInvokeError +from core.trigger.errors import EventIgnoreError from core.trigger.provider import PluginTriggerProviderController from models.provider_ids import TriggerProviderID @@ -189,13 +189,10 @@ class TriggerManager: request=request, payload=payload, ) - except EventIgnoreError as e: + except EventIgnoreError: return TriggerInvokeEventResponse(variables={}, cancelled=True) - except PluginInvokeError as e: - logger.exception("Failed to invoke trigger event") - raise TriggerPluginInvokeError( - description=e.to_user_friendly_error(plugin_name=provider.entity.identity.name) - ) from e + except Exception as e: + raise e @classmethod def subscribe_trigger( diff --git a/api/core/variables/types.py b/api/core/variables/types.py index a2e12e742b..ce71711344 100644 --- a/api/core/variables/types.py +++ b/api/core/variables/types.py @@ -1,9 +1,12 @@ from collections.abc import Mapping from enum import StrEnum -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Optional from core.file.models import File +if TYPE_CHECKING: + pass + class ArrayValidation(StrEnum): """Strategy for validating array elements. @@ -155,6 +158,17 @@ class SegmentType(StrEnum): return isinstance(value, File) elif self == SegmentType.NONE: return value is None + elif self == SegmentType.GROUP: + from .segment_group import SegmentGroup + from .segments import Segment + + if isinstance(value, SegmentGroup): + return all(isinstance(item, Segment) for item in value.value) + + if isinstance(value, list): + return all(isinstance(item, Segment) for item in value) + + return False else: raise AssertionError("this statement should be unreachable.") @@ -202,6 +216,35 @@ class SegmentType(StrEnum): raise ValueError(f"element_type is only supported by array type, got {self}") return _ARRAY_ELEMENT_TYPES_MAPPING.get(self) + @staticmethod + def get_zero_value(t: "SegmentType"): + # Lazy import to avoid circular dependency + from factories import variable_factory + + match t: + case ( + SegmentType.ARRAY_OBJECT + | SegmentType.ARRAY_ANY + | SegmentType.ARRAY_STRING + | SegmentType.ARRAY_NUMBER + | SegmentType.ARRAY_BOOLEAN + ): + return variable_factory.build_segment_with_type(t, []) + case SegmentType.OBJECT: + return variable_factory.build_segment({}) + case SegmentType.STRING: + return variable_factory.build_segment("") + case SegmentType.INTEGER: + return variable_factory.build_segment(0) + case SegmentType.FLOAT: + return variable_factory.build_segment(0.0) + case SegmentType.NUMBER: + return variable_factory.build_segment(0) + case SegmentType.BOOLEAN: + return variable_factory.build_segment(False) + case _: + raise ValueError(f"unsupported variable type: {t}") + _ARRAY_ELEMENT_TYPES_MAPPING: Mapping[SegmentType, SegmentType] = { # ARRAY_ANY does not have corresponding element type. diff --git a/api/core/workflow/entities/__init__.py b/api/core/workflow/entities/__init__.py index 185f0ad620..f4ce9052e0 100644 --- a/api/core/workflow/entities/__init__.py +++ b/api/core/workflow/entities/__init__.py @@ -4,6 +4,7 @@ from .agent import AgentNodeStrategyInit from .graph_init_params import GraphInitParams from .workflow_execution import WorkflowExecution from .workflow_node_execution import WorkflowNodeExecution +from .workflow_pause import WorkflowPauseEntity __all__ = [ "AgentNodeStrategyInit", @@ -12,4 +13,5 @@ __all__ = [ "VariablePool", "WorkflowExecution", "WorkflowNodeExecution", + "WorkflowPauseEntity", ] diff --git a/api/core/workflow/entities/pause_reason.py b/api/core/workflow/entities/pause_reason.py new file mode 100644 index 0000000000..16ad3d639d --- /dev/null +++ b/api/core/workflow/entities/pause_reason.py @@ -0,0 +1,49 @@ +from enum import StrEnum, auto +from typing import Annotated, Any, ClassVar, TypeAlias + +from pydantic import BaseModel, Discriminator, Tag + + +class _PauseReasonType(StrEnum): + HUMAN_INPUT_REQUIRED = auto() + SCHEDULED_PAUSE = auto() + + +class _PauseReasonBase(BaseModel): + TYPE: ClassVar[_PauseReasonType] + + +class HumanInputRequired(_PauseReasonBase): + TYPE = _PauseReasonType.HUMAN_INPUT_REQUIRED + + +class SchedulingPause(_PauseReasonBase): + TYPE = _PauseReasonType.SCHEDULED_PAUSE + + message: str + + +def _get_pause_reason_discriminator(v: Any) -> _PauseReasonType | None: + if isinstance(v, _PauseReasonBase): + return v.TYPE + elif isinstance(v, dict): + reason_type_str = v.get("TYPE") + if reason_type_str is None: + return None + try: + reason_type = _PauseReasonType(reason_type_str) + except ValueError: + return None + return reason_type + else: + # return None if the discriminator value isn't found + return None + + +PauseReason: TypeAlias = Annotated[ + ( + Annotated[HumanInputRequired, Tag(_PauseReasonType.HUMAN_INPUT_REQUIRED)] + | Annotated[SchedulingPause, Tag(_PauseReasonType.SCHEDULED_PAUSE)] + ), + Discriminator(_get_pause_reason_discriminator), +] diff --git a/api/core/workflow/entities/workflow_pause.py b/api/core/workflow/entities/workflow_pause.py new file mode 100644 index 0000000000..2f31c1ff53 --- /dev/null +++ b/api/core/workflow/entities/workflow_pause.py @@ -0,0 +1,61 @@ +""" +Domain entities for workflow pause management. + +This module contains the domain model for workflow pause, which is used +by the core workflow module. These models are independent of the storage mechanism +and don't contain implementation details like tenant_id, app_id, etc. +""" + +from abc import ABC, abstractmethod +from datetime import datetime + + +class WorkflowPauseEntity(ABC): + """ + Abstract base class for workflow pause entities. + + This domain model represents a paused workflow execution state, + without implementation details like tenant_id, app_id, etc. + It provides the interface for managing workflow pause/resume operations + and state persistence through file storage. + + The `WorkflowPauseEntity` is never reused. If a workflow execution pauses multiple times, + it will generate multiple `WorkflowPauseEntity` records. + """ + + @property + @abstractmethod + def id(self) -> str: + """The identifier of current WorkflowPauseEntity""" + pass + + @property + @abstractmethod + def workflow_execution_id(self) -> str: + """The identifier of the workflow execution record the pause associated with. + Correspond to `WorkflowExecution.id`. + """ + + @abstractmethod + def get_state(self) -> bytes: + """ + Retrieve the serialized workflow state from storage. + + This method should load and return the workflow execution state + that was saved when the workflow was paused. The state contains + all necessary information to resume the workflow execution. + + Returns: + bytes: The serialized workflow state containing + execution context, variable values, node states, etc. + + """ + ... + + @property + @abstractmethod + def resumed_at(self) -> datetime | None: + """`resumed_at` return the resumption time of the current pause, or `None` if + the pause is not resumed yet. + """ + pass diff --git a/api/core/workflow/enums.py b/api/core/workflow/enums.py index 4e9a30dd6e..cf12d5ec1f 100644 --- a/api/core/workflow/enums.py +++ b/api/core/workflow/enums.py @@ -22,6 +22,7 @@ class SystemVariableKey(StrEnum): APP_ID = "app_id" WORKFLOW_ID = "workflow_id" WORKFLOW_EXECUTION_ID = "workflow_run_id" + TIMESTAMP = "timestamp" # RAG Pipeline DOCUMENT_ID = "document_id" ORIGINAL_DOCUMENT_ID = "original_document_id" @@ -63,11 +64,21 @@ class NodeType(StrEnum): TRIGGER_PLUGIN = "trigger-plugin" HUMAN_INPUT = "human-input" + @property + def is_trigger_node(self) -> bool: + """Check if this node type is a trigger node.""" + return self in [ + NodeType.TRIGGER_WEBHOOK, + NodeType.TRIGGER_SCHEDULE, + NodeType.TRIGGER_PLUGIN, + ] + @property def is_start_node(self) -> bool: """Check if this node type can serve as a workflow entry point.""" return self in [ NodeType.START, + NodeType.DATASOURCE, NodeType.TRIGGER_WEBHOOK, NodeType.TRIGGER_SCHEDULE, NodeType.TRIGGER_PLUGIN, @@ -105,13 +116,111 @@ class WorkflowType(StrEnum): class WorkflowExecutionStatus(StrEnum): + # State diagram for the workflw status: + # (@) means start, (*) means end + # + # ┌------------------>------------------------->------------------->--------------┐ + # | | + # | ┌-----------------------<--------------------┐ | + # ^ | | | + # | | ^ | + # | V | | + # ┌-----------┐ ┌-----------------------┐ ┌-----------┐ V + # | Scheduled |------->| Running |---------------------->| paused | | + # └-----------┘ └-----------------------┘ └-----------┘ | + # | | | | | | | + # | | | | | | | + # ^ | | | V V | + # | | | | | ┌---------┐ | + # (@) | | | └------------------------>| Stopped |<----┘ + # | | | └---------┘ + # | | | | + # | | V V + # | | ┌-----------┐ | + # | | | Succeeded |------------->--------------┤ + # | | └-----------┘ | + # | V V + # | +--------┐ | + # | | Failed |---------------------->----------------┤ + # | └--------┘ | + # V V + # ┌---------------------┐ | + # | Partially Succeeded |---------------------->-----------------┘--------> (*) + # └---------------------┘ + # + # Mermaid diagram: + # + # --- + # title: State diagram for Workflow run state + # --- + # stateDiagram-v2 + # scheduled: Scheduled + # running: Running + # succeeded: Succeeded + # failed: Failed + # partial_succeeded: Partial Succeeded + # paused: Paused + # stopped: Stopped + # + # [*] --> scheduled: + # scheduled --> running: Start Execution + # running --> paused: Human input required + # paused --> running: human input added + # paused --> stopped: User stops execution + # running --> succeeded: Execution finishes without any error + # running --> failed: Execution finishes with errors + # running --> stopped: User stops execution + # running --> partial_succeeded: some execution occurred and handled during execution + # + # scheduled --> stopped: User stops execution + # + # succeeded --> [*] + # failed --> [*] + # partial_succeeded --> [*] + # stopped --> [*] + + # `SCHEDULED` means that the workflow is scheduled to run, but has not + # started running yet. (maybe due to possible worker saturation.) + # + # This enum value is currently unused. + SCHEDULED = "scheduled" + + # `RUNNING` means the workflow is exeuting. RUNNING = "running" + + # `SUCCEEDED` means the execution of workflow succeed without any error. SUCCEEDED = "succeeded" + + # `FAILED` means the execution of workflow failed without some errors. FAILED = "failed" + + # `STOPPED` means the execution of workflow was stopped, either manually + # by the user, or automatically by the Dify application (E.G. the moderation + # mechanism.) STOPPED = "stopped" + + # `PARTIAL_SUCCEEDED` indicates that some errors occurred during the workflow + # execution, but they were successfully handled (e.g., by using an error + # strategy such as "fail branch" or "default value"). PARTIAL_SUCCEEDED = "partial-succeeded" + + # `PAUSED` indicates that the workflow execution is temporarily paused + # (e.g., awaiting human input) and is expected to resume later. PAUSED = "paused" + def is_ended(self) -> bool: + return self in _END_STATE + + +_END_STATE = frozenset( + [ + WorkflowExecutionStatus.SUCCEEDED, + WorkflowExecutionStatus.FAILED, + WorkflowExecutionStatus.PARTIAL_SUCCEEDED, + WorkflowExecutionStatus.STOPPED, + ] +) + class WorkflowNodeExecutionMetadataKey(StrEnum): """ diff --git a/api/core/workflow/graph/graph.py b/api/core/workflow/graph/graph.py index d04724425c..ba5a01fc94 100644 --- a/api/core/workflow/graph/graph.py +++ b/api/core/workflow/graph/graph.py @@ -117,7 +117,7 @@ class Graph: node_type = node_data.get("type") if not isinstance(node_type, str): continue - if node_type in [NodeType.START, NodeType.DATASOURCE]: + if NodeType(node_type).is_start_node: start_node_id = nid break diff --git a/api/core/workflow/graph/validation.py b/api/core/workflow/graph/validation.py index 87aa7db2e4..41b4fdfa60 100644 --- a/api/core/workflow/graph/validation.py +++ b/api/core/workflow/graph/validation.py @@ -114,9 +114,45 @@ class GraphValidator: raise GraphValidationError(issues) +@dataclass(frozen=True, slots=True) +class _TriggerStartExclusivityValidator: + """Ensures trigger nodes do not coexist with UserInput (start) nodes.""" + + conflict_code: str = "TRIGGER_START_NODE_CONFLICT" + + def validate(self, graph: Graph) -> Sequence[GraphValidationIssue]: + start_node_id: str | None = None + trigger_node_ids: list[str] = [] + + for node in graph.nodes.values(): + node_type = getattr(node, "node_type", None) + if not isinstance(node_type, NodeType): + continue + + if node_type == NodeType.START: + start_node_id = node.id + elif node_type.is_trigger_node: + trigger_node_ids.append(node.id) + + if start_node_id and trigger_node_ids: + trigger_list = ", ".join(trigger_node_ids) + return [ + GraphValidationIssue( + code=self.conflict_code, + message=( + f"UserInput (start) node '{start_node_id}' cannot coexist with trigger nodes: {trigger_list}." + ), + node_id=start_node_id, + ) + ] + + return [] + + _DEFAULT_RULES: tuple[GraphValidationRule, ...] = ( _EdgeEndpointValidator(), _RootNodeValidator(), + _TriggerStartExclusivityValidator(), ) diff --git a/api/core/workflow/graph_engine/command_processing/command_handlers.py b/api/core/workflow/graph_engine/command_processing/command_handlers.py index c26c98c496..e9f109c88c 100644 --- a/api/core/workflow/graph_engine/command_processing/command_handlers.py +++ b/api/core/workflow/graph_engine/command_processing/command_handlers.py @@ -3,6 +3,8 @@ from typing import final from typing_extensions import override +from core.workflow.entities.pause_reason import SchedulingPause + from ..domain.graph_execution import GraphExecution from ..entities.commands import AbortCommand, GraphEngineCommand, PauseCommand from .command_processor import CommandHandler @@ -25,4 +27,7 @@ class PauseCommandHandler(CommandHandler): def handle(self, command: GraphEngineCommand, execution: GraphExecution) -> None: assert isinstance(command, PauseCommand) logger.debug("Pausing workflow %s: %s", execution.workflow_id, command.reason) - execution.pause(command.reason) + # Convert string reason to PauseReason if needed + reason = command.reason + pause_reason = SchedulingPause(message=reason) + execution.pause(pause_reason) diff --git a/api/core/workflow/graph_engine/domain/graph_execution.py b/api/core/workflow/graph_engine/domain/graph_execution.py index 6482c927d6..3d587d6691 100644 --- a/api/core/workflow/graph_engine/domain/graph_execution.py +++ b/api/core/workflow/graph_engine/domain/graph_execution.py @@ -8,6 +8,7 @@ from typing import Literal from pydantic import BaseModel, Field +from core.workflow.entities.pause_reason import PauseReason from core.workflow.enums import NodeState from .node_execution import NodeExecution @@ -41,7 +42,7 @@ class GraphExecutionState(BaseModel): completed: bool = Field(default=False) aborted: bool = Field(default=False) paused: bool = Field(default=False) - pause_reason: str | None = Field(default=None) + pause_reason: PauseReason | None = Field(default=None) error: GraphExecutionErrorState | None = Field(default=None) exceptions_count: int = Field(default=0) node_executions: list[NodeExecutionState] = Field(default_factory=list[NodeExecutionState]) @@ -106,7 +107,7 @@ class GraphExecution: completed: bool = False aborted: bool = False paused: bool = False - pause_reason: str | None = None + pause_reason: PauseReason | None = None error: Exception | None = None node_executions: dict[str, NodeExecution] = field(default_factory=dict[str, NodeExecution]) exceptions_count: int = 0 @@ -130,7 +131,7 @@ class GraphExecution: self.aborted = True self.error = RuntimeError(f"Aborted: {reason}") - def pause(self, reason: str | None = None) -> None: + def pause(self, reason: PauseReason) -> None: """Pause the graph execution without marking it complete.""" if self.completed: raise RuntimeError("Cannot pause execution that has completed") diff --git a/api/core/workflow/graph_engine/entities/commands.py b/api/core/workflow/graph_engine/entities/commands.py index 6070ed8812..0d51b2b716 100644 --- a/api/core/workflow/graph_engine/entities/commands.py +++ b/api/core/workflow/graph_engine/entities/commands.py @@ -36,4 +36,4 @@ class PauseCommand(GraphEngineCommand): """Command to pause a running workflow execution.""" command_type: CommandType = Field(default=CommandType.PAUSE, description="Type of command") - reason: str | None = Field(default=None, description="Optional reason for pause") + reason: str = Field(default="unknown reason", description="reason for pause") diff --git a/api/core/workflow/graph_engine/event_management/event_handlers.py b/api/core/workflow/graph_engine/event_management/event_handlers.py index b054ebd7ad..5b0f56e59d 100644 --- a/api/core/workflow/graph_engine/event_management/event_handlers.py +++ b/api/core/workflow/graph_engine/event_management/event_handlers.py @@ -210,7 +210,7 @@ class EventHandler: def _(self, event: NodeRunPauseRequestedEvent) -> None: """Handle pause requests emitted by nodes.""" - pause_reason = event.reason or "Awaiting human input" + pause_reason = event.reason self._graph_execution.pause(pause_reason) self._state_manager.finish_execution(event.node_id) if event.node_id in self._graph.nodes: diff --git a/api/core/workflow/graph_engine/graph_engine.py b/api/core/workflow/graph_engine/graph_engine.py index dd2ca3f93b..98e1a20044 100644 --- a/api/core/workflow/graph_engine/graph_engine.py +++ b/api/core/workflow/graph_engine/graph_engine.py @@ -192,7 +192,6 @@ class GraphEngine: self._dispatcher = Dispatcher( event_queue=self._event_queue, event_handler=self._event_handler_registry, - event_collector=self._event_manager, execution_coordinator=self._execution_coordinator, event_emitter=self._event_manager, ) @@ -247,8 +246,11 @@ class GraphEngine: # Handle completion if self._graph_execution.is_paused: + pause_reason = self._graph_execution.pause_reason + assert pause_reason is not None, "pause_reason should not be None when execution is paused." + # Ensure we have a valid PauseReason for the event paused_event = GraphRunPausedEvent( - reason=self._graph_execution.pause_reason, + reason=pause_reason, outputs=self._graph_runtime_state.outputs, ) self._event_manager.notify_layers(paused_event) diff --git a/api/core/workflow/graph_engine/layers/persistence.py b/api/core/workflow/graph_engine/layers/persistence.py index ecd8e12ca5..b70f36ec9e 100644 --- a/api/core/workflow/graph_engine/layers/persistence.py +++ b/api/core/workflow/graph_engine/layers/persistence.py @@ -216,7 +216,6 @@ class WorkflowPersistenceLayer(GraphEngineLayer): def _handle_graph_run_paused(self, event: GraphRunPausedEvent) -> None: execution = self._get_workflow_execution() execution.status = WorkflowExecutionStatus.PAUSED - execution.error_message = event.reason or "Workflow execution paused" execution.outputs = event.outputs self._populate_completion_statistics(execution, update_finished=False) @@ -296,7 +295,7 @@ class WorkflowPersistenceLayer(GraphEngineLayer): domain_execution, event.node_run_result, WorkflowNodeExecutionStatus.PAUSED, - error=event.reason, + error="", update_outputs=False, ) diff --git a/api/core/workflow/graph_engine/orchestration/dispatcher.py b/api/core/workflow/graph_engine/orchestration/dispatcher.py index 4097cead9c..334a3f77bf 100644 --- a/api/core/workflow/graph_engine/orchestration/dispatcher.py +++ b/api/core/workflow/graph_engine/orchestration/dispatcher.py @@ -43,7 +43,6 @@ class Dispatcher: self, event_queue: queue.Queue[GraphNodeEventBase], event_handler: "EventHandler", - event_collector: EventManager, execution_coordinator: ExecutionCoordinator, event_emitter: EventManager | None = None, ) -> None: @@ -53,13 +52,11 @@ class Dispatcher: Args: event_queue: Queue of events from workers event_handler: Event handler registry for processing events - event_collector: Event manager for collecting unhandled events execution_coordinator: Coordinator for execution flow event_emitter: Optional event manager to signal completion """ self._event_queue = event_queue self._event_handler = event_handler - self._event_collector = event_collector self._execution_coordinator = execution_coordinator self._event_emitter = event_emitter @@ -86,37 +83,31 @@ class Dispatcher: def _dispatcher_loop(self) -> None: """Main dispatcher loop.""" try: + self._process_commands() while not self._stop_event.is_set(): - commands_checked = False - should_check_commands = False - should_break = False + if ( + self._execution_coordinator.aborted + or self._execution_coordinator.paused + or self._execution_coordinator.execution_complete + ): + break - if self._execution_coordinator.is_execution_complete(): - should_check_commands = True - should_break = True - else: - # Check for scaling - self._execution_coordinator.check_scaling() + self._execution_coordinator.check_scaling() + try: + event = self._event_queue.get(timeout=0.1) + self._event_handler.dispatch(event) + self._event_queue.task_done() + self._process_commands(event) + except queue.Empty: + time.sleep(0.1) - # Process events - try: - event = self._event_queue.get(timeout=0.1) - # Route to the event handler - self._event_handler.dispatch(event) - should_check_commands = self._should_check_commands(event) - self._event_queue.task_done() - except queue.Empty: - # Process commands even when no new events arrive so abort requests are not missed - should_check_commands = True - time.sleep(0.1) - - if should_check_commands and not commands_checked: - self._execution_coordinator.check_commands() - commands_checked = True - - if should_break: - if not commands_checked: - self._execution_coordinator.check_commands() + self._process_commands() + while True: + try: + event = self._event_queue.get(block=False) + self._event_handler.dispatch(event) + self._event_queue.task_done() + except queue.Empty: break except Exception as e: @@ -129,6 +120,6 @@ class Dispatcher: if self._event_emitter: self._event_emitter.mark_complete() - def _should_check_commands(self, event: GraphNodeEventBase) -> bool: - """Return True if the event represents a node completion.""" - return isinstance(event, self._COMMAND_TRIGGER_EVENTS) + def _process_commands(self, event: GraphNodeEventBase | None = None): + if event is None or isinstance(event, self._COMMAND_TRIGGER_EVENTS): + self._execution_coordinator.process_commands() diff --git a/api/core/workflow/graph_engine/orchestration/execution_coordinator.py b/api/core/workflow/graph_engine/orchestration/execution_coordinator.py index a3162de244..e8e8f9f16c 100644 --- a/api/core/workflow/graph_engine/orchestration/execution_coordinator.py +++ b/api/core/workflow/graph_engine/orchestration/execution_coordinator.py @@ -40,7 +40,7 @@ class ExecutionCoordinator: self._command_processor = command_processor self._worker_pool = worker_pool - def check_commands(self) -> None: + def process_commands(self) -> None: """Process any pending commands.""" self._command_processor.process_commands() @@ -48,24 +48,16 @@ class ExecutionCoordinator: """Check and perform worker scaling if needed.""" self._worker_pool.check_and_scale() - def is_execution_complete(self) -> bool: - """ - Check if execution is complete. - - Returns: - True if execution is complete - """ - # Treat paused, aborted, or failed executions as terminal states - if self._graph_execution.is_paused: - return True - - if self._graph_execution.aborted or self._graph_execution.has_error: - return True - + @property + def execution_complete(self): return self._state_manager.is_execution_complete() @property - def is_paused(self) -> bool: + def aborted(self): + return self._graph_execution.aborted or self._graph_execution.has_error + + @property + def paused(self) -> bool: """Expose whether the underlying graph execution is paused.""" return self._graph_execution.is_paused diff --git a/api/core/workflow/graph_engine/worker.py b/api/core/workflow/graph_engine/worker.py index 42c9b936dd..73e59ee298 100644 --- a/api/core/workflow/graph_engine/worker.py +++ b/api/core/workflow/graph_engine/worker.py @@ -16,7 +16,6 @@ from uuid import uuid4 from flask import Flask from typing_extensions import override -from core.workflow.enums import NodeType from core.workflow.graph import Graph from core.workflow.graph_events import GraphNodeEventBase, NodeRunFailedEvent from core.workflow.nodes.base.node import Node @@ -108,8 +107,8 @@ class Worker(threading.Thread): except Exception as e: error_event = NodeRunFailedEvent( id=str(uuid4()), - node_id="unknown", - node_type=NodeType.CODE, + node_id=node.id, + node_type=node.node_type, in_iteration_id=None, error=str(e), start_at=datetime.now(), diff --git a/api/core/workflow/graph_events/graph.py b/api/core/workflow/graph_events/graph.py index 0da962aa1c..9faafc3173 100644 --- a/api/core/workflow/graph_events/graph.py +++ b/api/core/workflow/graph_events/graph.py @@ -1,5 +1,6 @@ from pydantic import Field +from core.workflow.entities.pause_reason import PauseReason from core.workflow.graph_events import BaseGraphEvent @@ -44,7 +45,8 @@ class GraphRunAbortedEvent(BaseGraphEvent): class GraphRunPausedEvent(BaseGraphEvent): """Event emitted when a graph run is paused by user command.""" - reason: str | None = Field(default=None, description="reason for pause") + # reason: str | None = Field(default=None, description="reason for pause") + reason: PauseReason = Field(..., description="reason for pause") outputs: dict[str, object] = Field( default_factory=dict, description="Outputs available to the client while the run is paused.", diff --git a/api/core/workflow/graph_events/node.py b/api/core/workflow/graph_events/node.py index b880df60d1..f225798d41 100644 --- a/api/core/workflow/graph_events/node.py +++ b/api/core/workflow/graph_events/node.py @@ -5,6 +5,7 @@ from pydantic import Field from core.rag.entities.citation_metadata import RetrievalSourceMetadata from core.workflow.entities import AgentNodeStrategyInit +from core.workflow.entities.pause_reason import PauseReason from .base import GraphNodeEventBase @@ -54,4 +55,4 @@ class NodeRunRetryEvent(NodeRunStartedEvent): class NodeRunPauseRequestedEvent(GraphNodeEventBase): - reason: str | None = Field(default=None, description="Optional pause reason") + reason: PauseReason = Field(..., description="pause reason") diff --git a/api/core/workflow/node_events/node.py b/api/core/workflow/node_events/node.py index 4fd5684436..ebf93f2fc2 100644 --- a/api/core/workflow/node_events/node.py +++ b/api/core/workflow/node_events/node.py @@ -5,6 +5,7 @@ from pydantic import Field from core.model_runtime.entities.llm_entities import LLMUsage from core.rag.entities.citation_metadata import RetrievalSourceMetadata +from core.workflow.entities.pause_reason import PauseReason from core.workflow.node_events import NodeRunResult from .base import NodeEventBase @@ -43,4 +44,4 @@ class StreamCompletedEvent(NodeEventBase): class PauseRequestedEvent(NodeEventBase): - reason: str | None = Field(default=None, description="Optional pause reason") + reason: PauseReason = Field(..., description="pause reason") diff --git a/api/core/workflow/nodes/document_extractor/node.py b/api/core/workflow/nodes/document_extractor/node.py index cd5f50aaab..12cd7e2bd9 100644 --- a/api/core/workflow/nodes/document_extractor/node.py +++ b/api/core/workflow/nodes/document_extractor/node.py @@ -171,6 +171,7 @@ def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str) ".txt" | ".markdown" | ".md" + | ".mdx" | ".html" | ".htm" | ".xml" diff --git a/api/core/workflow/nodes/http_request/node.py b/api/core/workflow/nodes/http_request/node.py index 55dec3fb08..152d3cc562 100644 --- a/api/core/workflow/nodes/http_request/node.py +++ b/api/core/workflow/nodes/http_request/node.py @@ -104,7 +104,7 @@ class HttpRequestNode(Node): status=WorkflowNodeExecutionStatus.FAILED, outputs={ "status_code": response.status_code, - "body": response.text if not files else "", + "body": response.text if not files.value else "", "headers": response.headers, "files": files, }, diff --git a/api/core/workflow/nodes/human_input/human_input_node.py b/api/core/workflow/nodes/human_input/human_input_node.py index e49f9a8c81..2d6d9760af 100644 --- a/api/core/workflow/nodes/human_input/human_input_node.py +++ b/api/core/workflow/nodes/human_input/human_input_node.py @@ -1,6 +1,7 @@ from collections.abc import Mapping from typing import Any +from core.workflow.entities.pause_reason import HumanInputRequired from core.workflow.enums import ErrorStrategy, NodeExecutionType, NodeType, WorkflowNodeExecutionStatus from core.workflow.node_events import NodeRunResult, PauseRequestedEvent from core.workflow.nodes.base.entities import BaseNodeData, RetryConfig @@ -64,7 +65,7 @@ class HumanInputNode(Node): return self._pause_generator() def _pause_generator(self): - yield PauseRequestedEvent(reason=self._node_data.pause_reason) + yield PauseRequestedEvent(reason=HumanInputRequired()) def _is_completion_ready(self) -> bool: """Determine whether all required inputs are satisfied.""" diff --git a/api/core/workflow/nodes/iteration/entities.py b/api/core/workflow/nodes/iteration/entities.py index ed4ab2c11c..63a41ec755 100644 --- a/api/core/workflow/nodes/iteration/entities.py +++ b/api/core/workflow/nodes/iteration/entities.py @@ -23,6 +23,7 @@ class IterationNodeData(BaseIterationNodeData): is_parallel: bool = False # open the parallel mode or not parallel_nums: int = 10 # the numbers of parallel error_handle_mode: ErrorHandleMode = ErrorHandleMode.TERMINATED # how to handle the error + flatten_output: bool = True # whether to flatten the output array if all elements are lists class IterationStartNodeData(BaseNodeData): diff --git a/api/core/workflow/nodes/iteration/iteration_node.py b/api/core/workflow/nodes/iteration/iteration_node.py index 3a3a2290be..ce83352dcb 100644 --- a/api/core/workflow/nodes/iteration/iteration_node.py +++ b/api/core/workflow/nodes/iteration/iteration_node.py @@ -98,6 +98,7 @@ class IterationNode(LLMUsageTrackingMixin, Node): "is_parallel": False, "parallel_nums": 10, "error_handle_mode": ErrorHandleMode.TERMINATED, + "flatten_output": True, }, } @@ -411,7 +412,14 @@ class IterationNode(LLMUsageTrackingMixin, Node): """ Flatten the outputs list if all elements are lists. This maintains backward compatibility with version 1.8.1 behavior. + + If flatten_output is False, returns outputs as-is (nested structure). + If flatten_output is True (default), flattens the list if all elements are lists. """ + # If flatten_output is disabled, return outputs as-is + if not self._node_data.flatten_output: + return outputs + if not outputs: return outputs diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 4a63900527..e8ee44d5a9 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -6,8 +6,7 @@ from collections import defaultdict from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING, Any, cast -from sqlalchemy import Float, and_, func, or_, select, text -from sqlalchemy import cast as sqlalchemy_cast +from sqlalchemy import and_, func, literal, or_, select from sqlalchemy.orm import sessionmaker from core.app.app_config.entities import DatasetRetrieveConfigEntity @@ -597,79 +596,79 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node): if value is None and condition not in ("empty", "not empty"): return filters - key = f"{metadata_name}_{sequence}" - key_value = f"{metadata_name}_{sequence}_value" + json_field = Document.doc_metadata[metadata_name].as_string() + match condition: case "contains": - filters.append( - (text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params( - **{key: metadata_name, key_value: f"%{value}%"} - ) - ) + filters.append(json_field.like(f"%{value}%")) + case "not contains": - filters.append( - (text(f"documents.doc_metadata ->> :{key} NOT LIKE :{key_value}")).params( - **{key: metadata_name, key_value: f"%{value}%"} - ) - ) + filters.append(json_field.notlike(f"%{value}%")) + case "start with": - filters.append( - (text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params( - **{key: metadata_name, key_value: f"{value}%"} - ) - ) + filters.append(json_field.like(f"{value}%")) + case "end with": - filters.append( - (text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params( - **{key: metadata_name, key_value: f"%{value}"} - ) - ) + filters.append(json_field.like(f"%{value}")) case "in": if isinstance(value, str): - escaped_values = [v.strip().replace("'", "''") for v in str(value).split(",")] - escaped_value_str = ",".join(escaped_values) + value_list = [v.strip() for v in value.split(",") if v.strip()] + elif isinstance(value, (list, tuple)): + value_list = [str(v) for v in value if v is not None] else: - escaped_value_str = str(value) - filters.append( - (text(f"documents.doc_metadata ->> :{key} = any(string_to_array(:{key_value},','))")).params( - **{key: metadata_name, key_value: escaped_value_str} - ) - ) + value_list = [str(value)] if value is not None else [] + + if not value_list: + filters.append(literal(False)) + else: + filters.append(json_field.in_(value_list)) + case "not in": if isinstance(value, str): - escaped_values = [v.strip().replace("'", "''") for v in str(value).split(",")] - escaped_value_str = ",".join(escaped_values) + value_list = [v.strip() for v in value.split(",") if v.strip()] + elif isinstance(value, (list, tuple)): + value_list = [str(v) for v in value if v is not None] else: - escaped_value_str = str(value) - filters.append( - (text(f"documents.doc_metadata ->> :{key} != all(string_to_array(:{key_value},','))")).params( - **{key: metadata_name, key_value: escaped_value_str} - ) - ) - case "=" | "is": + value_list = [str(value)] if value is not None else [] + + if not value_list: + filters.append(literal(True)) + else: + filters.append(json_field.notin_(value_list)) + + case "is" | "=": if isinstance(value, str): - filters.append(Document.doc_metadata[metadata_name] == f'"{value}"') - else: - filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) == value) + filters.append(json_field == value) + elif isinstance(value, (int, float)): + filters.append(Document.doc_metadata[metadata_name].as_float() == value) + case "is not" | "≠": if isinstance(value, str): - filters.append(Document.doc_metadata[metadata_name] != f'"{value}"') - else: - filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) != value) + filters.append(json_field != value) + elif isinstance(value, (int, float)): + filters.append(Document.doc_metadata[metadata_name].as_float() != value) + case "empty": filters.append(Document.doc_metadata[metadata_name].is_(None)) + case "not empty": filters.append(Document.doc_metadata[metadata_name].isnot(None)) + case "before" | "<": - filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) < value) + filters.append(Document.doc_metadata[metadata_name].as_float() < value) + case "after" | ">": - filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) > value) + filters.append(Document.doc_metadata[metadata_name].as_float() > value) + case "≤" | "<=": - filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) <= value) + filters.append(Document.doc_metadata[metadata_name].as_float() <= value) + case "≥" | ">=": - filters.append(sqlalchemy_cast(Document.doc_metadata[metadata_name].astext, Float) >= value) + filters.append(Document.doc_metadata[metadata_name].as_float() >= value) + case _: pass + return filters @classmethod diff --git a/api/core/workflow/nodes/llm/node.py b/api/core/workflow/nodes/llm/node.py index 1644f683bf..06c9beaed2 100644 --- a/api/core/workflow/nodes/llm/node.py +++ b/api/core/workflow/nodes/llm/node.py @@ -3,6 +3,7 @@ import io import json import logging import re +import time from collections.abc import Generator, Mapping, Sequence from typing import TYPE_CHECKING, Any, Literal @@ -384,6 +385,8 @@ class LLMNode(Node): output_schema = LLMNode.fetch_structured_output_schema( structured_output=structured_output or {}, ) + request_start_time = time.perf_counter() + invoke_result = invoke_llm_with_structured_output( provider=model_instance.provider, model_schema=model_schema, @@ -396,6 +399,8 @@ class LLMNode(Node): user=user_id, ) else: + request_start_time = time.perf_counter() + invoke_result = model_instance.invoke_llm( prompt_messages=list(prompt_messages), model_parameters=node_data_model.completion_params, @@ -411,6 +416,7 @@ class LLMNode(Node): node_id=node_id, node_type=node_type, reasoning_format=reasoning_format, + request_start_time=request_start_time, ) @staticmethod @@ -422,14 +428,20 @@ class LLMNode(Node): node_id: str, node_type: NodeType, reasoning_format: Literal["separated", "tagged"] = "tagged", + request_start_time: float | None = None, ) -> Generator[NodeEventBase | LLMStructuredOutput, None, None]: # For blocking mode if isinstance(invoke_result, LLMResult): + duration = None + if request_start_time is not None: + duration = time.perf_counter() - request_start_time + invoke_result.usage.latency = round(duration, 3) event = LLMNode.handle_blocking_result( invoke_result=invoke_result, saver=file_saver, file_outputs=file_outputs, reasoning_format=reasoning_format, + request_latency=duration, ) yield event return @@ -441,6 +453,12 @@ class LLMNode(Node): usage = LLMUsage.empty_usage() finish_reason = None full_text_buffer = io.StringIO() + + # Initialize streaming metrics tracking + start_time = request_start_time if request_start_time is not None else time.perf_counter() + first_token_time = None + has_content = False + collected_structured_output = None # Collect structured_output from streaming chunks # Consume the invoke result and handle generator exception try: @@ -457,6 +475,11 @@ class LLMNode(Node): file_saver=file_saver, file_outputs=file_outputs, ): + # Detect first token for TTFT calculation + if text_part and not has_content: + first_token_time = time.perf_counter() + has_content = True + full_text_buffer.write(text_part) yield StreamChunkEvent( selector=[node_id, "text"], @@ -489,6 +512,16 @@ class LLMNode(Node): # Extract clean text and reasoning from tags clean_text, reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format) + # Calculate streaming metrics + end_time = time.perf_counter() + total_duration = end_time - start_time + usage.latency = round(total_duration, 3) + if has_content and first_token_time: + gen_ai_server_time_to_first_token = first_token_time - start_time + llm_streaming_time_to_generate = end_time - first_token_time + usage.time_to_first_token = round(gen_ai_server_time_to_first_token, 3) + usage.time_to_generate = round(llm_streaming_time_to_generate, 3) + yield ModelInvokeCompletedEvent( # Use clean_text for separated mode, full_text for tagged mode text=clean_text if reasoning_format == "separated" else full_text, @@ -1068,6 +1101,7 @@ class LLMNode(Node): saver: LLMFileSaver, file_outputs: list["File"], reasoning_format: Literal["separated", "tagged"] = "tagged", + request_latency: float | None = None, ) -> ModelInvokeCompletedEvent: buffer = io.StringIO() for text_part in LLMNode._save_multimodal_output_and_convert_result_to_markdown( @@ -1088,7 +1122,7 @@ class LLMNode(Node): # Extract clean text and reasoning from tags clean_text, reasoning_content = LLMNode._split_reasoning(full_text, reasoning_format) - return ModelInvokeCompletedEvent( + event = ModelInvokeCompletedEvent( # Use clean_text for separated mode, full_text for tagged mode text=clean_text if reasoning_format == "separated" else full_text, usage=invoke_result.usage, @@ -1098,6 +1132,9 @@ class LLMNode(Node): # Pass structured output if enabled structured_output=getattr(invoke_result, "structured_output", None), ) + if request_latency is not None: + event.usage.latency = round(request_latency, 3) + return event @staticmethod def save_multimodal_image_output( diff --git a/api/core/workflow/nodes/trigger_plugin/entities.py b/api/core/workflow/nodes/trigger_plugin/entities.py index e6853bb854..6c53acee4f 100644 --- a/api/core/workflow/nodes/trigger_plugin/entities.py +++ b/api/core/workflow/nodes/trigger_plugin/entities.py @@ -1,5 +1,5 @@ from collections.abc import Mapping -from typing import Any, Literal, Optional, Union +from typing import Any, Literal, Union from pydantic import BaseModel, Field, ValidationInfo, field_validator @@ -39,7 +39,7 @@ class TriggerEventNodeData(BaseNodeData): return type title: str - desc: Optional[str] = None + desc: str | None = None plugin_id: str = Field(..., description="Plugin ID") provider_id: str = Field(..., description="Provider ID") event_name: str = Field(..., description="Event name") @@ -62,7 +62,7 @@ class TriggerEventNodeData(BaseNodeData): Mapping[str, Any]: A dictionary containing the generated parameters. """ - result: Mapping[str, Any] = {} + result: dict[str, Any] = {} for parameter_name in self.event_parameters: parameter: EventParameter | None = parameter_schemas.get(parameter_name) if not parameter: diff --git a/api/core/workflow/nodes/trigger_plugin/trigger_event_node.py b/api/core/workflow/nodes/trigger_plugin/trigger_event_node.py index 7b7e634e0f..c4c2ff87db 100644 --- a/api/core/workflow/nodes/trigger_plugin/trigger_event_node.py +++ b/api/core/workflow/nodes/trigger_plugin/trigger_event_node.py @@ -1,6 +1,7 @@ from collections.abc import Mapping -from typing import Any, Optional +from typing import Any +from core.workflow.constants import SYSTEM_VARIABLE_NODE_ID from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus from core.workflow.enums import ErrorStrategy, NodeExecutionType, NodeType from core.workflow.node_events import NodeRunResult @@ -19,7 +20,7 @@ class TriggerEventNode(Node): def init_node_data(self, data: Mapping[str, Any]) -> None: self._node_data = TriggerEventNodeData.model_validate(data) - def _get_error_strategy(self) -> Optional[ErrorStrategy]: + def _get_error_strategy(self) -> ErrorStrategy | None: return self._node_data.error_strategy def _get_retry_config(self) -> RetryConfig: @@ -28,7 +29,7 @@ class TriggerEventNode(Node): def _get_title(self) -> str: return self._node_data.title - def _get_description(self) -> Optional[str]: + def _get_description(self) -> str | None: return self._node_data.desc def _get_default_value_dict(self) -> dict[str, Any]: @@ -65,18 +66,24 @@ class TriggerEventNode(Node): """ # Get trigger data passed when workflow was triggered - inputs = dict(self.graph_runtime_state.variable_pool.user_inputs) metadata = { WorkflowNodeExecutionMetadataKey.TRIGGER_INFO: { - **inputs, "provider_id": self._node_data.provider_id, "event_name": self._node_data.event_name, "plugin_unique_identifier": self._node_data.plugin_unique_identifier, }, } + node_inputs = dict(self.graph_runtime_state.variable_pool.user_inputs) + system_inputs = self.graph_runtime_state.variable_pool.system_variables.to_dict() + + # TODO: System variables should be directly accessible, no need for special handling + # Set system variables as node outputs. + for var in system_inputs: + node_inputs[SYSTEM_VARIABLE_NODE_ID + "." + var] = system_inputs[var] + outputs = dict(node_inputs) return NodeRunResult( status=WorkflowNodeExecutionStatus.SUCCEEDED, - inputs={}, - outputs=inputs, + inputs=node_inputs, + outputs=outputs, metadata=metadata, ) diff --git a/api/core/workflow/nodes/trigger_schedule/entities.py b/api/core/workflow/nodes/trigger_schedule/entities.py index 7ff78c4054..a515d02d55 100644 --- a/api/core/workflow/nodes/trigger_schedule/entities.py +++ b/api/core/workflow/nodes/trigger_schedule/entities.py @@ -1,4 +1,4 @@ -from typing import Literal, Optional, Union +from typing import Literal, Union from pydantic import BaseModel, Field @@ -11,11 +11,9 @@ class TriggerScheduleNodeData(BaseNodeData): """ mode: str = Field(default="visual", description="Schedule mode: visual or cron") - frequency: Optional[str] = Field( - default=None, description="Frequency for visual mode: hourly, daily, weekly, monthly" - ) - cron_expression: Optional[str] = Field(default=None, description="Cron expression for cron mode") - visual_config: Optional[dict] = Field(default=None, description="Visual configuration details") + frequency: str | None = Field(default=None, description="Frequency for visual mode: hourly, daily, weekly, monthly") + cron_expression: str | None = Field(default=None, description="Cron expression for cron mode") + visual_config: dict | None = Field(default=None, description="Visual configuration details") timezone: str = Field(default="UTC", description="Timezone for schedule execution") @@ -26,26 +24,26 @@ class ScheduleConfig(BaseModel): class SchedulePlanUpdate(BaseModel): - node_id: Optional[str] = None - cron_expression: Optional[str] = None - timezone: Optional[str] = None + node_id: str | None = None + cron_expression: str | None = None + timezone: str | None = None class VisualConfig(BaseModel): """Visual configuration for schedule trigger""" # For hourly frequency - on_minute: Optional[int] = Field(default=0, ge=0, le=59, description="Minute of the hour (0-59)") + on_minute: int | None = Field(default=0, ge=0, le=59, description="Minute of the hour (0-59)") # For daily, weekly, monthly frequencies - time: Optional[str] = Field(default="12:00 AM", description="Time in 12-hour format (e.g., '2:30 PM')") + time: str | None = Field(default="12:00 AM", description="Time in 12-hour format (e.g., '2:30 PM')") # For weekly frequency - weekdays: Optional[list[Literal["sun", "mon", "tue", "wed", "thu", "fri", "sat"]]] = Field( + weekdays: list[Literal["sun", "mon", "tue", "wed", "thu", "fri", "sat"]] | None = Field( default=None, description="List of weekdays to run on" ) # For monthly frequency - monthly_days: Optional[list[Union[int, Literal["last"]]]] = Field( + monthly_days: list[Union[int, Literal["last"]]] | None = Field( default=None, description="Days of month to run on (1-31 or 'last')" ) diff --git a/api/core/workflow/nodes/trigger_schedule/trigger_schedule_node.py b/api/core/workflow/nodes/trigger_schedule/trigger_schedule_node.py index 4fa50f1ead..98a841d1be 100644 --- a/api/core/workflow/nodes/trigger_schedule/trigger_schedule_node.py +++ b/api/core/workflow/nodes/trigger_schedule/trigger_schedule_node.py @@ -1,7 +1,7 @@ from collections.abc import Mapping -from datetime import UTC, datetime -from typing import Any, Optional +from typing import Any +from core.workflow.constants import SYSTEM_VARIABLE_NODE_ID from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus from core.workflow.enums import ErrorStrategy, NodeExecutionType, NodeType from core.workflow.node_events import NodeRunResult @@ -19,7 +19,7 @@ class TriggerScheduleNode(Node): def init_node_data(self, data: Mapping[str, Any]) -> None: self._node_data = TriggerScheduleNodeData(**data) - def _get_error_strategy(self) -> Optional[ErrorStrategy]: + def _get_error_strategy(self) -> ErrorStrategy | None: return self._node_data.error_strategy def _get_retry_config(self) -> RetryConfig: @@ -28,7 +28,7 @@ class TriggerScheduleNode(Node): def _get_title(self) -> str: return self._node_data.title - def _get_description(self) -> Optional[str]: + def _get_description(self) -> str | None: return self._node_data.desc def _get_default_value_dict(self) -> dict[str, Any]: @@ -54,10 +54,16 @@ class TriggerScheduleNode(Node): } def _run(self) -> NodeRunResult: - current_time = datetime.now(UTC) - node_outputs = {"current_time": current_time.isoformat()} + node_inputs = dict(self.graph_runtime_state.variable_pool.user_inputs) + system_inputs = self.graph_runtime_state.variable_pool.system_variables.to_dict() + # TODO: System variables should be directly accessible, no need for special handling + # Set system variables as node outputs. + for var in system_inputs: + node_inputs[SYSTEM_VARIABLE_NODE_ID + "." + var] = system_inputs[var] + outputs = dict(node_inputs) return NodeRunResult( status=WorkflowNodeExecutionStatus.SUCCEEDED, - outputs=node_outputs, + inputs=node_inputs, + outputs=outputs, ) diff --git a/api/core/workflow/nodes/trigger_webhook/entities.py b/api/core/workflow/nodes/trigger_webhook/entities.py index edb7338473..1011e60b43 100644 --- a/api/core/workflow/nodes/trigger_webhook/entities.py +++ b/api/core/workflow/nodes/trigger_webhook/entities.py @@ -1,6 +1,6 @@ from collections.abc import Sequence from enum import StrEnum -from typing import Literal, Optional +from typing import Literal from pydantic import BaseModel, Field, field_validator @@ -75,5 +75,5 @@ class WebhookData(BaseNodeData): response_body: str = "" # Template for response body # Webhook specific fields (not from client data, set internally) - webhook_id: Optional[str] = None # Set when webhook trigger is created + webhook_id: str | None = None # Set when webhook trigger is created timeout: int = 30 # Timeout in seconds to wait for webhook response diff --git a/api/core/workflow/nodes/trigger_webhook/node.py b/api/core/workflow/nodes/trigger_webhook/node.py index aa49b3f3e2..15009f90d0 100644 --- a/api/core/workflow/nodes/trigger_webhook/node.py +++ b/api/core/workflow/nodes/trigger_webhook/node.py @@ -1,6 +1,7 @@ from collections.abc import Mapping -from typing import Any, Optional +from typing import Any +from core.workflow.constants import SYSTEM_VARIABLE_NODE_ID from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus from core.workflow.enums import ErrorStrategy, NodeExecutionType, NodeType from core.workflow.node_events import NodeRunResult @@ -19,7 +20,7 @@ class TriggerWebhookNode(Node): def init_node_data(self, data: Mapping[str, Any]) -> None: self._node_data = WebhookData.model_validate(data) - def _get_error_strategy(self) -> Optional[ErrorStrategy]: + def _get_error_strategy(self) -> ErrorStrategy | None: return self._node_data.error_strategy def _get_retry_config(self) -> RetryConfig: @@ -28,7 +29,7 @@ class TriggerWebhookNode(Node): def _get_title(self) -> str: return self._node_data.title - def _get_description(self) -> Optional[str]: + def _get_description(self) -> str | None: return self._node_data.desc def _get_default_value_dict(self) -> dict[str, Any]: @@ -71,7 +72,12 @@ class TriggerWebhookNode(Node): # Extract webhook-specific outputs based on node configuration outputs = self._extract_configured_outputs(webhook_inputs) + system_inputs = self.graph_runtime_state.variable_pool.system_variables.to_dict() + # TODO: System variables should be directly accessible, no need for special handling + # Set system variables as node outputs. + for var in system_inputs: + outputs[SYSTEM_VARIABLE_NODE_ID + "." + var] = system_inputs[var] return NodeRunResult( status=WorkflowNodeExecutionStatus.SUCCEEDED, inputs=webhook_inputs, diff --git a/api/core/workflow/nodes/variable_assigner/v1/node.py b/api/core/workflow/nodes/variable_assigner/v1/node.py index 8cd267c4a7..3a0793f092 100644 --- a/api/core/workflow/nodes/variable_assigner/v1/node.py +++ b/api/core/workflow/nodes/variable_assigner/v1/node.py @@ -2,7 +2,6 @@ from collections.abc import Callable, Mapping, Sequence from typing import TYPE_CHECKING, Any, TypeAlias from core.variables import SegmentType, Variable -from core.variables.segments import BooleanSegment from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID from core.workflow.conversation_variable_updater import ConversationVariableUpdater from core.workflow.entities import GraphInitParams @@ -12,7 +11,6 @@ from core.workflow.nodes.base.entities import BaseNodeData, RetryConfig from core.workflow.nodes.base.node import Node from core.workflow.nodes.variable_assigner.common import helpers as common_helpers from core.workflow.nodes.variable_assigner.common.exc import VariableOperatorNodeError -from factories import variable_factory from ..common.impl import conversation_variable_updater_factory from .node_data import VariableAssignerData, WriteMode @@ -116,7 +114,7 @@ class VariableAssignerNode(Node): updated_variable = original_variable.model_copy(update={"value": updated_value}) case WriteMode.CLEAR: - income_value = get_zero_value(original_variable.value_type) + income_value = SegmentType.get_zero_value(original_variable.value_type) updated_variable = original_variable.model_copy(update={"value": income_value.to_object()}) # Over write the variable. @@ -143,24 +141,3 @@ class VariableAssignerNode(Node): process_data=common_helpers.set_updated_variables({}, updated_variables), outputs={}, ) - - -def get_zero_value(t: SegmentType): - # TODO(QuantumGhost): this should be a method of `SegmentType`. - match t: - case SegmentType.ARRAY_OBJECT | SegmentType.ARRAY_STRING | SegmentType.ARRAY_NUMBER | SegmentType.ARRAY_BOOLEAN: - return variable_factory.build_segment_with_type(t, []) - case SegmentType.OBJECT: - return variable_factory.build_segment({}) - case SegmentType.STRING: - return variable_factory.build_segment("") - case SegmentType.INTEGER: - return variable_factory.build_segment(0) - case SegmentType.FLOAT: - return variable_factory.build_segment(0.0) - case SegmentType.NUMBER: - return variable_factory.build_segment(0) - case SegmentType.BOOLEAN: - return BooleanSegment(value=False) - case _: - raise VariableOperatorNodeError(f"unsupported variable type: {t}") diff --git a/api/core/workflow/nodes/variable_assigner/v2/constants.py b/api/core/workflow/nodes/variable_assigner/v2/constants.py deleted file mode 100644 index 1a4b81c39c..0000000000 --- a/api/core/workflow/nodes/variable_assigner/v2/constants.py +++ /dev/null @@ -1,14 +0,0 @@ -from core.variables import SegmentType - -# Note: This mapping is duplicated with `get_zero_value`. Consider refactoring to avoid redundancy. -EMPTY_VALUE_MAPPING = { - SegmentType.STRING: "", - SegmentType.NUMBER: 0, - SegmentType.BOOLEAN: False, - SegmentType.OBJECT: {}, - SegmentType.ARRAY_ANY: [], - SegmentType.ARRAY_STRING: [], - SegmentType.ARRAY_NUMBER: [], - SegmentType.ARRAY_OBJECT: [], - SegmentType.ARRAY_BOOLEAN: [], -} diff --git a/api/core/workflow/nodes/variable_assigner/v2/node.py b/api/core/workflow/nodes/variable_assigner/v2/node.py index a89055fd66..f15924d78f 100644 --- a/api/core/workflow/nodes/variable_assigner/v2/node.py +++ b/api/core/workflow/nodes/variable_assigner/v2/node.py @@ -16,7 +16,6 @@ from core.workflow.nodes.variable_assigner.common.exc import VariableOperatorNod from core.workflow.nodes.variable_assigner.common.impl import conversation_variable_updater_factory from . import helpers -from .constants import EMPTY_VALUE_MAPPING from .entities import VariableAssignerNodeData, VariableOperationItem from .enums import InputType, Operation from .exc import ( @@ -249,7 +248,7 @@ class VariableAssignerNode(Node): case Operation.OVER_WRITE: return value case Operation.CLEAR: - return EMPTY_VALUE_MAPPING[variable.value_type] + return SegmentType.get_zero_value(variable.value_type).to_object() case Operation.APPEND: return variable.value + [value] case Operation.EXTEND: diff --git a/api/core/workflow/runtime/graph_runtime_state.py b/api/core/workflow/runtime/graph_runtime_state.py index 4c322c6aa6..0fbc8ab23e 100644 --- a/api/core/workflow/runtime/graph_runtime_state.py +++ b/api/core/workflow/runtime/graph_runtime_state.py @@ -3,7 +3,6 @@ from __future__ import annotations import importlib import json from collections.abc import Mapping, Sequence -from collections.abc import Mapping as TypingMapping from copy import deepcopy from dataclasses import dataclass from typing import Any, Protocol @@ -100,8 +99,8 @@ class ResponseStreamCoordinatorProtocol(Protocol): class GraphProtocol(Protocol): """Structural interface required from graph instances attached to the runtime state.""" - nodes: TypingMapping[str, object] - edges: TypingMapping[str, object] + nodes: Mapping[str, object] + edges: Mapping[str, object] root_node: object def get_outgoing_edges(self, node_id: str) -> Sequence[object]: ... diff --git a/api/core/workflow/runtime/graph_runtime_state_protocol.py b/api/core/workflow/runtime/graph_runtime_state_protocol.py index 40835a936f..5e0878e873 100644 --- a/api/core/workflow/runtime/graph_runtime_state_protocol.py +++ b/api/core/workflow/runtime/graph_runtime_state_protocol.py @@ -3,6 +3,7 @@ from typing import Any, Protocol from core.model_runtime.entities.llm_entities import LLMUsage from core.variables.segments import Segment +from core.workflow.system_variable import SystemVariableReadOnlyView class ReadOnlyVariablePool(Protocol): @@ -30,6 +31,9 @@ class ReadOnlyGraphRuntimeState(Protocol): All methods return defensive copies to ensure immutability. """ + @property + def system_variable(self) -> SystemVariableReadOnlyView: ... + @property def variable_pool(self) -> ReadOnlyVariablePool: """Get read-only access to the variable pool.""" diff --git a/api/core/workflow/runtime/read_only_wrappers.py b/api/core/workflow/runtime/read_only_wrappers.py index 664c365295..8539727fd6 100644 --- a/api/core/workflow/runtime/read_only_wrappers.py +++ b/api/core/workflow/runtime/read_only_wrappers.py @@ -6,6 +6,7 @@ from typing import Any from core.model_runtime.entities.llm_entities import LLMUsage from core.variables.segments import Segment +from core.workflow.system_variable import SystemVariableReadOnlyView from .graph_runtime_state import GraphRuntimeState from .variable_pool import VariablePool @@ -42,6 +43,10 @@ class ReadOnlyGraphRuntimeStateWrapper: self._state = state self._variable_pool_wrapper = ReadOnlyVariablePoolWrapper(state.variable_pool) + @property + def system_variable(self) -> SystemVariableReadOnlyView: + return self._state.variable_pool.system_variables.as_view() + @property def variable_pool(self) -> ReadOnlyVariablePoolWrapper: return self._variable_pool_wrapper diff --git a/api/core/workflow/runtime/variable_pool.py b/api/core/workflow/runtime/variable_pool.py index d41a20dfd7..7fbaec9e70 100644 --- a/api/core/workflow/runtime/variable_pool.py +++ b/api/core/workflow/runtime/variable_pool.py @@ -153,7 +153,11 @@ class VariablePool(BaseModel): return None node_id, name = self._selector_to_keys(selector) - segment: Segment | None = self.variable_dictionary[node_id].get(name) + node_map = self.variable_dictionary.get(node_id) + if node_map is None: + return None + + segment: Segment | None = node_map.get(name) if segment is None: return None diff --git a/api/core/workflow/system_variable.py b/api/core/workflow/system_variable.py index 6716e745cd..ad925912a4 100644 --- a/api/core/workflow/system_variable.py +++ b/api/core/workflow/system_variable.py @@ -1,4 +1,5 @@ from collections.abc import Mapping, Sequence +from types import MappingProxyType from typing import Any from pydantic import AliasChoices, BaseModel, ConfigDict, Field, model_validator @@ -28,6 +29,8 @@ class SystemVariable(BaseModel): app_id: str | None = None workflow_id: str | None = None + timestamp: int | None = None + files: Sequence[File] = Field(default_factory=list) # NOTE: The `workflow_execution_id` field was previously named `workflow_run_id`. @@ -107,4 +110,105 @@ class SystemVariable(BaseModel): d[SystemVariableKey.DATASOURCE_INFO] = self.datasource_info if self.invoke_from is not None: d[SystemVariableKey.INVOKE_FROM] = self.invoke_from + if self.timestamp is not None: + d[SystemVariableKey.TIMESTAMP] = self.timestamp return d + + def as_view(self) -> "SystemVariableReadOnlyView": + return SystemVariableReadOnlyView(self) + + +class SystemVariableReadOnlyView: + """ + A read-only view of a SystemVariable that implements the ReadOnlySystemVariable protocol. + + This class wraps a SystemVariable instance and provides read-only access to all its fields. + It always reads the latest data from the wrapped instance and prevents any write operations. + """ + + def __init__(self, system_variable: SystemVariable) -> None: + """ + Initialize the read-only view with a SystemVariable instance. + + Args: + system_variable: The SystemVariable instance to wrap + """ + self._system_variable = system_variable + + @property + def user_id(self) -> str | None: + return self._system_variable.user_id + + @property + def app_id(self) -> str | None: + return self._system_variable.app_id + + @property + def workflow_id(self) -> str | None: + return self._system_variable.workflow_id + + @property + def workflow_execution_id(self) -> str | None: + return self._system_variable.workflow_execution_id + + @property + def query(self) -> str | None: + return self._system_variable.query + + @property + def conversation_id(self) -> str | None: + return self._system_variable.conversation_id + + @property + def dialogue_count(self) -> int | None: + return self._system_variable.dialogue_count + + @property + def document_id(self) -> str | None: + return self._system_variable.document_id + + @property + def original_document_id(self) -> str | None: + return self._system_variable.original_document_id + + @property + def dataset_id(self) -> str | None: + return self._system_variable.dataset_id + + @property + def batch(self) -> str | None: + return self._system_variable.batch + + @property + def datasource_type(self) -> str | None: + return self._system_variable.datasource_type + + @property + def invoke_from(self) -> str | None: + return self._system_variable.invoke_from + + @property + def files(self) -> Sequence[File]: + """ + Get a copy of the files from the wrapped SystemVariable. + + Returns: + A defensive copy of the files sequence to prevent modification + """ + return tuple(self._system_variable.files) # Convert to immutable tuple + + @property + def datasource_info(self) -> Mapping[str, Any] | None: + """ + Get a copy of the datasource info from the wrapped SystemVariable. + + Returns: + A view of the datasource info mapping to prevent modification + """ + if self._system_variable.datasource_info is None: + return None + return MappingProxyType(self._system_variable.datasource_info) + + def __repr__(self) -> str: + """Return a string representation of the read-only view.""" + return f"SystemVariableReadOnlyView(system_variable={self._system_variable!r})" diff --git a/api/core/workflow/utils/condition/processor.py b/api/core/workflow/utils/condition/processor.py index 650a44c681..c6070b83b8 100644 --- a/api/core/workflow/utils/condition/processor.py +++ b/api/core/workflow/utils/condition/processor.py @@ -265,6 +265,45 @@ def _assert_not_empty(*, value: object) -> bool: return False +def _normalize_numeric_values(value: int | float, expected: object) -> tuple[int | float, int | float]: + """ + Normalize value and expected to compatible numeric types for comparison. + + Args: + value: The actual numeric value (int or float) + expected: The expected value (int, float, or str) + + Returns: + A tuple of (normalized_value, normalized_expected) with compatible types + + Raises: + ValueError: If expected cannot be converted to a number + """ + if not isinstance(expected, (int, float, str)): + raise ValueError(f"Cannot convert {type(expected)} to number") + + # Convert expected to appropriate numeric type + if isinstance(expected, str): + # Try to convert to float first to handle decimal strings + try: + expected_float = float(expected) + except ValueError as e: + raise ValueError(f"Cannot convert '{expected}' to number") from e + + # If value is int and expected is a whole number, keep as int comparison + if isinstance(value, int) and expected_float.is_integer(): + return value, int(expected_float) + else: + # Otherwise convert value to float for comparison + return float(value) if isinstance(value, int) else value, expected_float + elif isinstance(expected, float): + # If expected is already float, convert int value to float + return float(value) if isinstance(value, int) else value, expected + else: + # expected is int + return value, expected + + def _assert_equal(*, value: object, expected: object) -> bool: if value is None: return False @@ -324,18 +363,8 @@ def _assert_greater_than(*, value: object, expected: object) -> bool: if not isinstance(value, (int, float)): raise ValueError("Invalid actual value type: number") - if isinstance(value, int): - if not isinstance(expected, (int, float, str)): - raise ValueError(f"Cannot convert {type(expected)} to int") - expected = int(expected) - else: - if not isinstance(expected, (int, float, str)): - raise ValueError(f"Cannot convert {type(expected)} to float") - expected = float(expected) - - if value <= expected: - return False - return True + value, expected = _normalize_numeric_values(value, expected) + return value > expected def _assert_less_than(*, value: object, expected: object) -> bool: @@ -345,18 +374,8 @@ def _assert_less_than(*, value: object, expected: object) -> bool: if not isinstance(value, (int, float)): raise ValueError("Invalid actual value type: number") - if isinstance(value, int): - if not isinstance(expected, (int, float, str)): - raise ValueError(f"Cannot convert {type(expected)} to int") - expected = int(expected) - else: - if not isinstance(expected, (int, float, str)): - raise ValueError(f"Cannot convert {type(expected)} to float") - expected = float(expected) - - if value >= expected: - return False - return True + value, expected = _normalize_numeric_values(value, expected) + return value < expected def _assert_greater_than_or_equal(*, value: object, expected: object) -> bool: @@ -366,18 +385,8 @@ def _assert_greater_than_or_equal(*, value: object, expected: object) -> bool: if not isinstance(value, (int, float)): raise ValueError("Invalid actual value type: number") - if isinstance(value, int): - if not isinstance(expected, (int, float, str)): - raise ValueError(f"Cannot convert {type(expected)} to int") - expected = int(expected) - else: - if not isinstance(expected, (int, float, str)): - raise ValueError(f"Cannot convert {type(expected)} to float") - expected = float(expected) - - if value < expected: - return False - return True + value, expected = _normalize_numeric_values(value, expected) + return value >= expected def _assert_less_than_or_equal(*, value: object, expected: object) -> bool: @@ -387,18 +396,8 @@ def _assert_less_than_or_equal(*, value: object, expected: object) -> bool: if not isinstance(value, (int, float)): raise ValueError("Invalid actual value type: number") - if isinstance(value, int): - if not isinstance(expected, (int, float, str)): - raise ValueError(f"Cannot convert {type(expected)} to int") - expected = int(expected) - else: - if not isinstance(expected, (int, float, str)): - raise ValueError(f"Cannot convert {type(expected)} to float") - expected = float(expected) - - if value > expected: - return False - return True + value, expected = _normalize_numeric_values(value, expected) + return value <= expected def _assert_null(*, value: object) -> bool: diff --git a/api/core/workflow/workflow_entry.py b/api/core/workflow/workflow_entry.py index 742c42ec2b..a6c6784e39 100644 --- a/api/core/workflow/workflow_entry.py +++ b/api/core/workflow/workflow_entry.py @@ -421,4 +421,10 @@ class WorkflowEntry: if len(variable_key_list) == 2 and variable_key_list[0] == "structured_output": input_value = {variable_key_list[1]: input_value} variable_key_list = variable_key_list[0:1] + + # Support for a single node to reference multiple structured_output variables + current_variable = variable_pool.get([variable_node_id] + variable_key_list) + if current_variable and isinstance(current_variable.value, dict): + input_value = current_variable.value | input_value + variable_pool.add([variable_node_id] + variable_key_list, input_value) diff --git a/api/docker/entrypoint.sh b/api/docker/entrypoint.sh index dfaaf712a5..6313085e64 100755 --- a/api/docker/entrypoint.sh +++ b/api/docker/entrypoint.sh @@ -34,10 +34,10 @@ if [[ "${MODE}" == "worker" ]]; then if [[ -z "${CELERY_QUEUES}" ]]; then if [[ "${EDITION}" == "CLOUD" ]]; then # Cloud edition: separate queues for dataset and trigger tasks - DEFAULT_QUEUES="dataset,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow_professional,workflow_team,workflow_sandbox,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor" + DEFAULT_QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow_professional,workflow_team,workflow_sandbox,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor" else # Community edition (SELF_HOSTED): dataset, pipeline and workflow have separate queues - DEFAULT_QUEUES="dataset,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor" + DEFAULT_QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor" fi else DEFAULT_QUEUES="${CELERY_QUEUES}" diff --git a/api/enums/__init__.py b/api/enums/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/enums/cloud_plan.py b/api/enums/cloud_plan.py new file mode 100644 index 0000000000..927cff5471 --- /dev/null +++ b/api/enums/cloud_plan.py @@ -0,0 +1,15 @@ +from enum import StrEnum, auto + + +class CloudPlan(StrEnum): + """ + Enum representing user plan types in the cloud platform. + + SANDBOX: Free/default plan with limited features + PROFESSIONAL: Professional paid plan + TEAM: Team collaboration paid plan + """ + + SANDBOX = auto() + PROFESSIONAL = auto() + TEAM = auto() diff --git a/api/enums/quota_type.py b/api/enums/quota_type.py new file mode 100644 index 0000000000..9f511b88ef --- /dev/null +++ b/api/enums/quota_type.py @@ -0,0 +1,209 @@ +import logging +from dataclasses import dataclass +from enum import StrEnum, auto + +logger = logging.getLogger(__name__) + + +@dataclass +class QuotaCharge: + """ + Result of a quota consumption operation. + + Attributes: + success: Whether the quota charge succeeded + charge_id: UUID for refund, or None if failed/disabled + """ + + success: bool + charge_id: str | None + _quota_type: "QuotaType" + + def refund(self) -> None: + """ + Refund this quota charge. + + Safe to call even if charge failed or was disabled. + This method guarantees no exceptions will be raised. + """ + if self.charge_id: + self._quota_type.refund(self.charge_id) + logger.info("Refunded quota for %s with charge_id: %s", self._quota_type.value, self.charge_id) + + +class QuotaType(StrEnum): + """ + Supported quota types for tenant feature usage. + + Add additional types here whenever new billable features become available. + """ + + # Trigger execution quota + TRIGGER = auto() + + # Workflow execution quota + WORKFLOW = auto() + + UNLIMITED = auto() + + @property + def billing_key(self) -> str: + """ + Get the billing key for the feature. + """ + match self: + case QuotaType.TRIGGER: + return "trigger_event" + case QuotaType.WORKFLOW: + return "api_rate_limit" + case _: + raise ValueError(f"Invalid quota type: {self}") + + def consume(self, tenant_id: str, amount: int = 1) -> QuotaCharge: + """ + Consume quota for the feature. + + Args: + tenant_id: The tenant identifier + amount: Amount to consume (default: 1) + + Returns: + QuotaCharge with success status and charge_id for refund + + Raises: + QuotaExceededError: When quota is insufficient + """ + from configs import dify_config + from services.billing_service import BillingService + from services.errors.app import QuotaExceededError + + if not dify_config.BILLING_ENABLED: + logger.debug("Billing disabled, allowing request for %s", tenant_id) + return QuotaCharge(success=True, charge_id=None, _quota_type=self) + + logger.info("Consuming %d %s quota for tenant %s", amount, self.value, tenant_id) + + if amount <= 0: + raise ValueError("Amount to consume must be greater than 0") + + try: + response = BillingService.update_tenant_feature_plan_usage(tenant_id, self.billing_key, delta=amount) + + if response.get("result") != "success": + logger.warning( + "Failed to consume quota for %s, feature %s details: %s", + tenant_id, + self.value, + response.get("detail"), + ) + raise QuotaExceededError(feature=self.value, tenant_id=tenant_id, required=amount) + + charge_id = response.get("history_id") + logger.debug( + "Successfully consumed %d %s quota for tenant %s, charge_id: %s", + amount, + self.value, + tenant_id, + charge_id, + ) + return QuotaCharge(success=True, charge_id=charge_id, _quota_type=self) + + except QuotaExceededError: + raise + except Exception: + # fail-safe: allow request on billing errors + logger.exception("Failed to consume quota for %s, feature %s", tenant_id, self.value) + return unlimited() + + def check(self, tenant_id: str, amount: int = 1) -> bool: + """ + Check if tenant has sufficient quota without consuming. + + Args: + tenant_id: The tenant identifier + amount: Amount to check (default: 1) + + Returns: + True if quota is sufficient, False otherwise + """ + from configs import dify_config + + if not dify_config.BILLING_ENABLED: + return True + + if amount <= 0: + raise ValueError("Amount to check must be greater than 0") + + try: + remaining = self.get_remaining(tenant_id) + return remaining >= amount if remaining != -1 else True + except Exception: + logger.exception("Failed to check quota for %s, feature %s", tenant_id, self.value) + # fail-safe: allow request on billing errors + return True + + def refund(self, charge_id: str) -> None: + """ + Refund quota using charge_id from consume(). + + This method guarantees no exceptions will be raised. + All errors are logged but silently handled. + + Args: + charge_id: The UUID returned from consume() + """ + try: + from configs import dify_config + from services.billing_service import BillingService + + if not dify_config.BILLING_ENABLED: + return + + if not charge_id: + logger.warning("Cannot refund: charge_id is empty") + return + + logger.info("Refunding %s quota with charge_id: %s", self.value, charge_id) + + response = BillingService.refund_tenant_feature_plan_usage(charge_id) + if response.get("result") == "success": + logger.debug("Successfully refunded %s quota, charge_id: %s", self.value, charge_id) + else: + logger.warning("Refund failed for charge_id: %s", charge_id) + + except Exception: + # Catch ALL exceptions - refund must never fail + logger.exception("Failed to refund quota for charge_id: %s", charge_id) + # Don't raise - refund is best-effort and must be silent + + def get_remaining(self, tenant_id: str) -> int: + """ + Get remaining quota for the tenant. + + Args: + tenant_id: The tenant identifier + + Returns: + Remaining quota amount + """ + from services.billing_service import BillingService + + try: + usage_info = BillingService.get_tenant_feature_plan_usage(tenant_id, self.billing_key) + # Assuming the API returns a dict with 'remaining' or 'limit' and 'used' + if isinstance(usage_info, dict): + return usage_info.get("remaining", 0) + # If it returns a simple number, treat it as remaining + return int(usage_info) if usage_info else 0 + except Exception: + logger.exception("Failed to get remaining quota for %s, feature %s", tenant_id, self.value) + return -1 + + +def unlimited() -> QuotaCharge: + """ + Return a quota charge for unlimited quota. + + This is useful for features that are not subject to quota limits, such as the UNLIMITED quota type. + """ + return QuotaCharge(success=True, charge_id=None, _quota_type=QuotaType.UNLIMITED) diff --git a/api/events/event_handlers/sync_workflow_schedule_when_app_published.py b/api/events/event_handlers/sync_workflow_schedule_when_app_published.py index ddbb8479d1..168513fc04 100644 --- a/api/events/event_handlers/sync_workflow_schedule_when_app_published.py +++ b/api/events/event_handlers/sync_workflow_schedule_when_app_published.py @@ -1,5 +1,5 @@ import logging -from typing import Optional, cast +from typing import cast from sqlalchemy import select from sqlalchemy.orm import Session @@ -33,7 +33,7 @@ def handle(sender, **kwargs): sync_schedule_from_workflow(tenant_id=app.tenant_id, app_id=app.id, workflow=published_workflow) -def sync_schedule_from_workflow(tenant_id: str, app_id: str, workflow: Workflow) -> Optional[WorkflowSchedulePlan]: +def sync_schedule_from_workflow(tenant_id: str, app_id: str, workflow: Workflow) -> WorkflowSchedulePlan | None: """ Sync schedule plan from workflow graph configuration. diff --git a/api/extensions/ext_redis.py b/api/extensions/ext_redis.py index 487917b2a7..588fbae285 100644 --- a/api/extensions/ext_redis.py +++ b/api/extensions/ext_redis.py @@ -10,7 +10,6 @@ from redis import RedisError from redis.cache import CacheConfig from redis.cluster import ClusterNode, RedisCluster from redis.connection import Connection, SSLConnection -from redis.lock import Lock from redis.sentinel import Sentinel from configs import dify_config diff --git a/api/extensions/ext_storage.py b/api/extensions/ext_storage.py index 2960cde242..a609f13dbc 100644 --- a/api/extensions/ext_storage.py +++ b/api/extensions/ext_storage.py @@ -85,7 +85,7 @@ class Storage: case _: raise ValueError(f"unsupported storage type {storage_type}") - def save(self, filename, data): + def save(self, filename: str, data: bytes): self.storage_runner.save(filename, data) @overload diff --git a/api/extensions/storage/base_storage.py b/api/extensions/storage/base_storage.py index 0393206e54..8ddedb24ae 100644 --- a/api/extensions/storage/base_storage.py +++ b/api/extensions/storage/base_storage.py @@ -8,7 +8,7 @@ class BaseStorage(ABC): """Interface for file storage.""" @abstractmethod - def save(self, filename, data): + def save(self, filename: str, data: bytes): raise NotImplementedError @abstractmethod diff --git a/api/extensions/storage/clickzetta_volume/clickzetta_volume_storage.py b/api/extensions/storage/clickzetta_volume/clickzetta_volume_storage.py index 1cabc57e74..c1608f58a5 100644 --- a/api/extensions/storage/clickzetta_volume/clickzetta_volume_storage.py +++ b/api/extensions/storage/clickzetta_volume/clickzetta_volume_storage.py @@ -45,7 +45,6 @@ class ClickZettaVolumeConfig(BaseModel): This method will first try to use CLICKZETTA_VOLUME_* environment variables, then fall back to CLICKZETTA_* environment variables (for vector DB config). """ - import os # Helper function to get environment variable with fallback def get_env_with_fallback(volume_key: str, fallback_key: str, default: str | None = None) -> str: diff --git a/api/extensions/storage/google_cloud_storage.py b/api/extensions/storage/google_cloud_storage.py index d352996518..7f59252f2f 100644 --- a/api/extensions/storage/google_cloud_storage.py +++ b/api/extensions/storage/google_cloud_storage.py @@ -3,7 +3,7 @@ import io import json from collections.abc import Generator -from google.cloud import storage as google_cloud_storage +from google.cloud import storage as google_cloud_storage # type: ignore from configs import dify_config from extensions.storage.base_storage import BaseStorage diff --git a/api/fields/app_fields.py b/api/fields/app_fields.py index 1f14d663b8..7191933eed 100644 --- a/api/fields/app_fields.py +++ b/api/fields/app_fields.py @@ -116,6 +116,7 @@ app_partial_fields = { "access_mode": fields.String, "create_user_name": fields.String, "author_name": fields.String, + "has_draft_trigger": fields.Boolean, } diff --git a/api/fields/dataset_fields.py b/api/fields/dataset_fields.py index 73002b6736..89c4d8fba9 100644 --- a/api/fields/dataset_fields.py +++ b/api/fields/dataset_fields.py @@ -75,6 +75,7 @@ dataset_detail_fields = { "document_count": fields.Integer, "word_count": fields.Integer, "created_by": fields.String, + "author_name": fields.String, "created_at": TimestampField, "updated_by": fields.String, "updated_at": TimestampField, diff --git a/api/fields/workflow_app_log_fields.py b/api/fields/workflow_app_log_fields.py index 243efd817c..4cbdf6f0ca 100644 --- a/api/fields/workflow_app_log_fields.py +++ b/api/fields/workflow_app_log_fields.py @@ -8,6 +8,7 @@ from libs.helper import TimestampField workflow_app_log_partial_fields = { "id": fields.String, "workflow_run": fields.Nested(workflow_run_for_log_fields, attribute="workflow_run", allow_null=True), + "details": fields.Raw(attribute="details"), "created_from": fields.String, "created_by_role": fields.String, "created_by_account": fields.Nested(simple_account_fields, attribute="created_by_account", allow_null=True), diff --git a/api/gunicorn.conf.py b/api/gunicorn.conf.py index 943ee100ca..da75d25ba6 100644 --- a/api/gunicorn.conf.py +++ b/api/gunicorn.conf.py @@ -2,6 +2,19 @@ import psycogreen.gevent as pscycogreen_gevent # type: ignore from gevent import events as gevent_events from grpc.experimental import gevent as grpc_gevent # type: ignore +# WARNING: This module is loaded very early in the Gunicorn worker lifecycle, +# before gevent's monkey-patching is applied. Importing modules at the top level here can +# interfere with gevent's ability to properly patch the standard library, +# potentially causing subtle and difficult-to-diagnose bugs. +# +# To ensure correct behavior, defer any initialization or imports that depend on monkey-patching +# to the `post_patch` hook below, or use a gevent_events subscriber as shown. +# +# For further context, see: https://github.com/langgenius/dify/issues/26689 +# +# Note: The `post_fork` hook is also executed before monkey-patching, +# so moving imports there does not resolve this issue. + # NOTE(QuantumGhost): here we cannot use post_fork to patch gRPC, as # grpc_gevent.init_gevent must be called after patching stdlib. # Gunicorn calls `post_init` before applying monkey patch. @@ -11,7 +24,7 @@ from grpc.experimental import gevent as grpc_gevent # type: ignore # ref: # - https://github.com/grpc/grpc/blob/62533ea13879d6ee95c6fda11ec0826ca822c9dd/src/python/grpcio/grpc/experimental/gevent.py # - https://github.com/gevent/gevent/issues/2060#issuecomment-3016768668 -# - https://github.com/benoitc/gunicorn/blob/master/gunicorn/arbiter.py#L607-L613 +# - https://github.com/benoitc/gunicorn/blob/23.0.0/gunicorn/arbiter.py#L605-L609 def post_patch(event): diff --git a/api/libs/broadcast_channel/channel.py b/api/libs/broadcast_channel/channel.py new file mode 100644 index 0000000000..5bbf0c79a3 --- /dev/null +++ b/api/libs/broadcast_channel/channel.py @@ -0,0 +1,134 @@ +""" +Broadcast channel for Pub/Sub messaging. +""" + +import types +from abc import abstractmethod +from collections.abc import Iterator +from contextlib import AbstractContextManager +from typing import Protocol, Self + + +class Subscription(AbstractContextManager["Subscription"], Protocol): + """A subscription to a topic that provides an iterator over received messages. + The subscription can be used as a context manager and will automatically + close when exiting the context. + + Note: `Subscription` instances are not thread-safe. Each thread should create its own + subscription. + """ + + @abstractmethod + def __iter__(self) -> Iterator[bytes]: + """`__iter__` returns an iterator used to consume the message from this subscription. + + If the caller did not enter the context, `__iter__` may lazily perform the setup before + yielding messages; otherwise `__enter__` handles it.” + + If the subscription is closed, then the returned iterator exits without + raising any error. + """ + ... + + @abstractmethod + def close(self) -> None: + """close closes the subscription, releases any resources associated with it.""" + ... + + def __enter__(self) -> Self: + """`__enter__` does the setup logic of the subscription (if any), and return itself.""" + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: types.TracebackType | None, + ) -> bool | None: + self.close() + return None + + @abstractmethod + def receive(self, timeout: float | None = 0.1) -> bytes | None: + """Receive the next message from the broadcast channel. + + If `timeout` is specified, this method returns `None` if no message is + received within the given period. If `timeout` is `None`, the call blocks + until a message is received. + + Calling receive with `timeout=None` is highly discouraged, as it is impossible to + cancel a blocking subscription. + + :param timeout: timeout for receive message, in seconds. + + Returns: + bytes: The received message as a byte string, or + None: If the timeout expires before a message is received. + + Raises: + SubscriptionClosed: If the subscription has already been closed. + """ + ... + + +class Producer(Protocol): + """Producer is an interface for message publishing. It is already bound to a specific topic. + + `Producer` implementations must be thread-safe and support concurrent use by multiple threads. + """ + + @abstractmethod + def publish(self, payload: bytes) -> None: + """Publish a message to the bounded topic.""" + ... + + +class Subscriber(Protocol): + """Subscriber is an interface for subscription creation. It is already bound to a specific topic. + + `Subscriber` implementations must be thread-safe and support concurrent use by multiple threads. + """ + + @abstractmethod + def subscribe(self) -> Subscription: + pass + + +class Topic(Producer, Subscriber, Protocol): + """A named channel for publishing and subscribing to messages. + + Topics provide both read and write access. For restricted access, + use as_producer() for write-only view or as_subscriber() for read-only view. + + `Topic` implementations must be thread-safe and support concurrent use by multiple threads. + """ + + @abstractmethod + def as_producer(self) -> Producer: + """as_producer creates a write-only view for this topic.""" + ... + + @abstractmethod + def as_subscriber(self) -> Subscriber: + """as_subscriber create a read-only view for this topic.""" + ... + + +class BroadcastChannel(Protocol): + """A broadcasting channel is a channel supporting broadcasting semantics. + + Each channel is identified by a topic, different topics are isolated and do not affect each other. + + There can be multiple subscriptions to a specific topic. When a publisher publishes a message to + a specific topic, all subscription should receive the published message. + + There are no restriction for the persistence of messages. Once a subscription is created, it + should receive all subsequent messages published. + + `BroadcastChannel` implementations must be thread-safe and support concurrent use by multiple threads. + """ + + @abstractmethod + def topic(self, topic: str) -> "Topic": + """topic returns a `Topic` instance for the given topic name.""" + ... diff --git a/api/libs/broadcast_channel/exc.py b/api/libs/broadcast_channel/exc.py new file mode 100644 index 0000000000..ab958c94ed --- /dev/null +++ b/api/libs/broadcast_channel/exc.py @@ -0,0 +1,12 @@ +class BroadcastChannelError(Exception): + """`BroadcastChannelError` is the base class for all exceptions related + to `BroadcastChannel`.""" + + pass + + +class SubscriptionClosedError(BroadcastChannelError): + """SubscriptionClosedError means that the subscription has been closed and + methods for consuming messages should not be called.""" + + pass diff --git a/api/libs/broadcast_channel/redis/__init__.py b/api/libs/broadcast_channel/redis/__init__.py new file mode 100644 index 0000000000..f92c94f736 --- /dev/null +++ b/api/libs/broadcast_channel/redis/__init__.py @@ -0,0 +1,4 @@ +from .channel import BroadcastChannel +from .sharded_channel import ShardedRedisBroadcastChannel + +__all__ = ["BroadcastChannel", "ShardedRedisBroadcastChannel"] diff --git a/api/libs/broadcast_channel/redis/_subscription.py b/api/libs/broadcast_channel/redis/_subscription.py new file mode 100644 index 0000000000..571ad87468 --- /dev/null +++ b/api/libs/broadcast_channel/redis/_subscription.py @@ -0,0 +1,205 @@ +import logging +import queue +import threading +import types +from collections.abc import Generator, Iterator +from typing import Self + +from libs.broadcast_channel.channel import Subscription +from libs.broadcast_channel.exc import SubscriptionClosedError +from redis.client import PubSub + +_logger = logging.getLogger(__name__) + + +class RedisSubscriptionBase(Subscription): + """Base class for Redis pub/sub subscriptions with common functionality. + + This class provides shared functionality for both regular and sharded + Redis pub/sub subscriptions, reducing code duplication and improving + maintainability. + """ + + def __init__( + self, + pubsub: PubSub, + topic: str, + ): + # The _pubsub is None only if the subscription is closed. + self._pubsub: PubSub | None = pubsub + self._topic = topic + self._closed = threading.Event() + self._queue: queue.Queue[bytes] = queue.Queue(maxsize=1024) + self._dropped_count = 0 + self._listener_thread: threading.Thread | None = None + self._start_lock = threading.Lock() + self._started = False + + def _start_if_needed(self) -> None: + """Start the subscription if not already started.""" + with self._start_lock: + if self._started: + return + if self._closed.is_set(): + raise SubscriptionClosedError(f"The Redis {self._get_subscription_type()} subscription is closed") + if self._pubsub is None: + raise SubscriptionClosedError( + f"The Redis {self._get_subscription_type()} subscription has been cleaned up" + ) + + self._subscribe() + _logger.debug("Subscribed to %s channel %s", self._get_subscription_type(), self._topic) + + self._listener_thread = threading.Thread( + target=self._listen, + name=f"redis-{self._get_subscription_type().replace(' ', '-')}-broadcast-{self._topic}", + daemon=True, + ) + self._listener_thread.start() + self._started = True + + def _listen(self) -> None: + """Main listener loop for processing messages.""" + pubsub = self._pubsub + assert pubsub is not None, "PubSub should not be None while starting listening." + while not self._closed.is_set(): + raw_message = self._get_message() + + if raw_message is None: + continue + + if raw_message.get("type") != self._get_message_type(): + continue + + channel_field = raw_message.get("channel") + if isinstance(channel_field, bytes): + channel_name = channel_field.decode("utf-8") + elif isinstance(channel_field, str): + channel_name = channel_field + else: + channel_name = str(channel_field) + + if channel_name != self._topic: + _logger.warning( + "Ignoring %s message from unexpected channel %s", self._get_subscription_type(), channel_name + ) + continue + + payload_bytes: bytes | None = raw_message.get("data") + if not isinstance(payload_bytes, bytes): + _logger.error( + "Received invalid data from %s channel %s, type=%s", + self._get_subscription_type(), + self._topic, + type(payload_bytes), + ) + continue + + self._enqueue_message(payload_bytes) + + _logger.debug("%s listener thread stopped for channel %s", self._get_subscription_type().title(), self._topic) + self._unsubscribe() + pubsub.close() + _logger.debug("%s PubSub closed for topic %s", self._get_subscription_type().title(), self._topic) + self._pubsub = None + + def _enqueue_message(self, payload: bytes) -> None: + """Enqueue a message to the internal queue with dropping behavior.""" + while not self._closed.is_set(): + try: + self._queue.put_nowait(payload) + return + except queue.Full: + try: + self._queue.get_nowait() + self._dropped_count += 1 + _logger.debug( + "Dropped message from Redis %s subscription, topic=%s, total_dropped=%d", + self._get_subscription_type(), + self._topic, + self._dropped_count, + ) + except queue.Empty: + continue + return + + def _message_iterator(self) -> Generator[bytes, None, None]: + """Iterator for consuming messages from the subscription.""" + while not self._closed.is_set(): + try: + item = self._queue.get(timeout=0.1) + except queue.Empty: + continue + + yield item + + def __iter__(self) -> Iterator[bytes]: + """Return an iterator over messages from the subscription.""" + if self._closed.is_set(): + raise SubscriptionClosedError(f"The Redis {self._get_subscription_type()} subscription is closed") + self._start_if_needed() + return iter(self._message_iterator()) + + def receive(self, timeout: float | None = None) -> bytes | None: + """Receive the next message from the subscription.""" + if self._closed.is_set(): + raise SubscriptionClosedError(f"The Redis {self._get_subscription_type()} subscription is closed") + self._start_if_needed() + + try: + item = self._queue.get(timeout=timeout) + except queue.Empty: + return None + + return item + + def __enter__(self) -> Self: + """Context manager entry point.""" + self._start_if_needed() + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: types.TracebackType | None, + ) -> bool | None: + """Context manager exit point.""" + self.close() + return None + + def close(self) -> None: + """Close the subscription and clean up resources.""" + if self._closed.is_set(): + return + + self._closed.set() + # NOTE: PubSub is not thread-safe. More specifically, the `PubSub.close` method and the + # message retrieval method should NOT be called concurrently. + # + # Due to the restriction above, the PubSub cleanup logic happens inside the consumer thread. + listener = self._listener_thread + if listener is not None: + listener.join(timeout=1.0) + self._listener_thread = None + + # Abstract methods to be implemented by subclasses + def _get_subscription_type(self) -> str: + """Return the subscription type (e.g., 'regular' or 'sharded').""" + raise NotImplementedError + + def _subscribe(self) -> None: + """Subscribe to the Redis topic using the appropriate command.""" + raise NotImplementedError + + def _unsubscribe(self) -> None: + """Unsubscribe from the Redis topic using the appropriate command.""" + raise NotImplementedError + + def _get_message(self) -> dict | None: + """Get a message from Redis using the appropriate method.""" + raise NotImplementedError + + def _get_message_type(self) -> str: + """Return the expected message type (e.g., 'message' or 'smessage').""" + raise NotImplementedError diff --git a/api/libs/broadcast_channel/redis/channel.py b/api/libs/broadcast_channel/redis/channel.py new file mode 100644 index 0000000000..1fc3db8156 --- /dev/null +++ b/api/libs/broadcast_channel/redis/channel.py @@ -0,0 +1,67 @@ +from libs.broadcast_channel.channel import Producer, Subscriber, Subscription +from redis import Redis + +from ._subscription import RedisSubscriptionBase + + +class BroadcastChannel: + """ + Redis Pub/Sub based broadcast channel implementation (regular, non-sharded). + + Provides "at most once" delivery semantics for messages published to channels + using Redis PUBLISH/SUBSCRIBE commands for real-time message delivery. + + The `redis_client` used to construct BroadcastChannel should have `decode_responses` set to `False`. + """ + + def __init__( + self, + redis_client: Redis, + ): + self._client = redis_client + + def topic(self, topic: str) -> "Topic": + return Topic(self._client, topic) + + +class Topic: + def __init__(self, redis_client: Redis, topic: str): + self._client = redis_client + self._topic = topic + + def as_producer(self) -> Producer: + return self + + def publish(self, payload: bytes) -> None: + self._client.publish(self._topic, payload) + + def as_subscriber(self) -> Subscriber: + return self + + def subscribe(self) -> Subscription: + return _RedisSubscription( + pubsub=self._client.pubsub(), + topic=self._topic, + ) + + +class _RedisSubscription(RedisSubscriptionBase): + """Regular Redis pub/sub subscription implementation.""" + + def _get_subscription_type(self) -> str: + return "regular" + + def _subscribe(self) -> None: + assert self._pubsub is not None + self._pubsub.subscribe(self._topic) + + def _unsubscribe(self) -> None: + assert self._pubsub is not None + self._pubsub.unsubscribe(self._topic) + + def _get_message(self) -> dict | None: + assert self._pubsub is not None + return self._pubsub.get_message(ignore_subscribe_messages=True, timeout=0.1) + + def _get_message_type(self) -> str: + return "message" diff --git a/api/libs/broadcast_channel/redis/sharded_channel.py b/api/libs/broadcast_channel/redis/sharded_channel.py new file mode 100644 index 0000000000..16e3a80ee1 --- /dev/null +++ b/api/libs/broadcast_channel/redis/sharded_channel.py @@ -0,0 +1,65 @@ +from libs.broadcast_channel.channel import Producer, Subscriber, Subscription +from redis import Redis + +from ._subscription import RedisSubscriptionBase + + +class ShardedRedisBroadcastChannel: + """ + Redis 7.0+ Sharded Pub/Sub based broadcast channel implementation. + + Provides "at most once" delivery semantics using SPUBLISH/SSUBSCRIBE commands, + distributing channels across Redis cluster nodes for better scalability. + """ + + def __init__( + self, + redis_client: Redis, + ): + self._client = redis_client + + def topic(self, topic: str) -> "ShardedTopic": + return ShardedTopic(self._client, topic) + + +class ShardedTopic: + def __init__(self, redis_client: Redis, topic: str): + self._client = redis_client + self._topic = topic + + def as_producer(self) -> Producer: + return self + + def publish(self, payload: bytes) -> None: + self._client.spublish(self._topic, payload) # type: ignore[attr-defined] + + def as_subscriber(self) -> Subscriber: + return self + + def subscribe(self) -> Subscription: + return _RedisShardedSubscription( + pubsub=self._client.pubsub(), + topic=self._topic, + ) + + +class _RedisShardedSubscription(RedisSubscriptionBase): + """Redis 7.0+ sharded pub/sub subscription implementation.""" + + def _get_subscription_type(self) -> str: + return "sharded" + + def _subscribe(self) -> None: + assert self._pubsub is not None + self._pubsub.ssubscribe(self._topic) # type: ignore[attr-defined] + + def _unsubscribe(self) -> None: + assert self._pubsub is not None + self._pubsub.sunsubscribe(self._topic) # type: ignore[attr-defined] + + def _get_message(self) -> dict | None: + assert self._pubsub is not None + return self._pubsub.get_sharded_message(ignore_subscribe_messages=True, timeout=0.1) # type: ignore[attr-defined] + + def _get_message_type(self) -> str: + return "smessage" diff --git a/api/libs/datetime_utils.py b/api/libs/datetime_utils.py index e576a34629..c08578981b 100644 --- a/api/libs/datetime_utils.py +++ b/api/libs/datetime_utils.py @@ -2,6 +2,8 @@ import abc import datetime from typing import Protocol +import pytz + class _NowFunction(Protocol): @abc.abstractmethod @@ -20,3 +22,62 @@ def naive_utc_now() -> datetime.datetime: representing current UTC time. """ return _now_func(datetime.UTC).replace(tzinfo=None) + + +def ensure_naive_utc(dt: datetime.datetime) -> datetime.datetime: + """Return the datetime as naive UTC (tzinfo=None). + + If the input is timezone-aware, convert to UTC and drop the tzinfo. + Assumes naive datetimes are already expressed in UTC. + """ + if dt.tzinfo is None: + return dt + return dt.astimezone(datetime.UTC).replace(tzinfo=None) + + +def parse_time_range( + start: str | None, end: str | None, tzname: str +) -> tuple[datetime.datetime | None, datetime.datetime | None]: + """ + Parse time range strings and convert to UTC datetime objects. + Handles DST ambiguity and non-existent times gracefully. + + Args: + start: Start time string (YYYY-MM-DD HH:MM) + end: End time string (YYYY-MM-DD HH:MM) + tzname: Timezone name + + Returns: + tuple: (start_datetime_utc, end_datetime_utc) + + Raises: + ValueError: When time range is invalid or start > end + """ + tz = pytz.timezone(tzname) + utc = pytz.utc + + def _parse(time_str: str | None, label: str) -> datetime.datetime | None: + if not time_str: + return None + + try: + dt = datetime.datetime.strptime(time_str, "%Y-%m-%d %H:%M").replace(second=0) + except ValueError as e: + raise ValueError(f"Invalid {label} time format: {e}") + + try: + return tz.localize(dt, is_dst=None).astimezone(utc) + except pytz.AmbiguousTimeError: + return tz.localize(dt, is_dst=False).astimezone(utc) + except pytz.NonExistentTimeError: + dt += datetime.timedelta(hours=1) + return tz.localize(dt, is_dst=None).astimezone(utc) + + start_dt = _parse(start, "start") + end_dt = _parse(end, "end") + + # Range validation + if start_dt and end_dt and start_dt > end_dt: + raise ValueError("start must be earlier than or equal to end") + + return start_dt, end_dt diff --git a/api/libs/email_i18n.py b/api/libs/email_i18n.py index 37ff1a438e..ff74ccbe8e 100644 --- a/api/libs/email_i18n.py +++ b/api/libs/email_i18n.py @@ -38,6 +38,12 @@ class EmailType(StrEnum): EMAIL_REGISTER = auto() EMAIL_REGISTER_WHEN_ACCOUNT_EXIST = auto() RESET_PASSWORD_WHEN_ACCOUNT_NOT_EXIST_NO_REGISTER = auto() + TRIGGER_EVENTS_LIMIT_SANDBOX = auto() + TRIGGER_EVENTS_LIMIT_PROFESSIONAL = auto() + TRIGGER_EVENTS_USAGE_WARNING_SANDBOX = auto() + TRIGGER_EVENTS_USAGE_WARNING_PROFESSIONAL = auto() + API_RATE_LIMIT_LIMIT_SANDBOX = auto() + API_RATE_LIMIT_WARNING_SANDBOX = auto() class EmailLanguage(StrEnum): @@ -445,6 +451,78 @@ def create_default_email_config() -> EmailI18nConfig: branded_template_path="clean_document_job_mail_template_zh-CN.html", ), }, + EmailType.TRIGGER_EVENTS_LIMIT_SANDBOX: { + EmailLanguage.EN_US: EmailTemplate( + subject="You’ve reached your Sandbox Trigger Events limit", + template_path="trigger_events_limit_template_en-US.html", + branded_template_path="without-brand/trigger_events_limit_template_en-US.html", + ), + EmailLanguage.ZH_HANS: EmailTemplate( + subject="您的 Sandbox 触发事件额度已用尽", + template_path="trigger_events_limit_template_zh-CN.html", + branded_template_path="without-brand/trigger_events_limit_template_zh-CN.html", + ), + }, + EmailType.TRIGGER_EVENTS_LIMIT_PROFESSIONAL: { + EmailLanguage.EN_US: EmailTemplate( + subject="You’ve reached your monthly Trigger Events limit", + template_path="trigger_events_limit_template_en-US.html", + branded_template_path="without-brand/trigger_events_limit_template_en-US.html", + ), + EmailLanguage.ZH_HANS: EmailTemplate( + subject="您的月度触发事件额度已用尽", + template_path="trigger_events_limit_template_zh-CN.html", + branded_template_path="without-brand/trigger_events_limit_template_zh-CN.html", + ), + }, + EmailType.TRIGGER_EVENTS_USAGE_WARNING_SANDBOX: { + EmailLanguage.EN_US: EmailTemplate( + subject="You’re nearing your Sandbox Trigger Events limit", + template_path="trigger_events_usage_warning_template_en-US.html", + branded_template_path="without-brand/trigger_events_usage_warning_template_en-US.html", + ), + EmailLanguage.ZH_HANS: EmailTemplate( + subject="您的 Sandbox 触发事件额度接近上限", + template_path="trigger_events_usage_warning_template_zh-CN.html", + branded_template_path="without-brand/trigger_events_usage_warning_template_zh-CN.html", + ), + }, + EmailType.TRIGGER_EVENTS_USAGE_WARNING_PROFESSIONAL: { + EmailLanguage.EN_US: EmailTemplate( + subject="You’re nearing your Monthly Trigger Events limit", + template_path="trigger_events_usage_warning_template_en-US.html", + branded_template_path="without-brand/trigger_events_usage_warning_template_en-US.html", + ), + EmailLanguage.ZH_HANS: EmailTemplate( + subject="您的月度触发事件额度接近上限", + template_path="trigger_events_usage_warning_template_zh-CN.html", + branded_template_path="without-brand/trigger_events_usage_warning_template_zh-CN.html", + ), + }, + EmailType.API_RATE_LIMIT_LIMIT_SANDBOX: { + EmailLanguage.EN_US: EmailTemplate( + subject="You’ve reached your API Rate Limit", + template_path="api_rate_limit_limit_template_en-US.html", + branded_template_path="without-brand/api_rate_limit_limit_template_en-US.html", + ), + EmailLanguage.ZH_HANS: EmailTemplate( + subject="您的 API 速率额度已用尽", + template_path="api_rate_limit_limit_template_zh-CN.html", + branded_template_path="without-brand/api_rate_limit_limit_template_zh-CN.html", + ), + }, + EmailType.API_RATE_LIMIT_WARNING_SANDBOX: { + EmailLanguage.EN_US: EmailTemplate( + subject="You’re nearing your API Rate Limit", + template_path="api_rate_limit_warning_template_en-US.html", + branded_template_path="without-brand/api_rate_limit_warning_template_en-US.html", + ), + EmailLanguage.ZH_HANS: EmailTemplate( + subject="您的 API 速率额度接近上限", + template_path="api_rate_limit_warning_template_zh-CN.html", + branded_template_path="without-brand/api_rate_limit_warning_template_zh-CN.html", + ), + }, EmailType.EMAIL_REGISTER: { EmailLanguage.EN_US: EmailTemplate( subject="Register Your {application_title} Account", diff --git a/api/libs/external_api.py b/api/libs/external_api.py index 1a4fde960c..61a90ee4a9 100644 --- a/api/libs/external_api.py +++ b/api/libs/external_api.py @@ -9,9 +9,8 @@ from werkzeug.exceptions import HTTPException from werkzeug.http import HTTP_STATUS_CODES from configs import dify_config -from constants import COOKIE_NAME_ACCESS_TOKEN, COOKIE_NAME_CSRF_TOKEN, COOKIE_NAME_REFRESH_TOKEN from core.errors.error import AppInvokeQuotaExceededError -from libs.token import is_secure +from libs.token import build_force_logout_cookie_headers def http_status_message(code): @@ -73,15 +72,7 @@ def register_external_error_handlers(api: Api): error_code = getattr(e, "error_code", None) if error_code == "unauthorized_and_force_logout": # Add Set-Cookie headers to clear auth cookies - - secure = is_secure() - # response is not accessible, so we need to do it ugly - common_part = "Path=/; Expires=Thu, 01 Jan 1970 00:00:00 GMT; HttpOnly" - headers["Set-Cookie"] = [ - f'{COOKIE_NAME_ACCESS_TOKEN}=""; {common_part}{"; Secure" if secure else ""}; SameSite=Lax', - f'{COOKIE_NAME_CSRF_TOKEN}=""; {common_part}{"; Secure" if secure else ""}; SameSite=Lax', - f'{COOKIE_NAME_REFRESH_TOKEN}=""; {common_part}{"; Secure" if secure else ""}; SameSite=Lax', - ] + headers["Set-Cookie"] = build_force_logout_cookie_headers() return data, status_code, headers _ = handle_http_exception diff --git a/api/libs/helper.py b/api/libs/helper.py index 60484dd40b..1013c3b878 100644 --- a/api/libs/helper.py +++ b/api/libs/helper.py @@ -177,6 +177,15 @@ def timezone(timezone_string): raise ValueError(error) +def convert_datetime_to_date(field, target_timezone: str = ":tz"): + if dify_config.DB_TYPE == "postgresql": + return f"DATE(DATE_TRUNC('day', {field} AT TIME ZONE 'UTC' AT TIME ZONE {target_timezone}))" + elif dify_config.DB_TYPE == "mysql": + return f"DATE(CONVERT_TZ({field}, 'UTC', {target_timezone}))" + else: + raise NotImplementedError(f"Unsupported database type: {dify_config.DB_TYPE}") + + def generate_string(n): letters_digits = string.ascii_letters + string.digits result = "" diff --git a/api/libs/schedule_utils.py b/api/libs/schedule_utils.py index 3d70ef2ef2..1ab5f499e9 100644 --- a/api/libs/schedule_utils.py +++ b/api/libs/schedule_utils.py @@ -1,5 +1,4 @@ from datetime import UTC, datetime -from typing import Optional import pytz from croniter import croniter @@ -8,7 +7,7 @@ from croniter import croniter def calculate_next_run_at( cron_expression: str, timezone: str, - base_time: Optional[datetime] = None, + base_time: datetime | None = None, ) -> datetime: """ Calculate the next run time for a cron expression in a specific timezone. diff --git a/api/libs/token.py b/api/libs/token.py index b53663c89a..098ff958da 100644 --- a/api/libs/token.py +++ b/api/libs/token.py @@ -30,8 +30,22 @@ def is_secure() -> bool: return dify_config.CONSOLE_WEB_URL.startswith("https") and dify_config.CONSOLE_API_URL.startswith("https") +def _cookie_domain() -> str | None: + """ + Returns the normalized cookie domain. + + Leading dots are stripped from the configured domain. Historically, a leading dot + indicated that a cookie should be sent to all subdomains, but modern browsers treat + 'example.com' and '.example.com' identically. This normalization ensures consistent + behavior and avoids confusion. + """ + domain = dify_config.COOKIE_DOMAIN.strip() + domain = domain.removeprefix(".") + return domain or None + + def _real_cookie_name(cookie_name: str) -> str: - if is_secure(): + if is_secure() and _cookie_domain() is None: return "__Host-" + cookie_name else: return cookie_name @@ -91,6 +105,7 @@ def set_access_token_to_cookie(request: Request, response: Response, token: str, _real_cookie_name(COOKIE_NAME_ACCESS_TOKEN), value=token, httponly=True, + domain=_cookie_domain(), secure=is_secure(), samesite=samesite, max_age=int(dify_config.ACCESS_TOKEN_EXPIRE_MINUTES * 60), @@ -103,6 +118,7 @@ def set_refresh_token_to_cookie(request: Request, response: Response, token: str _real_cookie_name(COOKIE_NAME_REFRESH_TOKEN), value=token, httponly=True, + domain=_cookie_domain(), secure=is_secure(), samesite="Lax", max_age=int(60 * 60 * 24 * dify_config.REFRESH_TOKEN_EXPIRE_DAYS), @@ -115,6 +131,7 @@ def set_csrf_token_to_cookie(request: Request, response: Response, token: str): _real_cookie_name(COOKIE_NAME_CSRF_TOKEN), value=token, httponly=False, + domain=_cookie_domain(), secure=is_secure(), samesite="Lax", max_age=int(60 * dify_config.ACCESS_TOKEN_EXPIRE_MINUTES), @@ -133,6 +150,7 @@ def _clear_cookie( "", expires=0, path="/", + domain=_cookie_domain(), secure=is_secure(), httponly=http_only, samesite=samesite, @@ -155,6 +173,19 @@ def clear_csrf_token_from_cookie(response: Response): _clear_cookie(response, COOKIE_NAME_CSRF_TOKEN, http_only=False) +def build_force_logout_cookie_headers() -> list[str]: + """ + Generate Set-Cookie header values that clear all auth-related cookies. + This mirrors the behavior of the standard cookie clearing helpers while + allowing callers that do not have a Response instance to reuse the logic. + """ + response = Response() + clear_access_token_from_cookie(response) + clear_csrf_token_from_cookie(response) + clear_refresh_token_from_cookie(response) + return response.headers.getlist("Set-Cookie") + + def check_csrf_token(request: Request, user_id: str): # some apis are sent by beacon, so we need to bypass csrf token check # since these APIs are post, they are already protected by SameSite: Lax, so csrf is not required. diff --git a/api/migrations/versions/00bacef91f18_rename_api_provider_description.py b/api/migrations/versions/00bacef91f18_rename_api_provider_description.py index 5ae9e8769a..17ed067d81 100644 --- a/api/migrations/versions/00bacef91f18_rename_api_provider_description.py +++ b/api/migrations/versions/00bacef91f18_rename_api_provider_description.py @@ -8,6 +8,12 @@ Create Date: 2024-01-07 04:07:34.482983 import sqlalchemy as sa from alembic import op +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '00bacef91f18' down_revision = '8ec536f3c800' @@ -17,17 +23,31 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.add_column(sa.Column('description', sa.Text(), nullable=False)) - batch_op.drop_column('description_str') + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('description', sa.Text(), nullable=False)) + batch_op.drop_column('description_str') + else: + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('description', models.types.LongText(), nullable=False)) + batch_op.drop_column('description_str') # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.add_column(sa.Column('description_str', sa.TEXT(), autoincrement=False, nullable=False)) - batch_op.drop_column('description') + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('description_str', sa.TEXT(), autoincrement=False, nullable=False)) + batch_op.drop_column('description') + else: + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('description_str', models.types.LongText(), autoincrement=False, nullable=False)) + batch_op.drop_column('description') # ### end Alembic commands ### diff --git a/api/migrations/versions/04c602f5dc9b_update_appmodelconfig_and_add_table_.py b/api/migrations/versions/04c602f5dc9b_update_appmodelconfig_and_add_table_.py index 153861a71a..f64e16db7f 100644 --- a/api/migrations/versions/04c602f5dc9b_update_appmodelconfig_and_add_table_.py +++ b/api/migrations/versions/04c602f5dc9b_update_appmodelconfig_and_add_table_.py @@ -10,6 +10,10 @@ from alembic import op import models.types + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '04c602f5dc9b' down_revision = '4ff534e1eb11' @@ -19,15 +23,28 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('tracing_app_configs', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', models.types.StringUUID(), nullable=False), - sa.Column('tracing_provider', sa.String(length=255), nullable=True), - sa.Column('tracing_config', sa.JSON(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), - sa.PrimaryKeyConstraint('id', name='tracing_app_config_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('tracing_app_configs', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('tracing_provider', sa.String(length=255), nullable=True), + sa.Column('tracing_config', sa.JSON(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), + sa.PrimaryKeyConstraint('id', name='tracing_app_config_pkey') + ) + else: + op.create_table('tracing_app_configs', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('tracing_provider', sa.String(length=255), nullable=True), + sa.Column('tracing_config', sa.JSON(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.now(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.now(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tracing_app_config_pkey') + ) # ### end Alembic commands ### diff --git a/api/migrations/versions/053da0c1d756_add_api_tool_privacy.py b/api/migrations/versions/053da0c1d756_add_api_tool_privacy.py index a589f1f08b..2f54763f00 100644 --- a/api/migrations/versions/053da0c1d756_add_api_tool_privacy.py +++ b/api/migrations/versions/053da0c1d756_add_api_tool_privacy.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '053da0c1d756' down_revision = '4829e54d2fee' @@ -18,16 +24,31 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('tool_conversation_variables', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('user_id', postgresql.UUID(), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('conversation_id', postgresql.UUID(), nullable=False), - sa.Column('variables_str', sa.Text(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='tool_conversation_variables_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('tool_conversation_variables', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('user_id', postgresql.UUID(), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('conversation_id', postgresql.UUID(), nullable=False), + sa.Column('variables_str', sa.Text(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_conversation_variables_pkey') + ) + else: + op.create_table('tool_conversation_variables', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('user_id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('conversation_id', models.types.StringUUID(), nullable=False), + sa.Column('variables_str', models.types.LongText(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_conversation_variables_pkey') + ) + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: batch_op.add_column(sa.Column('privacy_policy', sa.String(length=255), nullable=True)) batch_op.alter_column('icon', diff --git a/api/migrations/versions/114eed84c228_remove_tool_id_from_model_invoke.py b/api/migrations/versions/114eed84c228_remove_tool_id_from_model_invoke.py index 58863fe3a7..ed70bf5d08 100644 --- a/api/migrations/versions/114eed84c228_remove_tool_id_from_model_invoke.py +++ b/api/migrations/versions/114eed84c228_remove_tool_id_from_model_invoke.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '114eed84c228' down_revision = 'c71211c8f604' @@ -26,7 +32,13 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('tool_model_invokes', schema=None) as batch_op: - batch_op.add_column(sa.Column('tool_id', postgresql.UUID(), autoincrement=False, nullable=False)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('tool_model_invokes', schema=None) as batch_op: + batch_op.add_column(sa.Column('tool_id', postgresql.UUID(), autoincrement=False, nullable=False)) + else: + with op.batch_alter_table('tool_model_invokes', schema=None) as batch_op: + batch_op.add_column(sa.Column('tool_id', models.types.StringUUID(), autoincrement=False, nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/161cadc1af8d_add_dataset_permission_tenant_id.py b/api/migrations/versions/161cadc1af8d_add_dataset_permission_tenant_id.py index 8907f78117..509bd5d0e8 100644 --- a/api/migrations/versions/161cadc1af8d_add_dataset_permission_tenant_id.py +++ b/api/migrations/versions/161cadc1af8d_add_dataset_permission_tenant_id.py @@ -8,7 +8,11 @@ Create Date: 2024-07-05 14:30:59.472593 import sqlalchemy as sa from alembic import op -import models as models +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" # revision identifiers, used by Alembic. revision = '161cadc1af8d' @@ -19,9 +23,16 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('dataset_permissions', schema=None) as batch_op: - # Step 1: Add column without NOT NULL constraint - op.add_column('dataset_permissions', sa.Column('tenant_id', sa.UUID(), nullable=False)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('dataset_permissions', schema=None) as batch_op: + # Step 1: Add column without NOT NULL constraint + op.add_column('dataset_permissions', sa.Column('tenant_id', sa.UUID(), nullable=False)) + else: + with op.batch_alter_table('dataset_permissions', schema=None) as batch_op: + # Step 1: Add column without NOT NULL constraint + op.add_column('dataset_permissions', sa.Column('tenant_id', models.types.StringUUID(), nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/16fa53d9faec_add_provider_model_support.py b/api/migrations/versions/16fa53d9faec_add_provider_model_support.py index 6791cf4578..ce24a20172 100644 --- a/api/migrations/versions/16fa53d9faec_add_provider_model_support.py +++ b/api/migrations/versions/16fa53d9faec_add_provider_model_support.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '16fa53d9faec' down_revision = '8d2d099ceb74' @@ -18,44 +24,87 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('provider_models', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('provider_name', sa.String(length=40), nullable=False), - sa.Column('model_name', sa.String(length=40), nullable=False), - sa.Column('model_type', sa.String(length=40), nullable=False), - sa.Column('encrypted_config', sa.Text(), nullable=True), - sa.Column('is_valid', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='provider_model_pkey'), - sa.UniqueConstraint('tenant_id', 'provider_name', 'model_name', 'model_type', name='unique_provider_model_name') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('provider_models', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('provider_name', sa.String(length=40), nullable=False), + sa.Column('model_name', sa.String(length=40), nullable=False), + sa.Column('model_type', sa.String(length=40), nullable=False), + sa.Column('encrypted_config', sa.Text(), nullable=True), + sa.Column('is_valid', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='provider_model_pkey'), + sa.UniqueConstraint('tenant_id', 'provider_name', 'model_name', 'model_type', name='unique_provider_model_name') + ) + else: + op.create_table('provider_models', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_name', sa.String(length=40), nullable=False), + sa.Column('model_name', sa.String(length=40), nullable=False), + sa.Column('model_type', sa.String(length=40), nullable=False), + sa.Column('encrypted_config', models.types.LongText(), nullable=True), + sa.Column('is_valid', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='provider_model_pkey'), + sa.UniqueConstraint('tenant_id', 'provider_name', 'model_name', 'model_type', name='unique_provider_model_name') + ) + with op.batch_alter_table('provider_models', schema=None) as batch_op: batch_op.create_index('provider_model_tenant_id_provider_idx', ['tenant_id', 'provider_name'], unique=False) - op.create_table('tenant_default_models', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('provider_name', sa.String(length=40), nullable=False), - sa.Column('model_name', sa.String(length=40), nullable=False), - sa.Column('model_type', sa.String(length=40), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='tenant_default_model_pkey') - ) + if _is_pg(conn): + op.create_table('tenant_default_models', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('provider_name', sa.String(length=40), nullable=False), + sa.Column('model_name', sa.String(length=40), nullable=False), + sa.Column('model_type', sa.String(length=40), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='tenant_default_model_pkey') + ) + else: + op.create_table('tenant_default_models', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_name', sa.String(length=40), nullable=False), + sa.Column('model_name', sa.String(length=40), nullable=False), + sa.Column('model_type', sa.String(length=40), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tenant_default_model_pkey') + ) + with op.batch_alter_table('tenant_default_models', schema=None) as batch_op: batch_op.create_index('tenant_default_model_tenant_id_provider_type_idx', ['tenant_id', 'provider_name', 'model_type'], unique=False) - op.create_table('tenant_preferred_model_providers', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('provider_name', sa.String(length=40), nullable=False), - sa.Column('preferred_provider_type', sa.String(length=40), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='tenant_preferred_model_provider_pkey') - ) + if _is_pg(conn): + op.create_table('tenant_preferred_model_providers', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('provider_name', sa.String(length=40), nullable=False), + sa.Column('preferred_provider_type', sa.String(length=40), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='tenant_preferred_model_provider_pkey') + ) + else: + op.create_table('tenant_preferred_model_providers', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_name', sa.String(length=40), nullable=False), + sa.Column('preferred_provider_type', sa.String(length=40), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tenant_preferred_model_provider_pkey') + ) + with op.batch_alter_table('tenant_preferred_model_providers', schema=None) as batch_op: batch_op.create_index('tenant_preferred_model_provider_tenant_provider_idx', ['tenant_id', 'provider_name'], unique=False) diff --git a/api/migrations/versions/17b5ab037c40_add_keyworg_table_storage_type.py b/api/migrations/versions/17b5ab037c40_add_keyworg_table_storage_type.py index 7707148489..4ce073318a 100644 --- a/api/migrations/versions/17b5ab037c40_add_keyworg_table_storage_type.py +++ b/api/migrations/versions/17b5ab037c40_add_keyworg_table_storage_type.py @@ -8,6 +8,10 @@ Create Date: 2024-04-01 09:48:54.232201 import sqlalchemy as sa from alembic import op + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '17b5ab037c40' down_revision = 'a8f9b3c45e4a' @@ -17,9 +21,14 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - - with op.batch_alter_table('dataset_keyword_tables', schema=None) as batch_op: - batch_op.add_column(sa.Column('data_source_type', sa.String(length=255), server_default=sa.text("'database'::character varying"), nullable=False)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('dataset_keyword_tables', schema=None) as batch_op: + batch_op.add_column(sa.Column('data_source_type', sa.String(length=255), server_default=sa.text("'database'::character varying"), nullable=False)) + else: + with op.batch_alter_table('dataset_keyword_tables', schema=None) as batch_op: + batch_op.add_column(sa.Column('data_source_type', sa.String(length=255), server_default=sa.text("'database'"), nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/2024_08_13_0633-63a83fcf12ba_support_conversation_variables.py b/api/migrations/versions/2024_08_13_0633-63a83fcf12ba_support_conversation_variables.py index 16e1efd4ef..e8d725e78c 100644 --- a/api/migrations/versions/2024_08_13_0633-63a83fcf12ba_support_conversation_variables.py +++ b/api/migrations/versions/2024_08_13_0633-63a83fcf12ba_support_conversation_variables.py @@ -10,6 +10,10 @@ from alembic import op import models as models + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '63a83fcf12ba' down_revision = '1787fbae959a' @@ -19,21 +23,39 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('workflow__conversation_variables', - sa.Column('id', models.types.StringUUID(), nullable=False), - sa.Column('conversation_id', models.types.StringUUID(), nullable=False), - sa.Column('app_id', models.types.StringUUID(), nullable=False), - sa.Column('data', sa.Text(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', 'conversation_id', name=op.f('workflow__conversation_variables_pkey')) - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('workflow__conversation_variables', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('conversation_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('data', sa.Text(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', 'conversation_id', name=op.f('workflow__conversation_variables_pkey')) + ) + else: + op.create_table('workflow__conversation_variables', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('conversation_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('data', models.types.LongText(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', 'conversation_id', name=op.f('workflow__conversation_variables_pkey')) + ) + with op.batch_alter_table('workflow__conversation_variables', schema=None) as batch_op: batch_op.create_index(batch_op.f('workflow__conversation_variables_app_id_idx'), ['app_id'], unique=False) batch_op.create_index(batch_op.f('workflow__conversation_variables_created_at_idx'), ['created_at'], unique=False) - with op.batch_alter_table('workflows', schema=None) as batch_op: - batch_op.add_column(sa.Column('conversation_variables', sa.Text(), server_default='{}', nullable=False)) + if _is_pg(conn): + with op.batch_alter_table('workflows', schema=None) as batch_op: + batch_op.add_column(sa.Column('conversation_variables', sa.Text(), server_default='{}', nullable=False)) + else: + with op.batch_alter_table('workflows', schema=None) as batch_op: + batch_op.add_column(sa.Column('conversation_variables', models.types.LongText(), default='{}', nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/2024_08_15_0956-0251a1c768cc_add_tidb_auth_binding.py b/api/migrations/versions/2024_08_15_0956-0251a1c768cc_add_tidb_auth_binding.py index ca2e410442..1e6743fba8 100644 --- a/api/migrations/versions/2024_08_15_0956-0251a1c768cc_add_tidb_auth_binding.py +++ b/api/migrations/versions/2024_08_15_0956-0251a1c768cc_add_tidb_auth_binding.py @@ -10,6 +10,10 @@ from alembic import op import models as models + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '0251a1c768cc' down_revision = 'bbadea11becb' @@ -19,18 +23,35 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('tidb_auth_bindings', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=True), - sa.Column('cluster_id', sa.String(length=255), nullable=False), - sa.Column('cluster_name', sa.String(length=255), nullable=False), - sa.Column('active', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.Column('status', sa.String(length=255), server_default=sa.text("'CREATING'::character varying"), nullable=False), - sa.Column('account', sa.String(length=255), nullable=False), - sa.Column('password', sa.String(length=255), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='tidb_auth_bindings_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('tidb_auth_bindings', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=True), + sa.Column('cluster_id', sa.String(length=255), nullable=False), + sa.Column('cluster_name', sa.String(length=255), nullable=False), + sa.Column('active', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('status', sa.String(length=255), server_default=sa.text("'CREATING'::character varying"), nullable=False), + sa.Column('account', sa.String(length=255), nullable=False), + sa.Column('password', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='tidb_auth_bindings_pkey') + ) + else: + op.create_table('tidb_auth_bindings', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=True), + sa.Column('cluster_id', sa.String(length=255), nullable=False), + sa.Column('cluster_name', sa.String(length=255), nullable=False), + sa.Column('active', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('status', sa.String(length=255), server_default=sa.text("'CREATING'"), nullable=False), + sa.Column('account', sa.String(length=255), nullable=False), + sa.Column('password', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tidb_auth_bindings_pkey') + ) + with op.batch_alter_table('tidb_auth_bindings', schema=None) as batch_op: batch_op.create_index('tidb_auth_bindings_active_idx', ['active'], unique=False) batch_op.create_index('tidb_auth_bindings_status_idx', ['status'], unique=False) diff --git a/api/migrations/versions/2024_09_11_1012-d57ba9ebb251_add_parent_message_id_to_messages.py b/api/migrations/versions/2024_09_11_1012-d57ba9ebb251_add_parent_message_id_to_messages.py index fd957eeafb..2c8bb2de89 100644 --- a/api/migrations/versions/2024_09_11_1012-d57ba9ebb251_add_parent_message_id_to_messages.py +++ b/api/migrations/versions/2024_09_11_1012-d57ba9ebb251_add_parent_message_id_to_messages.py @@ -10,6 +10,10 @@ from alembic import op import models as models + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'd57ba9ebb251' down_revision = '675b5321501b' @@ -22,8 +26,14 @@ def upgrade(): with op.batch_alter_table('messages', schema=None) as batch_op: batch_op.add_column(sa.Column('parent_message_id', models.types.StringUUID(), nullable=True)) - # Set parent_message_id for existing messages to uuid_nil() to distinguish them from new messages with actual parent IDs or NULLs - op.execute('UPDATE messages SET parent_message_id = uuid_nil() WHERE parent_message_id IS NULL') + # Set parent_message_id for existing messages to distinguish them from new messages with actual parent IDs or NULLs + conn = op.get_bind() + if _is_pg(conn): + # PostgreSQL: Use uuid_nil() function + op.execute('UPDATE messages SET parent_message_id = uuid_nil() WHERE parent_message_id IS NULL') + else: + # MySQL: Use a specific UUID value to represent nil + op.execute("UPDATE messages SET parent_message_id = '00000000-0000-0000-0000-000000000000' WHERE parent_message_id IS NULL") # ### end Alembic commands ### diff --git a/api/migrations/versions/2024_09_24_0922-6af6a521a53e_update_retrieval_resource.py b/api/migrations/versions/2024_09_24_0922-6af6a521a53e_update_retrieval_resource.py index 5337b340db..0767b725f6 100644 --- a/api/migrations/versions/2024_09_24_0922-6af6a521a53e_update_retrieval_resource.py +++ b/api/migrations/versions/2024_09_24_0922-6af6a521a53e_update_retrieval_resource.py @@ -6,7 +6,11 @@ Create Date: 2024-09-24 09:22:43.570120 """ from alembic import op -import models as models +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" import sqlalchemy as sa from sqlalchemy.dialects import postgresql @@ -19,30 +23,58 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('dataset_retriever_resources', schema=None) as batch_op: - batch_op.alter_column('document_id', - existing_type=sa.UUID(), - nullable=True) - batch_op.alter_column('data_source_type', - existing_type=sa.TEXT(), - nullable=True) - batch_op.alter_column('segment_id', - existing_type=sa.UUID(), - nullable=True) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('dataset_retriever_resources', schema=None) as batch_op: + batch_op.alter_column('document_id', + existing_type=sa.UUID(), + nullable=True) + batch_op.alter_column('data_source_type', + existing_type=sa.TEXT(), + nullable=True) + batch_op.alter_column('segment_id', + existing_type=sa.UUID(), + nullable=True) + else: + with op.batch_alter_table('dataset_retriever_resources', schema=None) as batch_op: + batch_op.alter_column('document_id', + existing_type=models.types.StringUUID(), + nullable=True) + batch_op.alter_column('data_source_type', + existing_type=models.types.LongText(), + nullable=True) + batch_op.alter_column('segment_id', + existing_type=models.types.StringUUID(), + nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('dataset_retriever_resources', schema=None) as batch_op: - batch_op.alter_column('segment_id', - existing_type=sa.UUID(), - nullable=False) - batch_op.alter_column('data_source_type', - existing_type=sa.TEXT(), - nullable=False) - batch_op.alter_column('document_id', - existing_type=sa.UUID(), - nullable=False) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('dataset_retriever_resources', schema=None) as batch_op: + batch_op.alter_column('segment_id', + existing_type=sa.UUID(), + nullable=False) + batch_op.alter_column('data_source_type', + existing_type=sa.TEXT(), + nullable=False) + batch_op.alter_column('document_id', + existing_type=sa.UUID(), + nullable=False) + else: + with op.batch_alter_table('dataset_retriever_resources', schema=None) as batch_op: + batch_op.alter_column('segment_id', + existing_type=models.types.StringUUID(), + nullable=False) + batch_op.alter_column('data_source_type', + existing_type=models.types.LongText(), + nullable=False) + batch_op.alter_column('document_id', + existing_type=models.types.StringUUID(), + nullable=False) # ### end Alembic commands ### diff --git a/api/migrations/versions/2024_09_25_0434-33f5fac87f29_external_knowledge_api.py b/api/migrations/versions/2024_09_25_0434-33f5fac87f29_external_knowledge_api.py index 3cb76e72c1..ac81d13c61 100644 --- a/api/migrations/versions/2024_09_25_0434-33f5fac87f29_external_knowledge_api.py +++ b/api/migrations/versions/2024_09_25_0434-33f5fac87f29_external_knowledge_api.py @@ -10,6 +10,10 @@ import models as models import sqlalchemy as sa from sqlalchemy.dialects import postgresql + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '33f5fac87f29' down_revision = '6af6a521a53e' @@ -19,34 +23,66 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('external_knowledge_apis', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('description', sa.String(length=255), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('settings', sa.Text(), nullable=True), - sa.Column('created_by', models.types.StringUUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_by', models.types.StringUUID(), nullable=True), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='external_knowledge_apis_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('external_knowledge_apis', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('description', sa.String(length=255), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('settings', sa.Text(), nullable=True), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='external_knowledge_apis_pkey') + ) + else: + op.create_table('external_knowledge_apis', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('description', sa.String(length=255), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('settings', models.types.LongText(), nullable=True), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='external_knowledge_apis_pkey') + ) + with op.batch_alter_table('external_knowledge_apis', schema=None) as batch_op: batch_op.create_index('external_knowledge_apis_name_idx', ['name'], unique=False) batch_op.create_index('external_knowledge_apis_tenant_idx', ['tenant_id'], unique=False) - op.create_table('external_knowledge_bindings', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('external_knowledge_api_id', models.types.StringUUID(), nullable=False), - sa.Column('dataset_id', models.types.StringUUID(), nullable=False), - sa.Column('external_knowledge_id', sa.Text(), nullable=False), - sa.Column('created_by', models.types.StringUUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_by', models.types.StringUUID(), nullable=True), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='external_knowledge_bindings_pkey') - ) + if _is_pg(conn): + op.create_table('external_knowledge_bindings', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('external_knowledge_api_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('external_knowledge_id', sa.Text(), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='external_knowledge_bindings_pkey') + ) + else: + op.create_table('external_knowledge_bindings', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('external_knowledge_api_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('external_knowledge_id', sa.String(length=512), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='external_knowledge_bindings_pkey') + ) + with op.batch_alter_table('external_knowledge_bindings', schema=None) as batch_op: batch_op.create_index('external_knowledge_bindings_dataset_idx', ['dataset_id'], unique=False) batch_op.create_index('external_knowledge_bindings_external_knowledge_api_idx', ['external_knowledge_api_id'], unique=False) diff --git a/api/migrations/versions/2024_10_10_0516-bbadea11becb_add_name_and_size_to_tool_files.py b/api/migrations/versions/2024_10_10_0516-bbadea11becb_add_name_and_size_to_tool_files.py index 00f2b15802..33266ba5dd 100644 --- a/api/migrations/versions/2024_10_10_0516-bbadea11becb_add_name_and_size_to_tool_files.py +++ b/api/migrations/versions/2024_10_10_0516-bbadea11becb_add_name_and_size_to_tool_files.py @@ -16,6 +16,10 @@ branch_labels = None depends_on = None +def _is_pg(conn): + return conn.dialect.name == "postgresql" + + def upgrade(): def _has_name_or_size_column() -> bool: # We cannot access the database in offline mode, so assume @@ -46,14 +50,26 @@ def upgrade(): if _has_name_or_size_column(): return - with op.batch_alter_table("tool_files", schema=None) as batch_op: - batch_op.add_column(sa.Column("name", sa.String(), nullable=True)) - batch_op.add_column(sa.Column("size", sa.Integer(), nullable=True)) - op.execute("UPDATE tool_files SET name = '' WHERE name IS NULL") - op.execute("UPDATE tool_files SET size = -1 WHERE size IS NULL") - with op.batch_alter_table("tool_files", schema=None) as batch_op: - batch_op.alter_column("name", existing_type=sa.String(), nullable=False) - batch_op.alter_column("size", existing_type=sa.Integer(), nullable=False) + if _is_pg(conn): + # PostgreSQL: Keep original syntax + with op.batch_alter_table("tool_files", schema=None) as batch_op: + batch_op.add_column(sa.Column("name", sa.String(), nullable=True)) + batch_op.add_column(sa.Column("size", sa.Integer(), nullable=True)) + op.execute("UPDATE tool_files SET name = '' WHERE name IS NULL") + op.execute("UPDATE tool_files SET size = -1 WHERE size IS NULL") + with op.batch_alter_table("tool_files", schema=None) as batch_op: + batch_op.alter_column("name", existing_type=sa.String(), nullable=False) + batch_op.alter_column("size", existing_type=sa.Integer(), nullable=False) + else: + # MySQL: Use compatible syntax + with op.batch_alter_table("tool_files", schema=None) as batch_op: + batch_op.add_column(sa.Column("name", sa.String(length=255), nullable=True)) + batch_op.add_column(sa.Column("size", sa.Integer(), nullable=True)) + op.execute("UPDATE tool_files SET name = '' WHERE name IS NULL") + op.execute("UPDATE tool_files SET size = -1 WHERE size IS NULL") + with op.batch_alter_table("tool_files", schema=None) as batch_op: + batch_op.alter_column("name", existing_type=sa.String(length=255), nullable=False) + batch_op.alter_column("size", existing_type=sa.Integer(), nullable=False) # ### end Alembic commands ### diff --git a/api/migrations/versions/2024_10_22_0959-43fa78bc3b7d_add_white_list.py b/api/migrations/versions/2024_10_22_0959-43fa78bc3b7d_add_white_list.py index 9daf148bc4..22ee0ec195 100644 --- a/api/migrations/versions/2024_10_22_0959-43fa78bc3b7d_add_white_list.py +++ b/api/migrations/versions/2024_10_22_0959-43fa78bc3b7d_add_white_list.py @@ -10,6 +10,10 @@ import models as models import sqlalchemy as sa from sqlalchemy.dialects import postgresql + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '43fa78bc3b7d' down_revision = '0251a1c768cc' @@ -19,13 +23,25 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('whitelists', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=True), - sa.Column('category', sa.String(length=255), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='whitelists_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('whitelists', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=True), + sa.Column('category', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='whitelists_pkey') + ) + else: + op.create_table('whitelists', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=True), + sa.Column('category', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='whitelists_pkey') + ) + with op.batch_alter_table('whitelists', schema=None) as batch_op: batch_op.create_index('whitelists_tenant_idx', ['tenant_id'], unique=False) diff --git a/api/migrations/versions/2024_10_28_0720-08ec4f75af5e_add_tenant_plugin_permisisons.py b/api/migrations/versions/2024_10_28_0720-08ec4f75af5e_add_tenant_plugin_permisisons.py index 51a0b1b211..666d046bb9 100644 --- a/api/migrations/versions/2024_10_28_0720-08ec4f75af5e_add_tenant_plugin_permisisons.py +++ b/api/migrations/versions/2024_10_28_0720-08ec4f75af5e_add_tenant_plugin_permisisons.py @@ -10,6 +10,10 @@ import models as models import sqlalchemy as sa from sqlalchemy.dialects import postgresql + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '08ec4f75af5e' down_revision = 'ddcc8bbef391' @@ -19,14 +23,26 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('account_plugin_permissions', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('install_permission', sa.String(length=16), server_default='everyone', nullable=False), - sa.Column('debug_permission', sa.String(length=16), server_default='noone', nullable=False), - sa.PrimaryKeyConstraint('id', name='account_plugin_permission_pkey'), - sa.UniqueConstraint('tenant_id', name='unique_tenant_plugin') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('account_plugin_permissions', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('install_permission', sa.String(length=16), server_default='everyone', nullable=False), + sa.Column('debug_permission', sa.String(length=16), server_default='noone', nullable=False), + sa.PrimaryKeyConstraint('id', name='account_plugin_permission_pkey'), + sa.UniqueConstraint('tenant_id', name='unique_tenant_plugin') + ) + else: + op.create_table('account_plugin_permissions', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('install_permission', sa.String(length=16), server_default='everyone', nullable=False), + sa.Column('debug_permission', sa.String(length=16), server_default='noone', nullable=False), + sa.PrimaryKeyConstraint('id', name='account_plugin_permission_pkey'), + sa.UniqueConstraint('tenant_id', name='unique_tenant_plugin') + ) # ### end Alembic commands ### diff --git a/api/migrations/versions/2024_11_01_0540-f4d7ce70a7ca_update_upload_files_source_url.py b/api/migrations/versions/2024_11_01_0540-f4d7ce70a7ca_update_upload_files_source_url.py index 222379a490..b3fe1e9fab 100644 --- a/api/migrations/versions/2024_11_01_0540-f4d7ce70a7ca_update_upload_files_source_url.py +++ b/api/migrations/versions/2024_11_01_0540-f4d7ce70a7ca_update_upload_files_source_url.py @@ -10,6 +10,10 @@ import models as models import sqlalchemy as sa from sqlalchemy.dialects import postgresql + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'f4d7ce70a7ca' down_revision = '93ad8c19c40b' @@ -19,23 +23,43 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('upload_files', schema=None) as batch_op: - batch_op.alter_column('source_url', - existing_type=sa.VARCHAR(length=255), - type_=sa.TEXT(), - existing_nullable=False, - existing_server_default=sa.text("''::character varying")) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('upload_files', schema=None) as batch_op: + batch_op.alter_column('source_url', + existing_type=sa.VARCHAR(length=255), + type_=sa.TEXT(), + existing_nullable=False, + existing_server_default=sa.text("''::character varying")) + else: + with op.batch_alter_table('upload_files', schema=None) as batch_op: + batch_op.alter_column('source_url', + existing_type=sa.VARCHAR(length=255), + type_=models.types.LongText(), + existing_nullable=False, + existing_default=sa.text("''")) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('upload_files', schema=None) as batch_op: - batch_op.alter_column('source_url', - existing_type=sa.TEXT(), - type_=sa.VARCHAR(length=255), - existing_nullable=False, - existing_server_default=sa.text("''::character varying")) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('upload_files', schema=None) as batch_op: + batch_op.alter_column('source_url', + existing_type=sa.TEXT(), + type_=sa.VARCHAR(length=255), + existing_nullable=False, + existing_server_default=sa.text("''::character varying")) + else: + with op.batch_alter_table('upload_files', schema=None) as batch_op: + batch_op.alter_column('source_url', + existing_type=models.types.LongText(), + type_=sa.VARCHAR(length=255), + existing_nullable=False, + existing_default=sa.text("''")) # ### end Alembic commands ### diff --git a/api/migrations/versions/2024_11_01_0622-d07474999927_update_type_of_custom_disclaimer_to_text.py b/api/migrations/versions/2024_11_01_0622-d07474999927_update_type_of_custom_disclaimer_to_text.py index 9a4ccf352d..45842295ea 100644 --- a/api/migrations/versions/2024_11_01_0622-d07474999927_update_type_of_custom_disclaimer_to_text.py +++ b/api/migrations/versions/2024_11_01_0622-d07474999927_update_type_of_custom_disclaimer_to_text.py @@ -7,6 +7,9 @@ Create Date: 2024-11-01 06:22:27.981398 """ from alembic import op import models as models + +def _is_pg(conn): + return conn.dialect.name == "postgresql" import sqlalchemy as sa from sqlalchemy.dialects import postgresql @@ -19,49 +22,91 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() + op.execute("UPDATE recommended_apps SET custom_disclaimer = '' WHERE custom_disclaimer IS NULL") op.execute("UPDATE sites SET custom_disclaimer = '' WHERE custom_disclaimer IS NULL") op.execute("UPDATE tool_api_providers SET custom_disclaimer = '' WHERE custom_disclaimer IS NULL") - with op.batch_alter_table('recommended_apps', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=sa.VARCHAR(length=255), - type_=sa.TEXT(), - nullable=False) + if _is_pg(conn): + with op.batch_alter_table('recommended_apps', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=sa.VARCHAR(length=255), + type_=sa.TEXT(), + nullable=False) - with op.batch_alter_table('sites', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=sa.VARCHAR(length=255), - type_=sa.TEXT(), - nullable=False) + with op.batch_alter_table('sites', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=sa.VARCHAR(length=255), + type_=sa.TEXT(), + nullable=False) - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=sa.VARCHAR(length=255), - type_=sa.TEXT(), - nullable=False) + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=sa.VARCHAR(length=255), + type_=sa.TEXT(), + nullable=False) + else: + with op.batch_alter_table('recommended_apps', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=sa.VARCHAR(length=255), + type_=models.types.LongText(), + nullable=False) + + with op.batch_alter_table('sites', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=sa.VARCHAR(length=255), + type_=models.types.LongText(), + nullable=False) + + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=sa.VARCHAR(length=255), + type_=models.types.LongText(), + nullable=False) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=sa.TEXT(), - type_=sa.VARCHAR(length=255), - nullable=True) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=sa.TEXT(), + type_=sa.VARCHAR(length=255), + nullable=True) - with op.batch_alter_table('sites', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=sa.TEXT(), - type_=sa.VARCHAR(length=255), - nullable=True) + with op.batch_alter_table('sites', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=sa.TEXT(), + type_=sa.VARCHAR(length=255), + nullable=True) - with op.batch_alter_table('recommended_apps', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=sa.TEXT(), - type_=sa.VARCHAR(length=255), - nullable=True) + with op.batch_alter_table('recommended_apps', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=sa.TEXT(), + type_=sa.VARCHAR(length=255), + nullable=True) + else: + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=models.types.LongText(), + type_=sa.VARCHAR(length=255), + nullable=True) + + with op.batch_alter_table('sites', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=models.types.LongText(), + type_=sa.VARCHAR(length=255), + nullable=True) + + with op.batch_alter_table('recommended_apps', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=models.types.LongText(), + type_=sa.VARCHAR(length=255), + nullable=True) # ### end Alembic commands ### diff --git a/api/migrations/versions/2024_11_01_0623-09a8d1878d9b_update_workflows_graph_features_and_.py b/api/migrations/versions/2024_11_01_0623-09a8d1878d9b_update_workflows_graph_features_and_.py index 117a7351cd..fdd8984029 100644 --- a/api/migrations/versions/2024_11_01_0623-09a8d1878d9b_update_workflows_graph_features_and_.py +++ b/api/migrations/versions/2024_11_01_0623-09a8d1878d9b_update_workflows_graph_features_and_.py @@ -10,6 +10,10 @@ import models as models import sqlalchemy as sa from sqlalchemy.dialects import postgresql + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '09a8d1878d9b' down_revision = 'd07474999927' @@ -19,55 +23,103 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('conversations', schema=None) as batch_op: - batch_op.alter_column('inputs', - existing_type=postgresql.JSON(astext_type=sa.Text()), - nullable=False) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('conversations', schema=None) as batch_op: + batch_op.alter_column('inputs', + existing_type=postgresql.JSON(astext_type=sa.Text()), + nullable=False) - with op.batch_alter_table('messages', schema=None) as batch_op: - batch_op.alter_column('inputs', - existing_type=postgresql.JSON(astext_type=sa.Text()), - nullable=False) + with op.batch_alter_table('messages', schema=None) as batch_op: + batch_op.alter_column('inputs', + existing_type=postgresql.JSON(astext_type=sa.Text()), + nullable=False) + else: + with op.batch_alter_table('conversations', schema=None) as batch_op: + batch_op.alter_column('inputs', + existing_type=sa.JSON(), + nullable=False) + + with op.batch_alter_table('messages', schema=None) as batch_op: + batch_op.alter_column('inputs', + existing_type=sa.JSON(), + nullable=False) op.execute("UPDATE workflows SET updated_at = created_at WHERE updated_at IS NULL") op.execute("UPDATE workflows SET graph = '' WHERE graph IS NULL") op.execute("UPDATE workflows SET features = '' WHERE features IS NULL") - - with op.batch_alter_table('workflows', schema=None) as batch_op: - batch_op.alter_column('graph', - existing_type=sa.TEXT(), - nullable=False) - batch_op.alter_column('features', - existing_type=sa.TEXT(), - nullable=False) - batch_op.alter_column('updated_at', - existing_type=postgresql.TIMESTAMP(), - nullable=False) - + if _is_pg(conn): + with op.batch_alter_table('workflows', schema=None) as batch_op: + batch_op.alter_column('graph', + existing_type=sa.TEXT(), + nullable=False) + batch_op.alter_column('features', + existing_type=sa.TEXT(), + nullable=False) + batch_op.alter_column('updated_at', + existing_type=postgresql.TIMESTAMP(), + nullable=False) + else: + with op.batch_alter_table('workflows', schema=None) as batch_op: + batch_op.alter_column('graph', + existing_type=models.types.LongText(), + nullable=False) + batch_op.alter_column('features', + existing_type=models.types.LongText(), + nullable=False) + batch_op.alter_column('updated_at', + existing_type=sa.TIMESTAMP(), + nullable=False) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('workflows', schema=None) as batch_op: - batch_op.alter_column('updated_at', - existing_type=postgresql.TIMESTAMP(), - nullable=True) - batch_op.alter_column('features', - existing_type=sa.TEXT(), - nullable=True) - batch_op.alter_column('graph', - existing_type=sa.TEXT(), - nullable=True) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('workflows', schema=None) as batch_op: + batch_op.alter_column('updated_at', + existing_type=postgresql.TIMESTAMP(), + nullable=True) + batch_op.alter_column('features', + existing_type=sa.TEXT(), + nullable=True) + batch_op.alter_column('graph', + existing_type=sa.TEXT(), + nullable=True) + else: + with op.batch_alter_table('workflows', schema=None) as batch_op: + batch_op.alter_column('updated_at', + existing_type=sa.TIMESTAMP(), + nullable=True) + batch_op.alter_column('features', + existing_type=models.types.LongText(), + nullable=True) + batch_op.alter_column('graph', + existing_type=models.types.LongText(), + nullable=True) - with op.batch_alter_table('messages', schema=None) as batch_op: - batch_op.alter_column('inputs', - existing_type=postgresql.JSON(astext_type=sa.Text()), - nullable=True) + if _is_pg(conn): + with op.batch_alter_table('messages', schema=None) as batch_op: + batch_op.alter_column('inputs', + existing_type=postgresql.JSON(astext_type=sa.Text()), + nullable=True) - with op.batch_alter_table('conversations', schema=None) as batch_op: - batch_op.alter_column('inputs', - existing_type=postgresql.JSON(astext_type=sa.Text()), - nullable=True) + with op.batch_alter_table('conversations', schema=None) as batch_op: + batch_op.alter_column('inputs', + existing_type=postgresql.JSON(astext_type=sa.Text()), + nullable=True) + else: + with op.batch_alter_table('messages', schema=None) as batch_op: + batch_op.alter_column('inputs', + existing_type=sa.JSON(), + nullable=True) + + with op.batch_alter_table('conversations', schema=None) as batch_op: + batch_op.alter_column('inputs', + existing_type=sa.JSON(), + nullable=True) # ### end Alembic commands ### diff --git a/api/migrations/versions/2024_11_22_0701-e19037032219_parent_child_index.py b/api/migrations/versions/2024_11_22_0701-e19037032219_parent_child_index.py index 9238e5a0a8..14048baa30 100644 --- a/api/migrations/versions/2024_11_22_0701-e19037032219_parent_child_index.py +++ b/api/migrations/versions/2024_11_22_0701-e19037032219_parent_child_index.py @@ -10,6 +10,10 @@ import models as models import sqlalchemy as sa +def _is_pg(conn): + return conn.dialect.name == "postgresql" + + # revision identifiers, used by Alembic. revision = 'e19037032219' down_revision = 'd7999dfa4aae' @@ -19,27 +23,53 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('child_chunks', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('dataset_id', models.types.StringUUID(), nullable=False), - sa.Column('document_id', models.types.StringUUID(), nullable=False), - sa.Column('segment_id', models.types.StringUUID(), nullable=False), - sa.Column('position', sa.Integer(), nullable=False), - sa.Column('content', sa.Text(), nullable=False), - sa.Column('word_count', sa.Integer(), nullable=False), - sa.Column('index_node_id', sa.String(length=255), nullable=True), - sa.Column('index_node_hash', sa.String(length=255), nullable=True), - sa.Column('type', sa.String(length=255), server_default=sa.text("'automatic'::character varying"), nullable=False), - sa.Column('created_by', models.types.StringUUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_by', models.types.StringUUID(), nullable=True), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('indexing_at', sa.DateTime(), nullable=True), - sa.Column('completed_at', sa.DateTime(), nullable=True), - sa.Column('error', sa.Text(), nullable=True), - sa.PrimaryKeyConstraint('id', name='child_chunk_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('child_chunks', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('document_id', models.types.StringUUID(), nullable=False), + sa.Column('segment_id', models.types.StringUUID(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('content', sa.Text(), nullable=False), + sa.Column('word_count', sa.Integer(), nullable=False), + sa.Column('index_node_id', sa.String(length=255), nullable=True), + sa.Column('index_node_hash', sa.String(length=255), nullable=True), + sa.Column('type', sa.String(length=255), server_default=sa.text("'automatic'::character varying"), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('indexing_at', sa.DateTime(), nullable=True), + sa.Column('completed_at', sa.DateTime(), nullable=True), + sa.Column('error', sa.Text(), nullable=True), + sa.PrimaryKeyConstraint('id', name='child_chunk_pkey') + ) + else: + op.create_table('child_chunks', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('document_id', models.types.StringUUID(), nullable=False), + sa.Column('segment_id', models.types.StringUUID(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('content', models.types.LongText(), nullable=False), + sa.Column('word_count', sa.Integer(), nullable=False), + sa.Column('index_node_id', sa.String(length=255), nullable=True), + sa.Column('index_node_hash', sa.String(length=255), nullable=True), + sa.Column('type', sa.String(length=255), server_default=sa.text("'automatic'"), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('indexing_at', sa.DateTime(), nullable=True), + sa.Column('completed_at', sa.DateTime(), nullable=True), + sa.Column('error', models.types.LongText(), nullable=True), + sa.PrimaryKeyConstraint('id', name='child_chunk_pkey') + ) + with op.batch_alter_table('child_chunks', schema=None) as batch_op: batch_op.create_index('child_chunk_dataset_id_idx', ['tenant_id', 'dataset_id', 'document_id', 'segment_id', 'index_node_id'], unique=False) diff --git a/api/migrations/versions/2024_12_19_1746-11b07f66c737_remove_unused_tool_providers.py b/api/migrations/versions/2024_12_19_1746-11b07f66c737_remove_unused_tool_providers.py index 881a9e3c1e..7be99fe09a 100644 --- a/api/migrations/versions/2024_12_19_1746-11b07f66c737_remove_unused_tool_providers.py +++ b/api/migrations/versions/2024_12_19_1746-11b07f66c737_remove_unused_tool_providers.py @@ -10,6 +10,10 @@ import models as models import sqlalchemy as sa from sqlalchemy.dialects import postgresql + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '11b07f66c737' down_revision = 'cf8f4fc45278' @@ -25,15 +29,30 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('tool_providers', - sa.Column('id', sa.UUID(), server_default=sa.text('uuid_generate_v4()'), autoincrement=False, nullable=False), - sa.Column('tenant_id', sa.UUID(), autoincrement=False, nullable=False), - sa.Column('tool_name', sa.VARCHAR(length=40), autoincrement=False, nullable=False), - sa.Column('encrypted_credentials', sa.TEXT(), autoincrement=False, nullable=True), - sa.Column('is_enabled', sa.BOOLEAN(), server_default=sa.text('false'), autoincrement=False, nullable=False), - sa.Column('created_at', postgresql.TIMESTAMP(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), autoincrement=False, nullable=False), - sa.Column('updated_at', postgresql.TIMESTAMP(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('id', name='tool_provider_pkey'), - sa.UniqueConstraint('tenant_id', 'tool_name', name='unique_tool_provider_tool_name') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('tool_providers', + sa.Column('id', sa.UUID(), server_default=sa.text('uuid_generate_v4()'), autoincrement=False, nullable=False), + sa.Column('tenant_id', sa.UUID(), autoincrement=False, nullable=False), + sa.Column('tool_name', sa.VARCHAR(length=40), autoincrement=False, nullable=False), + sa.Column('encrypted_credentials', sa.TEXT(), autoincrement=False, nullable=True), + sa.Column('is_enabled', sa.BOOLEAN(), server_default=sa.text('false'), autoincrement=False, nullable=False), + sa.Column('created_at', postgresql.TIMESTAMP(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), autoincrement=False, nullable=False), + sa.Column('updated_at', postgresql.TIMESTAMP(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_provider_pkey'), + sa.UniqueConstraint('tenant_id', 'tool_name', name='unique_tool_provider_tool_name') + ) + else: + op.create_table('tool_providers', + sa.Column('id', models.types.StringUUID(), autoincrement=False, nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), autoincrement=False, nullable=False), + sa.Column('tool_name', sa.VARCHAR(length=40), autoincrement=False, nullable=False), + sa.Column('encrypted_credentials', models.types.LongText(), autoincrement=False, nullable=True), + sa.Column('is_enabled', sa.BOOLEAN(), server_default=sa.text('false'), autoincrement=False, nullable=False), + sa.Column('created_at', sa.TIMESTAMP(), server_default=sa.func.current_timestamp(), autoincrement=False, nullable=False), + sa.Column('updated_at', sa.TIMESTAMP(), server_default=sa.func.current_timestamp(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_provider_pkey'), + sa.UniqueConstraint('tenant_id', 'tool_name', name='unique_tool_provider_tool_name') + ) # ### end Alembic commands ### diff --git a/api/migrations/versions/2024_12_25_1137-923752d42eb6_add_auto_disabled_dataset_logs.py b/api/migrations/versions/2024_12_25_1137-923752d42eb6_add_auto_disabled_dataset_logs.py index 6dadd4e4a8..750a3d02e2 100644 --- a/api/migrations/versions/2024_12_25_1137-923752d42eb6_add_auto_disabled_dataset_logs.py +++ b/api/migrations/versions/2024_12_25_1137-923752d42eb6_add_auto_disabled_dataset_logs.py @@ -10,6 +10,10 @@ import models as models import sqlalchemy as sa from sqlalchemy.dialects import postgresql + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '923752d42eb6' down_revision = 'e19037032219' @@ -19,15 +23,29 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('dataset_auto_disable_logs', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('dataset_id', models.types.StringUUID(), nullable=False), - sa.Column('document_id', models.types.StringUUID(), nullable=False), - sa.Column('notified', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='dataset_auto_disable_log_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('dataset_auto_disable_logs', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('document_id', models.types.StringUUID(), nullable=False), + sa.Column('notified', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_auto_disable_log_pkey') + ) + else: + op.create_table('dataset_auto_disable_logs', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('document_id', models.types.StringUUID(), nullable=False), + sa.Column('notified', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_auto_disable_log_pkey') + ) + with op.batch_alter_table('dataset_auto_disable_logs', schema=None) as batch_op: batch_op.create_index('dataset_auto_disable_log_created_atx', ['created_at'], unique=False) batch_op.create_index('dataset_auto_disable_log_dataset_idx', ['dataset_id'], unique=False) diff --git a/api/migrations/versions/2025_01_14_0617-f051706725cc_add_rate_limit_logs.py b/api/migrations/versions/2025_01_14_0617-f051706725cc_add_rate_limit_logs.py index ef495be661..5d79877e28 100644 --- a/api/migrations/versions/2025_01_14_0617-f051706725cc_add_rate_limit_logs.py +++ b/api/migrations/versions/2025_01_14_0617-f051706725cc_add_rate_limit_logs.py @@ -10,6 +10,10 @@ import models as models import sqlalchemy as sa from sqlalchemy.dialects import postgresql + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'f051706725cc' down_revision = 'ee79d9b1c156' @@ -19,14 +23,27 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('rate_limit_logs', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('subscription_plan', sa.String(length=255), nullable=False), - sa.Column('operation', sa.String(length=255), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='rate_limit_log_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('rate_limit_logs', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('subscription_plan', sa.String(length=255), nullable=False), + sa.Column('operation', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='rate_limit_log_pkey') + ) + else: + op.create_table('rate_limit_logs', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('subscription_plan', sa.String(length=255), nullable=False), + sa.Column('operation', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='rate_limit_log_pkey') + ) + with op.batch_alter_table('rate_limit_logs', schema=None) as batch_op: batch_op.create_index('rate_limit_log_operation_idx', ['operation'], unique=False) batch_op.create_index('rate_limit_log_tenant_idx', ['tenant_id'], unique=False) diff --git a/api/migrations/versions/2025_02_27_0917-d20049ed0af6_add_metadata_function.py b/api/migrations/versions/2025_02_27_0917-d20049ed0af6_add_metadata_function.py index 877e3a5eed..da512704a6 100644 --- a/api/migrations/versions/2025_02_27_0917-d20049ed0af6_add_metadata_function.py +++ b/api/migrations/versions/2025_02_27_0917-d20049ed0af6_add_metadata_function.py @@ -10,6 +10,10 @@ import models as models import sqlalchemy as sa from sqlalchemy.dialects import postgresql + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'd20049ed0af6' down_revision = 'f051706725cc' @@ -19,34 +23,66 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('dataset_metadata_bindings', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('dataset_id', models.types.StringUUID(), nullable=False), - sa.Column('metadata_id', models.types.StringUUID(), nullable=False), - sa.Column('document_id', models.types.StringUUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('created_by', models.types.StringUUID(), nullable=False), - sa.PrimaryKeyConstraint('id', name='dataset_metadata_binding_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('dataset_metadata_bindings', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('metadata_id', models.types.StringUUID(), nullable=False), + sa.Column('document_id', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_metadata_binding_pkey') + ) + else: + op.create_table('dataset_metadata_bindings', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('metadata_id', models.types.StringUUID(), nullable=False), + sa.Column('document_id', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_metadata_binding_pkey') + ) + with op.batch_alter_table('dataset_metadata_bindings', schema=None) as batch_op: batch_op.create_index('dataset_metadata_binding_dataset_idx', ['dataset_id'], unique=False) batch_op.create_index('dataset_metadata_binding_document_idx', ['document_id'], unique=False) batch_op.create_index('dataset_metadata_binding_metadata_idx', ['metadata_id'], unique=False) batch_op.create_index('dataset_metadata_binding_tenant_idx', ['tenant_id'], unique=False) - op.create_table('dataset_metadatas', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('dataset_id', models.types.StringUUID(), nullable=False), - sa.Column('type', sa.String(length=255), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('created_by', models.types.StringUUID(), nullable=False), - sa.Column('updated_by', models.types.StringUUID(), nullable=True), - sa.PrimaryKeyConstraint('id', name='dataset_metadata_pkey') - ) + if _is_pg(conn): + # PostgreSQL: Keep original syntax + op.create_table('dataset_metadatas', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('type', sa.String(length=255), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.PrimaryKeyConstraint('id', name='dataset_metadata_pkey') + ) + else: + # MySQL: Use compatible syntax + op.create_table('dataset_metadatas', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('type', sa.String(length=255), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.PrimaryKeyConstraint('id', name='dataset_metadata_pkey') + ) + with op.batch_alter_table('dataset_metadatas', schema=None) as batch_op: batch_op.create_index('dataset_metadata_dataset_idx', ['dataset_id'], unique=False) batch_op.create_index('dataset_metadata_tenant_idx', ['tenant_id'], unique=False) @@ -54,23 +90,31 @@ def upgrade(): with op.batch_alter_table('datasets', schema=None) as batch_op: batch_op.add_column(sa.Column('built_in_field_enabled', sa.Boolean(), server_default=sa.text('false'), nullable=False)) - with op.batch_alter_table('documents', schema=None) as batch_op: - batch_op.alter_column('doc_metadata', - existing_type=postgresql.JSON(astext_type=sa.Text()), - type_=postgresql.JSONB(astext_type=sa.Text()), - existing_nullable=True) - batch_op.create_index('document_metadata_idx', ['doc_metadata'], unique=False, postgresql_using='gin') + if _is_pg(conn): + with op.batch_alter_table('documents', schema=None) as batch_op: + batch_op.alter_column('doc_metadata', + existing_type=postgresql.JSON(astext_type=sa.Text()), + type_=postgresql.JSONB(astext_type=sa.Text()), + existing_nullable=True) + batch_op.create_index('document_metadata_idx', ['doc_metadata'], unique=False, postgresql_using='gin') + else: + pass # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('documents', schema=None) as batch_op: - batch_op.drop_index('document_metadata_idx', postgresql_using='gin') - batch_op.alter_column('doc_metadata', - existing_type=postgresql.JSONB(astext_type=sa.Text()), - type_=postgresql.JSON(astext_type=sa.Text()), - existing_nullable=True) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('documents', schema=None) as batch_op: + batch_op.drop_index('document_metadata_idx', postgresql_using='gin') + batch_op.alter_column('doc_metadata', + existing_type=postgresql.JSONB(astext_type=sa.Text()), + type_=postgresql.JSON(astext_type=sa.Text()), + existing_nullable=True) + else: + pass with op.batch_alter_table('datasets', schema=None) as batch_op: batch_op.drop_column('built_in_field_enabled') diff --git a/api/migrations/versions/2025_03_03_1436-ee79d9b1c156_add_marked_name_and_marked_comment_in_.py b/api/migrations/versions/2025_03_03_1436-ee79d9b1c156_add_marked_name_and_marked_comment_in_.py index 5189de40e4..ea1b24b0fa 100644 --- a/api/migrations/versions/2025_03_03_1436-ee79d9b1c156_add_marked_name_and_marked_comment_in_.py +++ b/api/migrations/versions/2025_03_03_1436-ee79d9b1c156_add_marked_name_and_marked_comment_in_.py @@ -17,10 +17,23 @@ branch_labels = None depends_on = None +def _is_pg(conn): + return conn.dialect.name == "postgresql" + + def upgrade(): - with op.batch_alter_table('workflows', schema=None) as batch_op: - batch_op.add_column(sa.Column('marked_name', sa.String(), nullable=False, server_default='')) - batch_op.add_column(sa.Column('marked_comment', sa.String(), nullable=False, server_default='')) + conn = op.get_bind() + + if _is_pg(conn): + # PostgreSQL: Keep original syntax + with op.batch_alter_table('workflows', schema=None) as batch_op: + batch_op.add_column(sa.Column('marked_name', sa.String(), nullable=False, server_default='')) + batch_op.add_column(sa.Column('marked_comment', sa.String(), nullable=False, server_default='')) + else: + # MySQL: Use compatible syntax + with op.batch_alter_table('workflows', schema=None) as batch_op: + batch_op.add_column(sa.Column('marked_name', sa.String(length=255), nullable=False, server_default='')) + batch_op.add_column(sa.Column('marked_comment', sa.String(length=255), nullable=False, server_default='')) def downgrade(): diff --git a/api/migrations/versions/2025_05_15_1531-2adcbe1f5dfb_add_workflowdraftvariable_model.py b/api/migrations/versions/2025_05_15_1531-2adcbe1f5dfb_add_workflowdraftvariable_model.py index 5bf394b21c..ef781b63c2 100644 --- a/api/migrations/versions/2025_05_15_1531-2adcbe1f5dfb_add_workflowdraftvariable_model.py +++ b/api/migrations/versions/2025_05_15_1531-2adcbe1f5dfb_add_workflowdraftvariable_model.py @@ -11,6 +11,10 @@ from alembic import op import models as models + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = "2adcbe1f5dfb" down_revision = "d28f2004b072" @@ -20,24 +24,46 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - "workflow_draft_variables", - sa.Column("id", models.types.StringUUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False), - sa.Column("created_at", sa.DateTime(), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False), - sa.Column("updated_at", sa.DateTime(), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False), - sa.Column("app_id", models.types.StringUUID(), nullable=False), - sa.Column("last_edited_at", sa.DateTime(), nullable=True), - sa.Column("node_id", sa.String(length=255), nullable=False), - sa.Column("name", sa.String(length=255), nullable=False), - sa.Column("description", sa.String(length=255), nullable=False), - sa.Column("selector", sa.String(length=255), nullable=False), - sa.Column("value_type", sa.String(length=20), nullable=False), - sa.Column("value", sa.Text(), nullable=False), - sa.Column("visible", sa.Boolean(), nullable=False), - sa.Column("editable", sa.Boolean(), nullable=False), - sa.PrimaryKeyConstraint("id", name=op.f("workflow_draft_variables_pkey")), - sa.UniqueConstraint("app_id", "node_id", "name", name=op.f("workflow_draft_variables_app_id_key")), - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table( + "workflow_draft_variables", + sa.Column("id", models.types.StringUUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False), + sa.Column("created_at", sa.DateTime(), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False), + sa.Column("updated_at", sa.DateTime(), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False), + sa.Column("app_id", models.types.StringUUID(), nullable=False), + sa.Column("last_edited_at", sa.DateTime(), nullable=True), + sa.Column("node_id", sa.String(length=255), nullable=False), + sa.Column("name", sa.String(length=255), nullable=False), + sa.Column("description", sa.String(length=255), nullable=False), + sa.Column("selector", sa.String(length=255), nullable=False), + sa.Column("value_type", sa.String(length=20), nullable=False), + sa.Column("value", sa.Text(), nullable=False), + sa.Column("visible", sa.Boolean(), nullable=False), + sa.Column("editable", sa.Boolean(), nullable=False), + sa.PrimaryKeyConstraint("id", name=op.f("workflow_draft_variables_pkey")), + sa.UniqueConstraint("app_id", "node_id", "name", name=op.f("workflow_draft_variables_app_id_key")), + ) + else: + op.create_table( + "workflow_draft_variables", + sa.Column("id", models.types.StringUUID(), nullable=False), + sa.Column("created_at", sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column("updated_at", sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column("app_id", models.types.StringUUID(), nullable=False), + sa.Column("last_edited_at", sa.DateTime(), nullable=True), + sa.Column("node_id", sa.String(length=255), nullable=False), + sa.Column("name", sa.String(length=255), nullable=False), + sa.Column("description", sa.String(length=255), nullable=False), + sa.Column("selector", sa.String(length=255), nullable=False), + sa.Column("value_type", sa.String(length=20), nullable=False), + sa.Column("value", models.types.LongText(), nullable=False), + sa.Column("visible", sa.Boolean(), nullable=False), + sa.Column("editable", sa.Boolean(), nullable=False), + sa.PrimaryKeyConstraint("id", name=op.f("workflow_draft_variables_pkey")), + sa.UniqueConstraint("app_id", "node_id", "name", name=op.f("workflow_draft_variables_app_id_key")), + ) # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_06_06_1424-4474872b0ee6_workflow_draft_varaibles_add_node_execution_id.py b/api/migrations/versions/2025_06_06_1424-4474872b0ee6_workflow_draft_varaibles_add_node_execution_id.py index d7a5d116c9..610064320a 100644 --- a/api/migrations/versions/2025_06_06_1424-4474872b0ee6_workflow_draft_varaibles_add_node_execution_id.py +++ b/api/migrations/versions/2025_06_06_1424-4474872b0ee6_workflow_draft_varaibles_add_node_execution_id.py @@ -7,6 +7,10 @@ Create Date: 2025-06-06 14:24:44.213018 """ from alembic import op import models as models + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" import sqlalchemy as sa @@ -18,19 +22,30 @@ depends_on = None def upgrade(): - # `CREATE INDEX CONCURRENTLY` cannot run within a transaction, so use the `autocommit_block` - # context manager to wrap the index creation statement. - # Reference: - # - # - https://www.postgresql.org/docs/current/sql-createindex.html#:~:text=Another%20difference%20is,CREATE%20INDEX%20CONCURRENTLY%20cannot. - # - https://alembic.sqlalchemy.org/en/latest/api/runtime.html#alembic.runtime.migration.MigrationContext.autocommit_block - with op.get_context().autocommit_block(): + # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() + + if _is_pg(conn): + # `CREATE INDEX CONCURRENTLY` cannot run within a transaction, so use the `autocommit_block` + # context manager to wrap the index creation statement. + # Reference: + # + # - https://www.postgresql.org/docs/current/sql-createindex.html#:~:text=Another%20difference%20is,CREATE%20INDEX%20CONCURRENTLY%20cannot. + # - https://alembic.sqlalchemy.org/en/latest/api/runtime.html#alembic.runtime.migration.MigrationContext.autocommit_block + with op.get_context().autocommit_block(): + op.create_index( + op.f('workflow_node_executions_tenant_id_idx'), + "workflow_node_executions", + ['tenant_id', 'workflow_id', 'node_id', sa.literal_column('created_at DESC')], + unique=False, + postgresql_concurrently=True, + ) + else: op.create_index( op.f('workflow_node_executions_tenant_id_idx'), "workflow_node_executions", ['tenant_id', 'workflow_id', 'node_id', sa.literal_column('created_at DESC')], unique=False, - postgresql_concurrently=True, ) with op.batch_alter_table('workflow_draft_variables', schema=None) as batch_op: @@ -51,8 +66,13 @@ def downgrade(): # Reference: # # https://www.postgresql.org/docs/current/sql-createindex.html#:~:text=Another%20difference%20is,CREATE%20INDEX%20CONCURRENTLY%20cannot. - with op.get_context().autocommit_block(): - op.drop_index(op.f('workflow_node_executions_tenant_id_idx'), postgresql_concurrently=True) + conn = op.get_bind() + + if _is_pg(conn): + with op.get_context().autocommit_block(): + op.drop_index(op.f('workflow_node_executions_tenant_id_idx'), postgresql_concurrently=True) + else: + op.drop_index(op.f('workflow_node_executions_tenant_id_idx')) with op.batch_alter_table('workflow_draft_variables', schema=None) as batch_op: batch_op.drop_column('node_execution_id') diff --git a/api/migrations/versions/2025_06_25_0936-58eb7bdb93fe_add_mcp_server_tool_and_app_server.py b/api/migrations/versions/2025_06_25_0936-58eb7bdb93fe_add_mcp_server_tool_and_app_server.py index 0548bf05ef..83a7d1814c 100644 --- a/api/migrations/versions/2025_06_25_0936-58eb7bdb93fe_add_mcp_server_tool_and_app_server.py +++ b/api/migrations/versions/2025_06_25_0936-58eb7bdb93fe_add_mcp_server_tool_and_app_server.py @@ -10,6 +10,10 @@ import models as models import sqlalchemy as sa +def _is_pg(conn): + return conn.dialect.name == "postgresql" + + # revision identifiers, used by Alembic. revision = '58eb7bdb93fe' down_revision = '0ab65e1cc7fa' @@ -19,40 +23,80 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('app_mcp_servers', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('app_id', models.types.StringUUID(), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('description', sa.String(length=255), nullable=False), - sa.Column('server_code', sa.String(length=255), nullable=False), - sa.Column('status', sa.String(length=255), server_default=sa.text("'normal'::character varying"), nullable=False), - sa.Column('parameters', sa.Text(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='app_mcp_server_pkey'), - sa.UniqueConstraint('tenant_id', 'app_id', name='unique_app_mcp_server_tenant_app_id'), - sa.UniqueConstraint('server_code', name='unique_app_mcp_server_server_code') - ) - op.create_table('tool_mcp_providers', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('name', sa.String(length=40), nullable=False), - sa.Column('server_identifier', sa.String(length=24), nullable=False), - sa.Column('server_url', sa.Text(), nullable=False), - sa.Column('server_url_hash', sa.String(length=64), nullable=False), - sa.Column('icon', sa.String(length=255), nullable=True), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('user_id', models.types.StringUUID(), nullable=False), - sa.Column('encrypted_credentials', sa.Text(), nullable=True), - sa.Column('authed', sa.Boolean(), nullable=False), - sa.Column('tools', sa.Text(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='tool_mcp_provider_pkey'), - sa.UniqueConstraint('tenant_id', 'name', name='unique_mcp_provider_name'), - sa.UniqueConstraint('tenant_id', 'server_identifier', name='unique_mcp_provider_server_identifier'), - sa.UniqueConstraint('tenant_id', 'server_url_hash', name='unique_mcp_provider_server_url') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('app_mcp_servers', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('description', sa.String(length=255), nullable=False), + sa.Column('server_code', sa.String(length=255), nullable=False), + sa.Column('status', sa.String(length=255), server_default=sa.text("'normal'::character varying"), nullable=False), + sa.Column('parameters', sa.Text(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='app_mcp_server_pkey'), + sa.UniqueConstraint('tenant_id', 'app_id', name='unique_app_mcp_server_tenant_app_id'), + sa.UniqueConstraint('server_code', name='unique_app_mcp_server_server_code') + ) + else: + op.create_table('app_mcp_servers', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('description', sa.String(length=255), nullable=False), + sa.Column('server_code', sa.String(length=255), nullable=False), + sa.Column('status', sa.String(length=255), server_default=sa.text("'normal'"), nullable=False), + sa.Column('parameters', models.types.LongText(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='app_mcp_server_pkey'), + sa.UniqueConstraint('tenant_id', 'app_id', name='unique_app_mcp_server_tenant_app_id'), + sa.UniqueConstraint('server_code', name='unique_app_mcp_server_server_code') + ) + if _is_pg(conn): + op.create_table('tool_mcp_providers', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('name', sa.String(length=40), nullable=False), + sa.Column('server_identifier', sa.String(length=24), nullable=False), + sa.Column('server_url', sa.Text(), nullable=False), + sa.Column('server_url_hash', sa.String(length=64), nullable=False), + sa.Column('icon', sa.String(length=255), nullable=True), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('user_id', models.types.StringUUID(), nullable=False), + sa.Column('encrypted_credentials', sa.Text(), nullable=True), + sa.Column('authed', sa.Boolean(), nullable=False), + sa.Column('tools', sa.Text(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_mcp_provider_pkey'), + sa.UniqueConstraint('tenant_id', 'name', name='unique_mcp_provider_name'), + sa.UniqueConstraint('tenant_id', 'server_identifier', name='unique_mcp_provider_server_identifier'), + sa.UniqueConstraint('tenant_id', 'server_url_hash', name='unique_mcp_provider_server_url') + ) + else: + op.create_table('tool_mcp_providers', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=40), nullable=False), + sa.Column('server_identifier', sa.String(length=24), nullable=False), + sa.Column('server_url', models.types.LongText(), nullable=False), + sa.Column('server_url_hash', sa.String(length=64), nullable=False), + sa.Column('icon', sa.String(length=255), nullable=True), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('user_id', models.types.StringUUID(), nullable=False), + sa.Column('encrypted_credentials', models.types.LongText(), nullable=True), + sa.Column('authed', sa.Boolean(), nullable=False), + sa.Column('tools', models.types.LongText(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_mcp_provider_pkey'), + sa.UniqueConstraint('tenant_id', 'name', name='unique_mcp_provider_name'), + sa.UniqueConstraint('tenant_id', 'server_identifier', name='unique_mcp_provider_server_identifier'), + sa.UniqueConstraint('tenant_id', 'server_url_hash', name='unique_mcp_provider_server_url') + ) # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_07_02_2332-1c9ba48be8e4_add_uuidv7_function_in_sql.py b/api/migrations/versions/2025_07_02_2332-1c9ba48be8e4_add_uuidv7_function_in_sql.py index 2bbbb3d28e..1aa92b7d50 100644 --- a/api/migrations/versions/2025_07_02_2332-1c9ba48be8e4_add_uuidv7_function_in_sql.py +++ b/api/migrations/versions/2025_07_02_2332-1c9ba48be8e4_add_uuidv7_function_in_sql.py @@ -27,6 +27,10 @@ import models as models import sqlalchemy as sa +def _is_pg(conn): + return conn.dialect.name == "postgresql" + + # revision identifiers, used by Alembic. revision = '1c9ba48be8e4' down_revision = '58eb7bdb93fe' @@ -40,7 +44,11 @@ def upgrade(): # The ability to specify source timestamp has been removed because its type signature is incompatible with # PostgreSQL 18's `uuidv7` function. This capability is rarely needed in practice, as IDs can be # generated and controlled within the application layer. - op.execute(sa.text(r""" + conn = op.get_bind() + + if _is_pg(conn): + # PostgreSQL: Create uuidv7 functions + op.execute(sa.text(r""" /* Main function to generate a uuidv7 value with millisecond precision */ CREATE FUNCTION uuidv7() RETURNS uuid AS @@ -63,7 +71,7 @@ COMMENT ON FUNCTION uuidv7 IS 'Generate a uuid-v7 value with a 48-bit timestamp (millisecond precision) and 74 bits of randomness'; """)) - op.execute(sa.text(r""" + op.execute(sa.text(r""" CREATE FUNCTION uuidv7_boundary(timestamptz) RETURNS uuid AS $$ @@ -79,8 +87,15 @@ COMMENT ON FUNCTION uuidv7_boundary(timestamptz) IS 'Generate a non-random uuidv7 with the given timestamp (first 48 bits) and all random bits to 0. As the smallest possible uuidv7 for that timestamp, it may be used as a boundary for partitions.'; """ )) + else: + pass def downgrade(): - op.execute(sa.text("DROP FUNCTION uuidv7")) - op.execute(sa.text("DROP FUNCTION uuidv7_boundary")) + conn = op.get_bind() + + if _is_pg(conn): + op.execute(sa.text("DROP FUNCTION uuidv7")) + op.execute(sa.text("DROP FUNCTION uuidv7_boundary")) + else: + pass diff --git a/api/migrations/versions/2025_07_04_1705-71f5020c6470_tool_oauth.py b/api/migrations/versions/2025_07_04_1705-71f5020c6470_tool_oauth.py index df4fbf0a0e..e22af7cb8a 100644 --- a/api/migrations/versions/2025_07_04_1705-71f5020c6470_tool_oauth.py +++ b/api/migrations/versions/2025_07_04_1705-71f5020c6470_tool_oauth.py @@ -10,6 +10,10 @@ import models as models import sqlalchemy as sa +def _is_pg(conn): + return conn.dialect.name == "postgresql" + + # revision identifiers, used by Alembic. revision = '71f5020c6470' down_revision = '1c9ba48be8e4' @@ -19,31 +23,63 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('tool_oauth_system_clients', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('plugin_id', sa.String(length=512), nullable=False), - sa.Column('provider', sa.String(length=255), nullable=False), - sa.Column('encrypted_oauth_params', sa.Text(), nullable=False), - sa.PrimaryKeyConstraint('id', name='tool_oauth_system_client_pkey'), - sa.UniqueConstraint('plugin_id', 'provider', name='tool_oauth_system_client_plugin_id_provider_idx') - ) - op.create_table('tool_oauth_tenant_clients', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('plugin_id', sa.String(length=512), nullable=False), - sa.Column('provider', sa.String(length=255), nullable=False), - sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), - sa.Column('encrypted_oauth_params', sa.Text(), nullable=False), - sa.PrimaryKeyConstraint('id', name='tool_oauth_tenant_client_pkey'), - sa.UniqueConstraint('tenant_id', 'plugin_id', 'provider', name='unique_tool_oauth_tenant_client') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('tool_oauth_system_clients', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('plugin_id', sa.String(length=512), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('encrypted_oauth_params', sa.Text(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_oauth_system_client_pkey'), + sa.UniqueConstraint('plugin_id', 'provider', name='tool_oauth_system_client_plugin_id_provider_idx') + ) + else: + op.create_table('tool_oauth_system_clients', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('plugin_id', sa.String(length=512), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('encrypted_oauth_params', models.types.LongText(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_oauth_system_client_pkey'), + sa.UniqueConstraint('plugin_id', 'provider', name='tool_oauth_system_client_plugin_id_provider_idx') + ) + if _is_pg(conn): + op.create_table('tool_oauth_tenant_clients', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('plugin_id', sa.String(length=512), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('encrypted_oauth_params', sa.Text(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_oauth_tenant_client_pkey'), + sa.UniqueConstraint('tenant_id', 'plugin_id', 'provider', name='unique_tool_oauth_tenant_client') + ) + else: + op.create_table('tool_oauth_tenant_clients', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('plugin_id', sa.String(length=255), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('encrypted_oauth_params', models.types.LongText(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_oauth_tenant_client_pkey'), + sa.UniqueConstraint('tenant_id', 'plugin_id', 'provider', name='unique_tool_oauth_tenant_client') + ) - with op.batch_alter_table('tool_builtin_providers', schema=None) as batch_op: - batch_op.add_column(sa.Column('name', sa.String(length=256), server_default=sa.text("'API KEY 1'::character varying"), nullable=False)) - batch_op.add_column(sa.Column('is_default', sa.Boolean(), server_default=sa.text('false'), nullable=False)) - batch_op.add_column(sa.Column('credential_type', sa.String(length=32), server_default=sa.text("'api-key'::character varying"), nullable=False)) - batch_op.drop_constraint(batch_op.f('unique_builtin_tool_provider'), type_='unique') - batch_op.create_unique_constraint(batch_op.f('unique_builtin_tool_provider'), ['tenant_id', 'provider', 'name']) + if _is_pg(conn): + with op.batch_alter_table('tool_builtin_providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('name', sa.String(length=256), server_default=sa.text("'API KEY 1'::character varying"), nullable=False)) + batch_op.add_column(sa.Column('is_default', sa.Boolean(), server_default=sa.text('false'), nullable=False)) + batch_op.add_column(sa.Column('credential_type', sa.String(length=32), server_default=sa.text("'api-key'::character varying"), nullable=False)) + batch_op.drop_constraint(batch_op.f('unique_builtin_tool_provider'), type_='unique') + batch_op.create_unique_constraint(batch_op.f('unique_builtin_tool_provider'), ['tenant_id', 'provider', 'name']) + else: + with op.batch_alter_table('tool_builtin_providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('name', sa.String(length=256), server_default=sa.text("'API KEY 1'"), nullable=False)) + batch_op.add_column(sa.Column('is_default', sa.Boolean(), server_default=sa.text('false'), nullable=False)) + batch_op.add_column(sa.Column('credential_type', sa.String(length=32), server_default=sa.text("'api-key'"), nullable=False)) + batch_op.drop_constraint(batch_op.f('unique_builtin_tool_provider'), type_='unique') + batch_op.create_unique_constraint(batch_op.f('unique_builtin_tool_provider'), ['tenant_id', 'provider', 'name']) # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_07_23_1508-8bcc02c9bd07_add_tenant_plugin_autoupgrade_table.py b/api/migrations/versions/2025_07_23_1508-8bcc02c9bd07_add_tenant_plugin_autoupgrade_table.py index 4ff0402a97..48b6ceb145 100644 --- a/api/migrations/versions/2025_07_23_1508-8bcc02c9bd07_add_tenant_plugin_autoupgrade_table.py +++ b/api/migrations/versions/2025_07_23_1508-8bcc02c9bd07_add_tenant_plugin_autoupgrade_table.py @@ -10,6 +10,10 @@ import models as models import sqlalchemy as sa from sqlalchemy.dialects import postgresql + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '8bcc02c9bd07' down_revision = '375fe79ead14' @@ -19,19 +23,36 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('tenant_plugin_auto_upgrade_strategies', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('strategy_setting', sa.String(length=16), server_default='fix_only', nullable=False), - sa.Column('upgrade_time_of_day', sa.Integer(), nullable=False), - sa.Column('upgrade_mode', sa.String(length=16), server_default='exclude', nullable=False), - sa.Column('exclude_plugins', sa.ARRAY(sa.String(length=255)), nullable=False), - sa.Column('include_plugins', sa.ARRAY(sa.String(length=255)), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='tenant_plugin_auto_upgrade_strategy_pkey'), - sa.UniqueConstraint('tenant_id', name='unique_tenant_plugin_auto_upgrade_strategy') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('tenant_plugin_auto_upgrade_strategies', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('strategy_setting', sa.String(length=16), server_default='fix_only', nullable=False), + sa.Column('upgrade_time_of_day', sa.Integer(), nullable=False), + sa.Column('upgrade_mode', sa.String(length=16), server_default='exclude', nullable=False), + sa.Column('exclude_plugins', sa.ARRAY(sa.String(length=255)), nullable=False), + sa.Column('include_plugins', sa.ARRAY(sa.String(length=255)), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='tenant_plugin_auto_upgrade_strategy_pkey'), + sa.UniqueConstraint('tenant_id', name='unique_tenant_plugin_auto_upgrade_strategy') + ) + else: + op.create_table('tenant_plugin_auto_upgrade_strategies', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('strategy_setting', sa.String(length=16), server_default='fix_only', nullable=False), + sa.Column('upgrade_time_of_day', sa.Integer(), nullable=False), + sa.Column('upgrade_mode', sa.String(length=16), server_default='exclude', nullable=False), + sa.Column('exclude_plugins', sa.JSON(), nullable=False), + sa.Column('include_plugins', sa.JSON(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tenant_plugin_auto_upgrade_strategy_pkey'), + sa.UniqueConstraint('tenant_id', name='unique_tenant_plugin_auto_upgrade_strategy') + ) # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_07_24_1450-532b3f888abf_manual_dataset_field_update.py b/api/migrations/versions/2025_07_24_1450-532b3f888abf_manual_dataset_field_update.py index 1664fb99c4..2597067e81 100644 --- a/api/migrations/versions/2025_07_24_1450-532b3f888abf_manual_dataset_field_update.py +++ b/api/migrations/versions/2025_07_24_1450-532b3f888abf_manual_dataset_field_update.py @@ -7,6 +7,10 @@ Create Date: 2025-07-24 14:50:48.779833 """ from alembic import op import models as models + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" import sqlalchemy as sa @@ -18,8 +22,18 @@ depends_on = None def upgrade(): - op.execute("ALTER TABLE tidb_auth_bindings ALTER COLUMN status SET DEFAULT 'CREATING'::character varying") + conn = op.get_bind() + + if _is_pg(conn): + op.execute("ALTER TABLE tidb_auth_bindings ALTER COLUMN status SET DEFAULT 'CREATING'::character varying") + else: + op.execute("ALTER TABLE tidb_auth_bindings ALTER COLUMN status SET DEFAULT 'CREATING'") def downgrade(): - op.execute("ALTER TABLE tidb_auth_bindings ALTER COLUMN status SET DEFAULT 'CREATING'") + conn = op.get_bind() + + if _is_pg(conn): + op.execute("ALTER TABLE tidb_auth_bindings ALTER COLUMN status SET DEFAULT 'CREATING'::character varying") + else: + op.execute("ALTER TABLE tidb_auth_bindings ALTER COLUMN status SET DEFAULT 'CREATING'") diff --git a/api/migrations/versions/2025_08_09_1553-e8446f481c1e_add_provider_credential_pool_support.py b/api/migrations/versions/2025_08_09_1553-e8446f481c1e_add_provider_credential_pool_support.py index da8b1aa796..18e1b8d601 100644 --- a/api/migrations/versions/2025_08_09_1553-e8446f481c1e_add_provider_credential_pool_support.py +++ b/api/migrations/versions/2025_08_09_1553-e8446f481c1e_add_provider_credential_pool_support.py @@ -11,6 +11,10 @@ import models as models import sqlalchemy as sa from sqlalchemy.sql import table, column + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'e8446f481c1e' down_revision = 'fa8b0fa6f407' @@ -20,16 +24,30 @@ depends_on = None def upgrade(): # Create provider_credentials table - op.create_table('provider_credentials', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('provider_name', sa.String(length=255), nullable=False), - sa.Column('credential_name', sa.String(length=255), nullable=False), - sa.Column('encrypted_config', sa.Text(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='provider_credential_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('provider_credentials', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_name', sa.String(length=255), nullable=False), + sa.Column('credential_name', sa.String(length=255), nullable=False), + sa.Column('encrypted_config', sa.Text(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='provider_credential_pkey') + ) + else: + op.create_table('provider_credentials', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_name', sa.String(length=255), nullable=False), + sa.Column('credential_name', sa.String(length=255), nullable=False), + sa.Column('encrypted_config', models.types.LongText(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='provider_credential_pkey') + ) # Create index for provider_credentials with op.batch_alter_table('provider_credentials', schema=None) as batch_op: @@ -60,27 +78,49 @@ def upgrade(): def migrate_existing_providers_data(): """migrate providers table data to provider_credentials""" - + conn = op.get_bind() # Define table structure for data manipulation - providers_table = table('providers', - column('id', models.types.StringUUID()), - column('tenant_id', models.types.StringUUID()), - column('provider_name', sa.String()), - column('encrypted_config', sa.Text()), - column('created_at', sa.DateTime()), - column('updated_at', sa.DateTime()), - column('credential_id', models.types.StringUUID()), - ) + if _is_pg(conn): + providers_table = table('providers', + column('id', models.types.StringUUID()), + column('tenant_id', models.types.StringUUID()), + column('provider_name', sa.String()), + column('encrypted_config', sa.Text()), + column('created_at', sa.DateTime()), + column('updated_at', sa.DateTime()), + column('credential_id', models.types.StringUUID()), + ) + else: + providers_table = table('providers', + column('id', models.types.StringUUID()), + column('tenant_id', models.types.StringUUID()), + column('provider_name', sa.String()), + column('encrypted_config', models.types.LongText()), + column('created_at', sa.DateTime()), + column('updated_at', sa.DateTime()), + column('credential_id', models.types.StringUUID()), + ) - provider_credential_table = table('provider_credentials', - column('id', models.types.StringUUID()), - column('tenant_id', models.types.StringUUID()), - column('provider_name', sa.String()), - column('credential_name', sa.String()), - column('encrypted_config', sa.Text()), - column('created_at', sa.DateTime()), - column('updated_at', sa.DateTime()) - ) + if _is_pg(conn): + provider_credential_table = table('provider_credentials', + column('id', models.types.StringUUID()), + column('tenant_id', models.types.StringUUID()), + column('provider_name', sa.String()), + column('credential_name', sa.String()), + column('encrypted_config', sa.Text()), + column('created_at', sa.DateTime()), + column('updated_at', sa.DateTime()) + ) + else: + provider_credential_table = table('provider_credentials', + column('id', models.types.StringUUID()), + column('tenant_id', models.types.StringUUID()), + column('provider_name', sa.String()), + column('credential_name', sa.String()), + column('encrypted_config', models.types.LongText()), + column('created_at', sa.DateTime()), + column('updated_at', sa.DateTime()) + ) # Get database connection conn = op.get_bind() @@ -123,8 +163,14 @@ def migrate_existing_providers_data(): def downgrade(): # Re-add encrypted_config column to providers table - with op.batch_alter_table('providers', schema=None) as batch_op: - batch_op.add_column(sa.Column('encrypted_config', sa.Text(), nullable=True)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('encrypted_config', sa.Text(), nullable=True)) + else: + with op.batch_alter_table('providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('encrypted_config', models.types.LongText(), nullable=True)) # Migrate data back from provider_credentials to providers diff --git a/api/migrations/versions/2025_08_13_1605-0e154742a5fa_add_provider_model_multi_credential.py b/api/migrations/versions/2025_08_13_1605-0e154742a5fa_add_provider_model_multi_credential.py index f03a215505..16ca902726 100644 --- a/api/migrations/versions/2025_08_13_1605-0e154742a5fa_add_provider_model_multi_credential.py +++ b/api/migrations/versions/2025_08_13_1605-0e154742a5fa_add_provider_model_multi_credential.py @@ -13,6 +13,10 @@ import sqlalchemy as sa from sqlalchemy.sql import table, column +def _is_pg(conn): + return conn.dialect.name == "postgresql" + + # revision identifiers, used by Alembic. revision = '0e154742a5fa' down_revision = 'e8446f481c1e' @@ -22,18 +26,34 @@ depends_on = None def upgrade(): # Create provider_model_credentials table - op.create_table('provider_model_credentials', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('provider_name', sa.String(length=255), nullable=False), - sa.Column('model_name', sa.String(length=255), nullable=False), - sa.Column('model_type', sa.String(length=40), nullable=False), - sa.Column('credential_name', sa.String(length=255), nullable=False), - sa.Column('encrypted_config', sa.Text(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='provider_model_credential_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('provider_model_credentials', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_name', sa.String(length=255), nullable=False), + sa.Column('model_name', sa.String(length=255), nullable=False), + sa.Column('model_type', sa.String(length=40), nullable=False), + sa.Column('credential_name', sa.String(length=255), nullable=False), + sa.Column('encrypted_config', sa.Text(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='provider_model_credential_pkey') + ) + else: + op.create_table('provider_model_credentials', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_name', sa.String(length=255), nullable=False), + sa.Column('model_name', sa.String(length=255), nullable=False), + sa.Column('model_type', sa.String(length=40), nullable=False), + sa.Column('credential_name', sa.String(length=255), nullable=False), + sa.Column('encrypted_config', models.types.LongText(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='provider_model_credential_pkey') + ) # Create index for provider_model_credentials with op.batch_alter_table('provider_model_credentials', schema=None) as batch_op: @@ -66,31 +86,57 @@ def upgrade(): def migrate_existing_provider_models_data(): """migrate provider_models table data to provider_model_credentials""" - + conn = op.get_bind() # Define table structure for data manipulation - provider_models_table = table('provider_models', - column('id', models.types.StringUUID()), - column('tenant_id', models.types.StringUUID()), - column('provider_name', sa.String()), - column('model_name', sa.String()), - column('model_type', sa.String()), - column('encrypted_config', sa.Text()), - column('created_at', sa.DateTime()), - column('updated_at', sa.DateTime()), - column('credential_id', models.types.StringUUID()), - ) + if _is_pg(conn): + provider_models_table = table('provider_models', + column('id', models.types.StringUUID()), + column('tenant_id', models.types.StringUUID()), + column('provider_name', sa.String()), + column('model_name', sa.String()), + column('model_type', sa.String()), + column('encrypted_config', sa.Text()), + column('created_at', sa.DateTime()), + column('updated_at', sa.DateTime()), + column('credential_id', models.types.StringUUID()), + ) + else: + provider_models_table = table('provider_models', + column('id', models.types.StringUUID()), + column('tenant_id', models.types.StringUUID()), + column('provider_name', sa.String()), + column('model_name', sa.String()), + column('model_type', sa.String()), + column('encrypted_config', models.types.LongText()), + column('created_at', sa.DateTime()), + column('updated_at', sa.DateTime()), + column('credential_id', models.types.StringUUID()), + ) - provider_model_credentials_table = table('provider_model_credentials', - column('id', models.types.StringUUID()), - column('tenant_id', models.types.StringUUID()), - column('provider_name', sa.String()), - column('model_name', sa.String()), - column('model_type', sa.String()), - column('credential_name', sa.String()), - column('encrypted_config', sa.Text()), - column('created_at', sa.DateTime()), - column('updated_at', sa.DateTime()) - ) + if _is_pg(conn): + provider_model_credentials_table = table('provider_model_credentials', + column('id', models.types.StringUUID()), + column('tenant_id', models.types.StringUUID()), + column('provider_name', sa.String()), + column('model_name', sa.String()), + column('model_type', sa.String()), + column('credential_name', sa.String()), + column('encrypted_config', sa.Text()), + column('created_at', sa.DateTime()), + column('updated_at', sa.DateTime()) + ) + else: + provider_model_credentials_table = table('provider_model_credentials', + column('id', models.types.StringUUID()), + column('tenant_id', models.types.StringUUID()), + column('provider_name', sa.String()), + column('model_name', sa.String()), + column('model_type', sa.String()), + column('credential_name', sa.String()), + column('encrypted_config', models.types.LongText()), + column('created_at', sa.DateTime()), + column('updated_at', sa.DateTime()) + ) # Get database connection @@ -137,8 +183,14 @@ def migrate_existing_provider_models_data(): def downgrade(): # Re-add encrypted_config column to provider_models table - with op.batch_alter_table('provider_models', schema=None) as batch_op: - batch_op.add_column(sa.Column('encrypted_config', sa.Text(), nullable=True)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('provider_models', schema=None) as batch_op: + batch_op.add_column(sa.Column('encrypted_config', sa.Text(), nullable=True)) + else: + with op.batch_alter_table('provider_models', schema=None) as batch_op: + batch_op.add_column(sa.Column('encrypted_config', models.types.LongText(), nullable=True)) if not context.is_offline_mode(): # Migrate data back from provider_model_credentials to provider_models diff --git a/api/migrations/versions/2025_08_20_1747-8d289573e1da_add_oauth_provider_apps.py b/api/migrations/versions/2025_08_20_1747-8d289573e1da_add_oauth_provider_apps.py index 3a3186bcbc..75b4d61173 100644 --- a/api/migrations/versions/2025_08_20_1747-8d289573e1da_add_oauth_provider_apps.py +++ b/api/migrations/versions/2025_08_20_1747-8d289573e1da_add_oauth_provider_apps.py @@ -8,6 +8,11 @@ Create Date: 2025-08-20 17:47:17.015695 from alembic import op import models as models import sqlalchemy as sa +from libs.uuid_utils import uuidv7 + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" # revision identifiers, used by Alembic. @@ -19,17 +24,33 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('oauth_provider_apps', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('app_icon', sa.String(length=255), nullable=False), - sa.Column('app_label', sa.JSON(), server_default='{}', nullable=False), - sa.Column('client_id', sa.String(length=255), nullable=False), - sa.Column('client_secret', sa.String(length=255), nullable=False), - sa.Column('redirect_uris', sa.JSON(), server_default='[]', nullable=False), - sa.Column('scope', sa.String(length=255), server_default=sa.text("'read:name read:email read:avatar read:interface_language read:timezone'"), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='oauth_provider_app_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('oauth_provider_apps', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('app_icon', sa.String(length=255), nullable=False), + sa.Column('app_label', sa.JSON(), server_default='{}', nullable=False), + sa.Column('client_id', sa.String(length=255), nullable=False), + sa.Column('client_secret', sa.String(length=255), nullable=False), + sa.Column('redirect_uris', sa.JSON(), server_default='[]', nullable=False), + sa.Column('scope', sa.String(length=255), server_default=sa.text("'read:name read:email read:avatar read:interface_language read:timezone'"), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='oauth_provider_app_pkey') + ) + else: + op.create_table('oauth_provider_apps', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_icon', sa.String(length=255), nullable=False), + sa.Column('app_label', sa.JSON(), default='{}', nullable=False), + sa.Column('client_id', sa.String(length=255), nullable=False), + sa.Column('client_secret', sa.String(length=255), nullable=False), + sa.Column('redirect_uris', sa.JSON(), default='[]', nullable=False), + sa.Column('scope', sa.String(length=255), server_default=sa.text("'read:name read:email read:avatar read:interface_language read:timezone'"), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='oauth_provider_app_pkey') + ) + with op.batch_alter_table('oauth_provider_apps', schema=None) as batch_op: batch_op.create_index('oauth_provider_app_client_id_idx', ['client_id'], unique=False) diff --git a/api/migrations/versions/2025_09_08_1007-c20211f18133_add_headers_to_mcp_provider.py b/api/migrations/versions/2025_09_08_1007-c20211f18133_add_headers_to_mcp_provider.py index 99d47478f3..4f472fe4b4 100644 --- a/api/migrations/versions/2025_09_08_1007-c20211f18133_add_headers_to_mcp_provider.py +++ b/api/migrations/versions/2025_09_08_1007-c20211f18133_add_headers_to_mcp_provider.py @@ -7,6 +7,10 @@ Create Date: 2025-08-29 10:07:54.163626 """ from alembic import op import models as models + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" import sqlalchemy as sa @@ -19,7 +23,12 @@ depends_on = None def upgrade(): # Add encrypted_headers column to tool_mcp_providers table - op.add_column('tool_mcp_providers', sa.Column('encrypted_headers', sa.Text(), nullable=True)) + conn = op.get_bind() + + if _is_pg(conn): + op.add_column('tool_mcp_providers', sa.Column('encrypted_headers', sa.Text(), nullable=True)) + else: + op.add_column('tool_mcp_providers', sa.Column('encrypted_headers', models.types.LongText(), nullable=True)) def downgrade(): diff --git a/api/migrations/versions/2025_09_11_1537-cf7c38a32b2d_add_credential_status_for_provider_table.py b/api/migrations/versions/2025_09_11_1537-cf7c38a32b2d_add_credential_status_for_provider_table.py index 17467e6495..4f78f346f4 100644 --- a/api/migrations/versions/2025_09_11_1537-cf7c38a32b2d_add_credential_status_for_provider_table.py +++ b/api/migrations/versions/2025_09_11_1537-cf7c38a32b2d_add_credential_status_for_provider_table.py @@ -7,6 +7,9 @@ Create Date: 2025-09-11 15:37:17.771298 """ from alembic import op import models as models + +def _is_pg(conn): + return conn.dialect.name == "postgresql" import sqlalchemy as sa @@ -19,8 +22,14 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('providers', schema=None) as batch_op: - batch_op.add_column(sa.Column('credential_status', sa.String(length=20), server_default=sa.text("'active'::character varying"), nullable=True)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('credential_status', sa.String(length=20), server_default=sa.text("'active'::character varying"), nullable=True)) + else: + with op.batch_alter_table('providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('credential_status', sa.String(length=20), server_default=sa.text("'active'"), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_09_17_1515-68519ad5cd18_knowledge_pipeline_migrate.py b/api/migrations/versions/2025_09_17_1515-68519ad5cd18_knowledge_pipeline_migrate.py index 53a95141ec..8eac0dee10 100644 --- a/api/migrations/versions/2025_09_17_1515-68519ad5cd18_knowledge_pipeline_migrate.py +++ b/api/migrations/versions/2025_09_17_1515-68519ad5cd18_knowledge_pipeline_migrate.py @@ -9,6 +9,11 @@ from alembic import op import models as models import sqlalchemy as sa from sqlalchemy.dialects import postgresql +from libs.uuid_utils import uuidv7 + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" # revision identifiers, used by Alembic. revision = '68519ad5cd18' @@ -19,152 +24,314 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('datasource_oauth_params', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('plugin_id', sa.String(length=255), nullable=False), - sa.Column('provider', sa.String(length=255), nullable=False), - sa.Column('system_credentials', postgresql.JSONB(astext_type=sa.Text()), nullable=False), - sa.PrimaryKeyConstraint('id', name='datasource_oauth_config_pkey'), - sa.UniqueConstraint('plugin_id', 'provider', name='datasource_oauth_config_datasource_id_provider_idx') - ) - op.create_table('datasource_oauth_tenant_params', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('provider', sa.String(length=255), nullable=False), - sa.Column('plugin_id', sa.String(length=255), nullable=False), - sa.Column('client_params', postgresql.JSONB(astext_type=sa.Text()), nullable=False), - sa.Column('enabled', sa.Boolean(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='datasource_oauth_tenant_config_pkey'), - sa.UniqueConstraint('tenant_id', 'plugin_id', 'provider', name='datasource_oauth_tenant_config_unique') - ) - op.create_table('datasource_providers', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('provider', sa.String(length=255), nullable=False), - sa.Column('plugin_id', sa.String(length=255), nullable=False), - sa.Column('auth_type', sa.String(length=255), nullable=False), - sa.Column('encrypted_credentials', postgresql.JSONB(astext_type=sa.Text()), nullable=False), - sa.Column('avatar_url', sa.Text(), nullable=True), - sa.Column('is_default', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.Column('expires_at', sa.Integer(), server_default='-1', nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='datasource_provider_pkey'), - sa.UniqueConstraint('tenant_id', 'plugin_id', 'provider', 'name', name='datasource_provider_unique_name') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('datasource_oauth_params', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('plugin_id', sa.String(length=255), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('system_credentials', postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.PrimaryKeyConstraint('id', name='datasource_oauth_config_pkey'), + sa.UniqueConstraint('plugin_id', 'provider', name='datasource_oauth_config_datasource_id_provider_idx') + ) + else: + op.create_table('datasource_oauth_params', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('plugin_id', sa.String(length=255), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('system_credentials', models.types.AdjustedJSON(astext_type=sa.Text()), nullable=False), + sa.PrimaryKeyConstraint('id', name='datasource_oauth_config_pkey'), + sa.UniqueConstraint('plugin_id', 'provider', name='datasource_oauth_config_datasource_id_provider_idx') + ) + if _is_pg(conn): + op.create_table('datasource_oauth_tenant_params', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('plugin_id', sa.String(length=255), nullable=False), + sa.Column('client_params', postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column('enabled', sa.Boolean(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='datasource_oauth_tenant_config_pkey'), + sa.UniqueConstraint('tenant_id', 'plugin_id', 'provider', name='datasource_oauth_tenant_config_unique') + ) + else: + op.create_table('datasource_oauth_tenant_params', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('plugin_id', sa.String(length=255), nullable=False), + sa.Column('client_params', models.types.AdjustedJSON(astext_type=sa.Text()), nullable=False), + sa.Column('enabled', sa.Boolean(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='datasource_oauth_tenant_config_pkey'), + sa.UniqueConstraint('tenant_id', 'plugin_id', 'provider', name='datasource_oauth_tenant_config_unique') + ) + if _is_pg(conn): + op.create_table('datasource_providers', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('plugin_id', sa.String(length=255), nullable=False), + sa.Column('auth_type', sa.String(length=255), nullable=False), + sa.Column('encrypted_credentials', postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column('avatar_url', sa.Text(), nullable=True), + sa.Column('is_default', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('expires_at', sa.Integer(), server_default='-1', nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='datasource_provider_pkey'), + sa.UniqueConstraint('tenant_id', 'plugin_id', 'provider', 'name', name='datasource_provider_unique_name') + ) + else: + op.create_table('datasource_providers', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('provider', sa.String(length=128), nullable=False), + sa.Column('plugin_id', sa.String(length=255), nullable=False), + sa.Column('auth_type', sa.String(length=255), nullable=False), + sa.Column('encrypted_credentials', models.types.AdjustedJSON(astext_type=sa.Text()), nullable=False), + sa.Column('avatar_url', models.types.LongText(), nullable=True), + sa.Column('is_default', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('expires_at', sa.Integer(), server_default='-1', nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='datasource_provider_pkey'), + sa.UniqueConstraint('tenant_id', 'plugin_id', 'provider', 'name', name='datasource_provider_unique_name') + ) with op.batch_alter_table('datasource_providers', schema=None) as batch_op: batch_op.create_index('datasource_provider_auth_type_provider_idx', ['tenant_id', 'plugin_id', 'provider'], unique=False) - op.create_table('document_pipeline_execution_logs', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('pipeline_id', models.types.StringUUID(), nullable=False), - sa.Column('document_id', models.types.StringUUID(), nullable=False), - sa.Column('datasource_type', sa.String(length=255), nullable=False), - sa.Column('datasource_info', sa.Text(), nullable=False), - sa.Column('datasource_node_id', sa.String(length=255), nullable=False), - sa.Column('input_data', sa.JSON(), nullable=False), - sa.Column('created_by', models.types.StringUUID(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='document_pipeline_execution_log_pkey') - ) + if _is_pg(conn): + op.create_table('document_pipeline_execution_logs', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('pipeline_id', models.types.StringUUID(), nullable=False), + sa.Column('document_id', models.types.StringUUID(), nullable=False), + sa.Column('datasource_type', sa.String(length=255), nullable=False), + sa.Column('datasource_info', sa.Text(), nullable=False), + sa.Column('datasource_node_id', sa.String(length=255), nullable=False), + sa.Column('input_data', sa.JSON(), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='document_pipeline_execution_log_pkey') + ) + else: + op.create_table('document_pipeline_execution_logs', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('pipeline_id', models.types.StringUUID(), nullable=False), + sa.Column('document_id', models.types.StringUUID(), nullable=False), + sa.Column('datasource_type', sa.String(length=255), nullable=False), + sa.Column('datasource_info', models.types.LongText(), nullable=False), + sa.Column('datasource_node_id', sa.String(length=255), nullable=False), + sa.Column('input_data', sa.JSON(), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='document_pipeline_execution_log_pkey') + ) with op.batch_alter_table('document_pipeline_execution_logs', schema=None) as batch_op: batch_op.create_index('document_pipeline_execution_logs_document_id_idx', ['document_id'], unique=False) - op.create_table('pipeline_built_in_templates', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('description', sa.Text(), nullable=False), - sa.Column('chunk_structure', sa.String(length=255), nullable=False), - sa.Column('icon', sa.JSON(), nullable=False), - sa.Column('yaml_content', sa.Text(), nullable=False), - sa.Column('copyright', sa.String(length=255), nullable=False), - sa.Column('privacy_policy', sa.String(length=255), nullable=False), - sa.Column('position', sa.Integer(), nullable=False), - sa.Column('install_count', sa.Integer(), nullable=False), - sa.Column('language', sa.String(length=255), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('created_by', models.types.StringUUID(), nullable=False), - sa.Column('updated_by', models.types.StringUUID(), nullable=True), - sa.PrimaryKeyConstraint('id', name='pipeline_built_in_template_pkey') - ) - op.create_table('pipeline_customized_templates', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('description', sa.Text(), nullable=False), - sa.Column('chunk_structure', sa.String(length=255), nullable=False), - sa.Column('icon', sa.JSON(), nullable=False), - sa.Column('position', sa.Integer(), nullable=False), - sa.Column('yaml_content', sa.Text(), nullable=False), - sa.Column('install_count', sa.Integer(), nullable=False), - sa.Column('language', sa.String(length=255), nullable=False), - sa.Column('created_by', models.types.StringUUID(), nullable=False), - sa.Column('updated_by', models.types.StringUUID(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='pipeline_customized_template_pkey') - ) + if _is_pg(conn): + op.create_table('pipeline_built_in_templates', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('description', sa.Text(), nullable=False), + sa.Column('chunk_structure', sa.String(length=255), nullable=False), + sa.Column('icon', sa.JSON(), nullable=False), + sa.Column('yaml_content', sa.Text(), nullable=False), + sa.Column('copyright', sa.String(length=255), nullable=False), + sa.Column('privacy_policy', sa.String(length=255), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('install_count', sa.Integer(), nullable=False), + sa.Column('language', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.PrimaryKeyConstraint('id', name='pipeline_built_in_template_pkey') + ) + else: + op.create_table('pipeline_built_in_templates', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('description', models.types.LongText(), nullable=False), + sa.Column('chunk_structure', sa.String(length=255), nullable=False), + sa.Column('icon', sa.JSON(), nullable=False), + sa.Column('yaml_content', models.types.LongText(), nullable=False), + sa.Column('copyright', sa.String(length=255), nullable=False), + sa.Column('privacy_policy', sa.String(length=255), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('install_count', sa.Integer(), nullable=False), + sa.Column('language', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.PrimaryKeyConstraint('id', name='pipeline_built_in_template_pkey') + ) + if _is_pg(conn): + op.create_table('pipeline_customized_templates', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('description', sa.Text(), nullable=False), + sa.Column('chunk_structure', sa.String(length=255), nullable=False), + sa.Column('icon', sa.JSON(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('yaml_content', sa.Text(), nullable=False), + sa.Column('install_count', sa.Integer(), nullable=False), + sa.Column('language', sa.String(length=255), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='pipeline_customized_template_pkey') + ) + else: + # MySQL: Use compatible syntax + op.create_table('pipeline_customized_templates', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('description', models.types.LongText(), nullable=False), + sa.Column('chunk_structure', sa.String(length=255), nullable=False), + sa.Column('icon', sa.JSON(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('yaml_content', models.types.LongText(), nullable=False), + sa.Column('install_count', sa.Integer(), nullable=False), + sa.Column('language', sa.String(length=255), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='pipeline_customized_template_pkey') + ) with op.batch_alter_table('pipeline_customized_templates', schema=None) as batch_op: batch_op.create_index('pipeline_customized_template_tenant_idx', ['tenant_id'], unique=False) - op.create_table('pipeline_recommended_plugins', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('plugin_id', sa.Text(), nullable=False), - sa.Column('provider_name', sa.Text(), nullable=False), - sa.Column('position', sa.Integer(), nullable=False), - sa.Column('active', sa.Boolean(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='pipeline_recommended_plugin_pkey') - ) - op.create_table('pipelines', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('description', sa.Text(), server_default=sa.text("''::character varying"), nullable=False), - sa.Column('workflow_id', models.types.StringUUID(), nullable=True), - sa.Column('is_public', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.Column('is_published', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.Column('created_by', models.types.StringUUID(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_by', models.types.StringUUID(), nullable=True), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='pipeline_pkey') - ) - op.create_table('workflow_draft_variable_files', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False, comment='The tenant to which the WorkflowDraftVariableFile belongs, referencing Tenant.id'), - sa.Column('app_id', models.types.StringUUID(), nullable=False, comment='The application to which the WorkflowDraftVariableFile belongs, referencing App.id'), - sa.Column('user_id', models.types.StringUUID(), nullable=False, comment='The owner to of the WorkflowDraftVariableFile, referencing Account.id'), - sa.Column('upload_file_id', models.types.StringUUID(), nullable=False, comment='Reference to UploadFile containing the large variable data'), - sa.Column('size', sa.BigInteger(), nullable=False, comment='Size of the original variable content in bytes'), - sa.Column('length', sa.Integer(), nullable=True, comment='Length of the original variable content. For array and array-like types, this represents the number of elements. For object types, it indicates the number of keys. For other types, the value is NULL.'), - sa.Column('value_type', sa.String(20), nullable=False), - sa.PrimaryKeyConstraint('id', name=op.f('workflow_draft_variable_files_pkey')) - ) - op.create_table('workflow_node_execution_offload', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('app_id', models.types.StringUUID(), nullable=False), - sa.Column('node_execution_id', models.types.StringUUID(), nullable=True), - sa.Column('type', sa.String(20), nullable=False), - sa.Column('file_id', models.types.StringUUID(), nullable=False), - sa.PrimaryKeyConstraint('id', name=op.f('workflow_node_execution_offload_pkey')), - sa.UniqueConstraint('node_execution_id', 'type', name=op.f('workflow_node_execution_offload_node_execution_id_key')) - ) - with op.batch_alter_table('datasets', schema=None) as batch_op: - batch_op.add_column(sa.Column('keyword_number', sa.Integer(), server_default=sa.text('10'), nullable=True)) - batch_op.add_column(sa.Column('icon_info', postgresql.JSONB(astext_type=sa.Text()), nullable=True)) - batch_op.add_column(sa.Column('runtime_mode', sa.String(length=255), server_default=sa.text("'general'::character varying"), nullable=True)) - batch_op.add_column(sa.Column('pipeline_id', models.types.StringUUID(), nullable=True)) - batch_op.add_column(sa.Column('chunk_structure', sa.String(length=255), nullable=True)) - batch_op.add_column(sa.Column('enable_api', sa.Boolean(), server_default=sa.text('true'), nullable=False)) + if _is_pg(conn): + op.create_table('pipeline_recommended_plugins', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('plugin_id', sa.Text(), nullable=False), + sa.Column('provider_name', sa.Text(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('active', sa.Boolean(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='pipeline_recommended_plugin_pkey') + ) + else: + op.create_table('pipeline_recommended_plugins', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('plugin_id', models.types.LongText(), nullable=False), + sa.Column('provider_name', models.types.LongText(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('active', sa.Boolean(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='pipeline_recommended_plugin_pkey') + ) + if _is_pg(conn): + op.create_table('pipelines', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('description', sa.Text(), server_default=sa.text("''::character varying"), nullable=False), + sa.Column('workflow_id', models.types.StringUUID(), nullable=True), + sa.Column('is_public', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('is_published', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='pipeline_pkey') + ) + else: + op.create_table('pipelines', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('description', models.types.LongText(), default=sa.text("''"), nullable=False), + sa.Column('workflow_id', models.types.StringUUID(), nullable=True), + sa.Column('is_public', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('is_published', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='pipeline_pkey') + ) + if _is_pg(conn): + op.create_table('workflow_draft_variable_files', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False, comment='The tenant to which the WorkflowDraftVariableFile belongs, referencing Tenant.id'), + sa.Column('app_id', models.types.StringUUID(), nullable=False, comment='The application to which the WorkflowDraftVariableFile belongs, referencing App.id'), + sa.Column('user_id', models.types.StringUUID(), nullable=False, comment='The owner to of the WorkflowDraftVariableFile, referencing Account.id'), + sa.Column('upload_file_id', models.types.StringUUID(), nullable=False, comment='Reference to UploadFile containing the large variable data'), + sa.Column('size', sa.BigInteger(), nullable=False, comment='Size of the original variable content in bytes'), + sa.Column('length', sa.Integer(), nullable=True, comment='Length of the original variable content. For array and array-like types, this represents the number of elements. For object types, it indicates the number of keys. For other types, the value is NULL.'), + sa.Column('value_type', sa.String(20), nullable=False), + sa.PrimaryKeyConstraint('id', name=op.f('workflow_draft_variable_files_pkey')) + ) + else: + op.create_table('workflow_draft_variable_files', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False, comment='The tenant to which the WorkflowDraftVariableFile belongs, referencing Tenant.id'), + sa.Column('app_id', models.types.StringUUID(), nullable=False, comment='The application to which the WorkflowDraftVariableFile belongs, referencing App.id'), + sa.Column('user_id', models.types.StringUUID(), nullable=False, comment='The owner to of the WorkflowDraftVariableFile, referencing Account.id'), + sa.Column('upload_file_id', models.types.StringUUID(), nullable=False, comment='Reference to UploadFile containing the large variable data'), + sa.Column('size', sa.BigInteger(), nullable=False, comment='Size of the original variable content in bytes'), + sa.Column('length', sa.Integer(), nullable=True, comment='Length of the original variable content. For array and array-like types, this represents the number of elements. For object types, it indicates the number of keys. For other types, the value is NULL.'), + sa.Column('value_type', sa.String(20), nullable=False), + sa.PrimaryKeyConstraint('id', name=op.f('workflow_draft_variable_files_pkey')) + ) + if _is_pg(conn): + op.create_table('workflow_node_execution_offload', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('node_execution_id', models.types.StringUUID(), nullable=True), + sa.Column('type', sa.String(20), nullable=False), + sa.Column('file_id', models.types.StringUUID(), nullable=False), + sa.PrimaryKeyConstraint('id', name=op.f('workflow_node_execution_offload_pkey')), + sa.UniqueConstraint('node_execution_id', 'type', name=op.f('workflow_node_execution_offload_node_execution_id_key')) + ) + else: + op.create_table('workflow_node_execution_offload', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('node_execution_id', models.types.StringUUID(), nullable=True), + sa.Column('type', sa.String(20), nullable=False), + sa.Column('file_id', models.types.StringUUID(), nullable=False), + sa.PrimaryKeyConstraint('id', name=op.f('workflow_node_execution_offload_pkey')), + sa.UniqueConstraint('node_execution_id', 'type', name=op.f('workflow_node_execution_offload_node_execution_id_key')) + ) + if _is_pg(conn): + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.add_column(sa.Column('keyword_number', sa.Integer(), server_default=sa.text('10'), nullable=True)) + batch_op.add_column(sa.Column('icon_info', postgresql.JSONB(astext_type=sa.Text()), nullable=True)) + batch_op.add_column(sa.Column('runtime_mode', sa.String(length=255), server_default=sa.text("'general'::character varying"), nullable=True)) + batch_op.add_column(sa.Column('pipeline_id', models.types.StringUUID(), nullable=True)) + batch_op.add_column(sa.Column('chunk_structure', sa.String(length=255), nullable=True)) + batch_op.add_column(sa.Column('enable_api', sa.Boolean(), server_default=sa.text('true'), nullable=False)) + else: + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.add_column(sa.Column('keyword_number', sa.Integer(), server_default=sa.text('10'), nullable=True)) + batch_op.add_column(sa.Column('icon_info', models.types.AdjustedJSON(astext_type=sa.Text()), nullable=True)) + batch_op.add_column(sa.Column('runtime_mode', sa.String(length=255), server_default=sa.text("'general'"), nullable=True)) + batch_op.add_column(sa.Column('pipeline_id', models.types.StringUUID(), nullable=True)) + batch_op.add_column(sa.Column('chunk_structure', sa.String(length=255), nullable=True)) + batch_op.add_column(sa.Column('enable_api', sa.Boolean(), server_default=sa.text('true'), nullable=False)) with op.batch_alter_table('workflow_draft_variables', schema=None) as batch_op: batch_op.add_column(sa.Column('file_id', models.types.StringUUID(), nullable=True, comment='Reference to WorkflowDraftVariableFile if variable is offloaded to external storage')) @@ -175,9 +342,12 @@ def upgrade(): comment='Indicates whether the current value is the default for a conversation variable. Always `FALSE` for other types of variables.',) ) batch_op.create_index('workflow_draft_variable_file_id_idx', ['file_id'], unique=False) - - with op.batch_alter_table('workflows', schema=None) as batch_op: - batch_op.add_column(sa.Column('rag_pipeline_variables', sa.Text(), server_default='{}', nullable=False)) + if _is_pg(conn): + with op.batch_alter_table('workflows', schema=None) as batch_op: + batch_op.add_column(sa.Column('rag_pipeline_variables', sa.Text(), server_default='{}', nullable=False)) + else: + with op.batch_alter_table('workflows', schema=None) as batch_op: + batch_op.add_column(sa.Column('rag_pipeline_variables', models.types.LongText(), default='{}', nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_10_21_1430-ae662b25d9bc_remove_builtin_template_user.py b/api/migrations/versions/2025_10_21_1430-ae662b25d9bc_remove_builtin_template_user.py index 086a02e7c3..0776ab0818 100644 --- a/api/migrations/versions/2025_10_21_1430-ae662b25d9bc_remove_builtin_template_user.py +++ b/api/migrations/versions/2025_10_21_1430-ae662b25d9bc_remove_builtin_template_user.py @@ -7,6 +7,10 @@ Create Date: 2025-10-21 14:30:28.566192 """ from alembic import op import models as models + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" import sqlalchemy as sa @@ -29,8 +33,15 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('pipeline_built_in_templates', schema=None) as batch_op: - batch_op.add_column(sa.Column('created_by', sa.UUID(), autoincrement=False, nullable=False)) - batch_op.add_column(sa.Column('updated_by', sa.UUID(), autoincrement=False, nullable=True)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('pipeline_built_in_templates', schema=None) as batch_op: + batch_op.add_column(sa.Column('created_by', sa.UUID(), autoincrement=False, nullable=False)) + batch_op.add_column(sa.Column('updated_by', sa.UUID(), autoincrement=False, nullable=True)) + else: + with op.batch_alter_table('pipeline_built_in_templates', schema=None) as batch_op: + batch_op.add_column(sa.Column('created_by', models.types.StringUUID(), autoincrement=False, nullable=False)) + batch_op.add_column(sa.Column('updated_by', models.types.StringUUID(), autoincrement=False, nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_10_22_1611-03f8dcbc611e_add_workflowpause_model.py b/api/migrations/versions/2025_10_22_1611-03f8dcbc611e_add_workflowpause_model.py new file mode 100644 index 0000000000..627219cc4b --- /dev/null +++ b/api/migrations/versions/2025_10_22_1611-03f8dcbc611e_add_workflowpause_model.py @@ -0,0 +1,58 @@ +"""add WorkflowPause model + +Revision ID: 03f8dcbc611e +Revises: ae662b25d9bc +Create Date: 2025-10-22 16:11:31.805407 + +""" + +from alembic import op +import models as models +import sqlalchemy as sa +from libs.uuid_utils import uuidv7 + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + +# revision identifiers, used by Alembic. +revision = "03f8dcbc611e" +down_revision = "ae662b25d9bc" +branch_labels = None +depends_on = None + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() + if _is_pg(conn): + op.create_table( + "workflow_pauses", + sa.Column("workflow_id", models.types.StringUUID(), nullable=False), + sa.Column("workflow_run_id", models.types.StringUUID(), nullable=False), + sa.Column("resumed_at", sa.DateTime(), nullable=True), + sa.Column("state_object_key", sa.String(length=255), nullable=False), + sa.Column("id", models.types.StringUUID(), server_default=sa.text("uuidv7()"), nullable=False), + sa.Column("created_at", sa.DateTime(), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False), + sa.Column("updated_at", sa.DateTime(), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False), + sa.PrimaryKeyConstraint("id", name=op.f("workflow_pauses_pkey")), + sa.UniqueConstraint("workflow_run_id", name=op.f("workflow_pauses_workflow_run_id_key")), + ) + else: + op.create_table( + "workflow_pauses", + sa.Column("workflow_id", models.types.StringUUID(), nullable=False), + sa.Column("workflow_run_id", models.types.StringUUID(), nullable=False), + sa.Column("resumed_at", sa.DateTime(), nullable=True), + sa.Column("state_object_key", sa.String(length=255), nullable=False), + sa.Column("id", models.types.StringUUID(), nullable=False), + sa.Column("created_at", sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column("updated_at", sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint("id", name=op.f("workflow_pauses_pkey")), + sa.UniqueConstraint("workflow_run_id", name=op.f("workflow_pauses_workflow_run_id_key")), + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table("workflow_pauses") + # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_10_27_1201-4558cfabe44e_add_workflow_trigger_logs.py b/api/migrations/versions/2025_10_27_1201-4558cfabe44e_add_workflow_trigger_logs.py deleted file mode 100644 index 5dbf7f947b..0000000000 --- a/api/migrations/versions/2025_10_27_1201-4558cfabe44e_add_workflow_trigger_logs.py +++ /dev/null @@ -1,67 +0,0 @@ -"""Add workflow trigger logs table - -Revision ID: 4558cfabe44e -Revises: ae662b25d9bc -Create Date: 2025-10-27 12:01:00.000000 - -""" -from alembic import op -import models as models -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '4558cfabe44e' -down_revision = 'ae662b25d9bc' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('workflow_trigger_logs', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('app_id', models.types.StringUUID(), nullable=False), - sa.Column('workflow_id', models.types.StringUUID(), nullable=False), - sa.Column('workflow_run_id', models.types.StringUUID(), nullable=True), - sa.Column('root_node_id', sa.String(length=255), nullable=True), - sa.Column('trigger_type', sa.String(length=50), nullable=False), - sa.Column('trigger_data', sa.Text(), nullable=False), - sa.Column('inputs', sa.Text(), nullable=False), - sa.Column('outputs', sa.Text(), nullable=True), - sa.Column('status', sa.String(length=50), nullable=False), - sa.Column('error', sa.Text(), nullable=True), - sa.Column('queue_name', sa.String(length=100), nullable=False), - sa.Column('celery_task_id', sa.String(length=255), nullable=True), - sa.Column('retry_count', sa.Integer(), nullable=False), - sa.Column('elapsed_time', sa.Float(), nullable=True), - sa.Column('total_tokens', sa.Integer(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('created_by_role', sa.String(length=255), nullable=False), - sa.Column('created_by', sa.String(length=255), nullable=False), - sa.Column('triggered_at', sa.DateTime(), nullable=True), - sa.Column('finished_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id', name='workflow_trigger_log_pkey') - ) - with op.batch_alter_table('workflow_trigger_logs', schema=None) as batch_op: - batch_op.create_index('workflow_trigger_log_created_at_idx', ['created_at'], unique=False) - batch_op.create_index('workflow_trigger_log_status_idx', ['status'], unique=False) - batch_op.create_index('workflow_trigger_log_tenant_app_idx', ['tenant_id', 'app_id'], unique=False) - batch_op.create_index('workflow_trigger_log_workflow_id_idx', ['workflow_id'], unique=False) - batch_op.create_index('workflow_trigger_log_workflow_run_idx', ['workflow_run_id'], unique=False) - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('workflow_trigger_logs', schema=None) as batch_op: - batch_op.drop_index('workflow_trigger_log_workflow_run_idx') - batch_op.drop_index('workflow_trigger_log_workflow_id_idx') - batch_op.drop_index('workflow_trigger_log_tenant_app_idx') - batch_op.drop_index('workflow_trigger_log_status_idx') - batch_op.drop_index('workflow_trigger_log_created_at_idx') - - op.drop_table('workflow_trigger_logs') - # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_10_27_1202-5871f634954d_add_workflow_webhook_table.py b/api/migrations/versions/2025_10_27_1202-5871f634954d_add_workflow_webhook_table.py deleted file mode 100644 index 43466a0697..0000000000 --- a/api/migrations/versions/2025_10_27_1202-5871f634954d_add_workflow_webhook_table.py +++ /dev/null @@ -1,47 +0,0 @@ -"""Add workflow webhook table - -Revision ID: 5871f634954d -Revises: 4558cfabe44e -Create Date: 2025-10-27 12:02:00.000000 - -""" -from alembic import op -import models as models -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '5871f634954d' -down_revision = '4558cfabe44e' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('workflow_webhook_triggers', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('app_id', models.types.StringUUID(), nullable=False), - sa.Column('node_id', sa.String(length=64), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('webhook_id', sa.String(length=24), nullable=False), - sa.Column('created_by', models.types.StringUUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='workflow_webhook_trigger_pkey'), - sa.UniqueConstraint('app_id', 'node_id', name='uniq_node'), - sa.UniqueConstraint('webhook_id', name='uniq_webhook_id') - ) - with op.batch_alter_table('workflow_webhook_triggers', schema=None) as batch_op: - batch_op.create_index('workflow_webhook_trigger_tenant_idx', ['tenant_id'], unique=False) - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('workflow_webhook_triggers', schema=None) as batch_op: - batch_op.drop_index('workflow_webhook_trigger_tenant_idx') - - op.drop_table('workflow_webhook_triggers') - # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_10_27_1203-9ee7d347f4c1_add_app_triggers_table.py b/api/migrations/versions/2025_10_27_1203-9ee7d347f4c1_add_app_triggers_table.py deleted file mode 100644 index fe4cd24ad6..0000000000 --- a/api/migrations/versions/2025_10_27_1203-9ee7d347f4c1_add_app_triggers_table.py +++ /dev/null @@ -1,47 +0,0 @@ -"""Add app triggers table - -Revision ID: 9ee7d347f4c1 -Revises: 5871f634954d -Create Date: 2025-10-27 12:03:00.000000 - -""" -from alembic import op -import models as models -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '9ee7d347f4c1' -down_revision = '5871f634954d' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('app_triggers', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('app_id', models.types.StringUUID(), nullable=False), - sa.Column('node_id', sa.String(length=64), nullable=False), - sa.Column('trigger_type', sa.String(length=50), nullable=False), - sa.Column('title', sa.String(length=255), nullable=False), - sa.Column('provider_name', sa.String(length=255), server_default='', nullable=True), - sa.Column('status', sa.String(length=50), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), nullable=False), - sa.PrimaryKeyConstraint('id', name='app_trigger_pkey') - ) - with op.batch_alter_table('app_triggers', schema=None) as batch_op: - batch_op.create_index('app_trigger_tenant_app_idx', ['tenant_id', 'app_id'], unique=False) - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('app_triggers', schema=None) as batch_op: - batch_op.drop_index('app_trigger_tenant_app_idx') - - op.drop_table('app_triggers') - # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_10_27_1204-c19938f630b6_add_workflow_schedule_plan.py b/api/migrations/versions/2025_10_27_1204-c19938f630b6_add_workflow_schedule_plan.py deleted file mode 100644 index 85e7e0c735..0000000000 --- a/api/migrations/versions/2025_10_27_1204-c19938f630b6_add_workflow_schedule_plan.py +++ /dev/null @@ -1,47 +0,0 @@ -"""Add workflow schedule plan table - -Revision ID: c19938f630b6 -Revises: 9ee7d347f4c1 -Create Date: 2025-10-27 12:04:00.000000 - -""" -from alembic import op -import models as models -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = 'c19938f630b6' -down_revision = '9ee7d347f4c1' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('workflow_schedule_plans', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('app_id', models.types.StringUUID(), nullable=False), - sa.Column('node_id', sa.String(length=64), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('cron_expression', sa.String(length=255), nullable=False), - sa.Column('timezone', sa.String(length=64), nullable=False), - sa.Column('next_run_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='workflow_schedule_plan_pkey'), - sa.UniqueConstraint('app_id', 'node_id', name='uniq_app_node') - ) - with op.batch_alter_table('workflow_schedule_plans', schema=None) as batch_op: - batch_op.create_index('workflow_schedule_plan_next_idx', ['next_run_at'], unique=False) - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('workflow_schedule_plans', schema=None) as batch_op: - batch_op.drop_index('workflow_schedule_plan_next_idx') - - op.drop_table('workflow_schedule_plans') - # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_10_27_1205-132392a2635f_plugin_trigger.py b/api/migrations/versions/2025_10_27_1205-132392a2635f_plugin_trigger.py deleted file mode 100644 index 426be1b071..0000000000 --- a/api/migrations/versions/2025_10_27_1205-132392a2635f_plugin_trigger.py +++ /dev/null @@ -1,102 +0,0 @@ -"""plugin_trigger - -Revision ID: 132392a2635f -Revises: c19938f630b6 -Create Date: 2025-10-27 12:05:00.000000 - -""" -from alembic import op -import models as models -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '132392a2635f' -down_revision = 'c19938f630b6' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('trigger_oauth_system_clients', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('plugin_id', sa.String(length=512), nullable=False), - sa.Column('provider', sa.String(length=255), nullable=False), - sa.Column('encrypted_oauth_params', sa.Text(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='trigger_oauth_system_client_pkey'), - sa.UniqueConstraint('plugin_id', 'provider', name='trigger_oauth_system_client_plugin_id_provider_idx') - ) - op.create_table('trigger_oauth_tenant_clients', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('plugin_id', sa.String(length=512), nullable=False), - sa.Column('provider', sa.String(length=255), nullable=False), - sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), - sa.Column('encrypted_oauth_params', sa.Text(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='trigger_oauth_tenant_client_pkey'), - sa.UniqueConstraint('tenant_id', 'plugin_id', 'provider', name='unique_trigger_oauth_tenant_client') - ) - op.create_table('trigger_subscriptions', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False, comment='Subscription instance name'), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('user_id', models.types.StringUUID(), nullable=False), - sa.Column('provider_id', sa.String(length=255), nullable=False, comment='Provider identifier (e.g., plugin_id/provider_name)'), - sa.Column('endpoint_id', sa.String(length=255), nullable=False, comment='Subscription endpoint'), - sa.Column('parameters', sa.JSON(), nullable=False, comment='Subscription parameters JSON'), - sa.Column('properties', sa.JSON(), nullable=False, comment='Subscription properties JSON'), - sa.Column('credentials', sa.JSON(), nullable=False, comment='Subscription credentials JSON'), - sa.Column('credential_type', sa.String(length=50), nullable=False, comment='oauth or api_key'), - sa.Column('credential_expires_at', sa.Integer(), nullable=False, comment='OAuth token expiration timestamp, -1 for never'), - sa.Column('expires_at', sa.Integer(), nullable=False, comment='Subscription instance expiration timestamp, -1 for never'), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='trigger_provider_pkey'), - sa.UniqueConstraint('tenant_id', 'provider_id', 'name', name='unique_trigger_provider') - ) - with op.batch_alter_table('trigger_subscriptions', schema=None) as batch_op: - batch_op.create_index('idx_trigger_providers_endpoint', ['endpoint_id'], unique=True) - batch_op.create_index('idx_trigger_providers_tenant_endpoint', ['tenant_id', 'endpoint_id'], unique=False) - batch_op.create_index('idx_trigger_providers_tenant_provider', ['tenant_id', 'provider_id'], unique=False) - - # Create workflow_plugin_triggers table with final schema (merged from all 4 migrations) - op.create_table('workflow_plugin_triggers', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), - sa.Column('app_id', models.types.StringUUID(), nullable=False), - sa.Column('node_id', sa.String(length=64), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('provider_id', sa.String(length=512), nullable=False), - sa.Column('subscription_id', sa.String(length=255), nullable=False), - sa.Column('event_name', sa.String(length=255), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='workflow_plugin_trigger_pkey'), - sa.UniqueConstraint('app_id', 'node_id', name='uniq_app_node_subscription') - ) - with op.batch_alter_table('workflow_plugin_triggers', schema=None) as batch_op: - batch_op.create_index('workflow_plugin_trigger_tenant_subscription_idx', ['tenant_id', 'subscription_id', 'event_name'], unique=False) - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('workflow_plugin_triggers', schema=None) as batch_op: - batch_op.drop_index('workflow_plugin_trigger_tenant_subscription_idx') - - op.drop_table('workflow_plugin_triggers') - with op.batch_alter_table('trigger_subscriptions', schema=None) as batch_op: - batch_op.drop_index('idx_trigger_providers_tenant_provider') - batch_op.drop_index('idx_trigger_providers_tenant_endpoint') - batch_op.drop_index('idx_trigger_providers_endpoint') - - op.drop_table('trigger_subscriptions') - op.drop_table('trigger_oauth_tenant_clients') - op.drop_table('trigger_oauth_system_clients') - - # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_10_30_1518-669ffd70119c_introduce_trigger.py b/api/migrations/versions/2025_10_30_1518-669ffd70119c_introduce_trigger.py new file mode 100644 index 0000000000..9641a15c89 --- /dev/null +++ b/api/migrations/versions/2025_10_30_1518-669ffd70119c_introduce_trigger.py @@ -0,0 +1,380 @@ +"""introduce_trigger + +Revision ID: 669ffd70119c +Revises: 03f8dcbc611e +Create Date: 2025-10-30 15:18:49.549156 + +""" +from alembic import op +import models as models +import sqlalchemy as sa +from libs.uuid_utils import uuidv7 + +from models.enums import AppTriggerStatus, AppTriggerType + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + +# revision identifiers, used by Alembic. +revision = '669ffd70119c' +down_revision = '03f8dcbc611e' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('app_triggers', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('node_id', sa.String(length=64), nullable=False), + sa.Column('trigger_type', models.types.EnumText(AppTriggerType, length=50), nullable=False), + sa.Column('title', sa.String(length=255), nullable=False), + sa.Column('provider_name', sa.String(length=255), server_default='', nullable=True), + sa.Column('status', models.types.EnumText(AppTriggerStatus, length=50), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint('id', name='app_trigger_pkey') + ) + else: + op.create_table('app_triggers', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('node_id', sa.String(length=64), nullable=False), + sa.Column('trigger_type', models.types.EnumText(AppTriggerType, length=50), nullable=False), + sa.Column('title', sa.String(length=255), nullable=False), + sa.Column('provider_name', sa.String(length=255), server_default='', nullable=True), + sa.Column('status', models.types.EnumText(AppTriggerStatus, length=50), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint('id', name='app_trigger_pkey') + ) + with op.batch_alter_table('app_triggers', schema=None) as batch_op: + batch_op.create_index('app_trigger_tenant_app_idx', ['tenant_id', 'app_id'], unique=False) + + if _is_pg(conn): + op.create_table('trigger_oauth_system_clients', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('plugin_id', sa.String(length=512), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('encrypted_oauth_params', sa.Text(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='trigger_oauth_system_client_pkey'), + sa.UniqueConstraint('plugin_id', 'provider', name='trigger_oauth_system_client_plugin_id_provider_idx') + ) + else: + op.create_table('trigger_oauth_system_clients', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('plugin_id', sa.String(length=512), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('encrypted_oauth_params', models.types.LongText(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='trigger_oauth_system_client_pkey'), + sa.UniqueConstraint('plugin_id', 'provider', name='trigger_oauth_system_client_plugin_id_provider_idx') + ) + if _is_pg(conn): + op.create_table('trigger_oauth_tenant_clients', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('plugin_id', sa.String(length=255), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('encrypted_oauth_params', sa.Text(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='trigger_oauth_tenant_client_pkey'), + sa.UniqueConstraint('tenant_id', 'plugin_id', 'provider', name='unique_trigger_oauth_tenant_client') + ) + else: + op.create_table('trigger_oauth_tenant_clients', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('plugin_id', sa.String(length=255), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('encrypted_oauth_params', models.types.LongText(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='trigger_oauth_tenant_client_pkey'), + sa.UniqueConstraint('tenant_id', 'plugin_id', 'provider', name='unique_trigger_oauth_tenant_client') + ) + if _is_pg(conn): + op.create_table('trigger_subscriptions', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False, comment='Subscription instance name'), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('user_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_id', sa.String(length=255), nullable=False, comment='Provider identifier (e.g., plugin_id/provider_name)'), + sa.Column('endpoint_id', sa.String(length=255), nullable=False, comment='Subscription endpoint'), + sa.Column('parameters', sa.JSON(), nullable=False, comment='Subscription parameters JSON'), + sa.Column('properties', sa.JSON(), nullable=False, comment='Subscription properties JSON'), + sa.Column('credentials', sa.JSON(), nullable=False, comment='Subscription credentials JSON'), + sa.Column('credential_type', sa.String(length=50), nullable=False, comment='oauth or api_key'), + sa.Column('credential_expires_at', sa.Integer(), nullable=False, comment='OAuth token expiration timestamp, -1 for never'), + sa.Column('expires_at', sa.Integer(), nullable=False, comment='Subscription instance expiration timestamp, -1 for never'), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='trigger_provider_pkey'), + sa.UniqueConstraint('tenant_id', 'provider_id', 'name', name='unique_trigger_provider') + ) + else: + op.create_table('trigger_subscriptions', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False, comment='Subscription instance name'), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('user_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_id', sa.String(length=255), nullable=False, comment='Provider identifier (e.g., plugin_id/provider_name)'), + sa.Column('endpoint_id', sa.String(length=255), nullable=False, comment='Subscription endpoint'), + sa.Column('parameters', sa.JSON(), nullable=False, comment='Subscription parameters JSON'), + sa.Column('properties', sa.JSON(), nullable=False, comment='Subscription properties JSON'), + sa.Column('credentials', sa.JSON(), nullable=False, comment='Subscription credentials JSON'), + sa.Column('credential_type', sa.String(length=50), nullable=False, comment='oauth or api_key'), + sa.Column('credential_expires_at', sa.Integer(), nullable=False, comment='OAuth token expiration timestamp, -1 for never'), + sa.Column('expires_at', sa.Integer(), nullable=False, comment='Subscription instance expiration timestamp, -1 for never'), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='trigger_provider_pkey'), + sa.UniqueConstraint('tenant_id', 'provider_id', 'name', name='unique_trigger_provider') + ) + with op.batch_alter_table('trigger_subscriptions', schema=None) as batch_op: + batch_op.create_index('idx_trigger_providers_endpoint', ['endpoint_id'], unique=True) + batch_op.create_index('idx_trigger_providers_tenant_endpoint', ['tenant_id', 'endpoint_id'], unique=False) + batch_op.create_index('idx_trigger_providers_tenant_provider', ['tenant_id', 'provider_id'], unique=False) + + if _is_pg(conn): + op.create_table('workflow_plugin_triggers', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('node_id', sa.String(length=64), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_id', sa.String(length=512), nullable=False), + sa.Column('event_name', sa.String(length=255), nullable=False), + sa.Column('subscription_id', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='workflow_plugin_trigger_pkey'), + sa.UniqueConstraint('app_id', 'node_id', name='uniq_app_node_subscription') + ) + else: + op.create_table('workflow_plugin_triggers', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('node_id', sa.String(length=64), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_id', sa.String(length=512), nullable=False), + sa.Column('event_name', sa.String(length=255), nullable=False), + sa.Column('subscription_id', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='workflow_plugin_trigger_pkey'), + sa.UniqueConstraint('app_id', 'node_id', name='uniq_app_node_subscription') + ) + with op.batch_alter_table('workflow_plugin_triggers', schema=None) as batch_op: + batch_op.create_index('workflow_plugin_trigger_tenant_subscription_idx', ['tenant_id', 'subscription_id', 'event_name'], unique=False) + + if _is_pg(conn): + op.create_table('workflow_schedule_plans', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('node_id', sa.String(length=64), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('cron_expression', sa.String(length=255), nullable=False), + sa.Column('timezone', sa.String(length=64), nullable=False), + sa.Column('next_run_at', sa.DateTime(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='workflow_schedule_plan_pkey'), + sa.UniqueConstraint('app_id', 'node_id', name='uniq_app_node') + ) + else: + op.create_table('workflow_schedule_plans', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('node_id', sa.String(length=64), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('cron_expression', sa.String(length=255), nullable=False), + sa.Column('timezone', sa.String(length=64), nullable=False), + sa.Column('next_run_at', sa.DateTime(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='workflow_schedule_plan_pkey'), + sa.UniqueConstraint('app_id', 'node_id', name='uniq_app_node') + ) + with op.batch_alter_table('workflow_schedule_plans', schema=None) as batch_op: + batch_op.create_index('workflow_schedule_plan_next_idx', ['next_run_at'], unique=False) + + if _is_pg(conn): + op.create_table('workflow_trigger_logs', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('workflow_id', models.types.StringUUID(), nullable=False), + sa.Column('workflow_run_id', models.types.StringUUID(), nullable=True), + sa.Column('root_node_id', sa.String(length=255), nullable=True), + sa.Column('trigger_metadata', sa.Text(), nullable=False), + sa.Column('trigger_type', models.types.EnumText(AppTriggerType, length=50), nullable=False), + sa.Column('trigger_data', sa.Text(), nullable=False), + sa.Column('inputs', sa.Text(), nullable=False), + sa.Column('outputs', sa.Text(), nullable=True), + sa.Column('status', models.types.EnumText(AppTriggerStatus, length=50), nullable=False), + sa.Column('error', sa.Text(), nullable=True), + sa.Column('queue_name', sa.String(length=100), nullable=False), + sa.Column('celery_task_id', sa.String(length=255), nullable=True), + sa.Column('retry_count', sa.Integer(), nullable=False), + sa.Column('elapsed_time', sa.Float(), nullable=True), + sa.Column('total_tokens', sa.Integer(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('created_by_role', sa.String(length=255), nullable=False), + sa.Column('created_by', sa.String(length=255), nullable=False), + sa.Column('triggered_at', sa.DateTime(), nullable=True), + sa.Column('finished_at', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id', name='workflow_trigger_log_pkey') + ) + else: + op.create_table('workflow_trigger_logs', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('workflow_id', models.types.StringUUID(), nullable=False), + sa.Column('workflow_run_id', models.types.StringUUID(), nullable=True), + sa.Column('root_node_id', sa.String(length=255), nullable=True), + sa.Column('trigger_metadata', models.types.LongText(), nullable=False), + sa.Column('trigger_type', models.types.EnumText(AppTriggerType, length=50), nullable=False), + sa.Column('trigger_data', models.types.LongText(), nullable=False), + sa.Column('inputs', models.types.LongText(), nullable=False), + sa.Column('outputs', models.types.LongText(), nullable=True), + sa.Column('status', models.types.EnumText(AppTriggerStatus, length=50), nullable=False), + sa.Column('error', models.types.LongText(), nullable=True), + sa.Column('queue_name', sa.String(length=100), nullable=False), + sa.Column('celery_task_id', sa.String(length=255), nullable=True), + sa.Column('retry_count', sa.Integer(), nullable=False), + sa.Column('elapsed_time', sa.Float(), nullable=True), + sa.Column('total_tokens', sa.Integer(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('created_by_role', sa.String(length=255), nullable=False), + sa.Column('created_by', sa.String(length=255), nullable=False), + sa.Column('triggered_at', sa.DateTime(), nullable=True), + sa.Column('finished_at', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id', name='workflow_trigger_log_pkey') + ) + with op.batch_alter_table('workflow_trigger_logs', schema=None) as batch_op: + batch_op.create_index('workflow_trigger_log_created_at_idx', ['created_at'], unique=False) + batch_op.create_index('workflow_trigger_log_status_idx', ['status'], unique=False) + batch_op.create_index('workflow_trigger_log_tenant_app_idx', ['tenant_id', 'app_id'], unique=False) + batch_op.create_index('workflow_trigger_log_workflow_id_idx', ['workflow_id'], unique=False) + batch_op.create_index('workflow_trigger_log_workflow_run_idx', ['workflow_run_id'], unique=False) + + if _is_pg(conn): + op.create_table('workflow_webhook_triggers', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('node_id', sa.String(length=64), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('webhook_id', sa.String(length=24), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='workflow_webhook_trigger_pkey'), + sa.UniqueConstraint('app_id', 'node_id', name='uniq_node'), + sa.UniqueConstraint('webhook_id', name='uniq_webhook_id') + ) + else: + op.create_table('workflow_webhook_triggers', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('node_id', sa.String(length=64), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('webhook_id', sa.String(length=24), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='workflow_webhook_trigger_pkey'), + sa.UniqueConstraint('app_id', 'node_id', name='uniq_node'), + sa.UniqueConstraint('webhook_id', name='uniq_webhook_id') + ) + with op.batch_alter_table('workflow_webhook_triggers', schema=None) as batch_op: + batch_op.create_index('workflow_webhook_trigger_tenant_idx', ['tenant_id'], unique=False) + + with op.batch_alter_table('celery_taskmeta', schema=None) as batch_op: + batch_op.alter_column('task_id', + existing_type=sa.VARCHAR(length=155), + nullable=False) + batch_op.alter_column('status', + existing_type=sa.VARCHAR(length=50), + nullable=False) + + with op.batch_alter_table('celery_tasksetmeta', schema=None) as batch_op: + batch_op.alter_column('taskset_id', + existing_type=sa.VARCHAR(length=155), + nullable=False) + + with op.batch_alter_table('providers', schema=None) as batch_op: + batch_op.drop_column('credential_status') + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('credential_status', sa.VARCHAR(length=20), server_default=sa.text("'active'::character varying"), autoincrement=False, nullable=True)) + else: + with op.batch_alter_table('providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('credential_status', sa.VARCHAR(length=20), server_default=sa.text("'active'"), autoincrement=False, nullable=True)) + + with op.batch_alter_table('celery_tasksetmeta', schema=None) as batch_op: + batch_op.alter_column('taskset_id', + existing_type=sa.VARCHAR(length=155), + nullable=True) + + with op.batch_alter_table('celery_taskmeta', schema=None) as batch_op: + batch_op.alter_column('status', + existing_type=sa.VARCHAR(length=50), + nullable=True) + batch_op.alter_column('task_id', + existing_type=sa.VARCHAR(length=155), + nullable=True) + + with op.batch_alter_table('workflow_webhook_triggers', schema=None) as batch_op: + batch_op.drop_index('workflow_webhook_trigger_tenant_idx') + + op.drop_table('workflow_webhook_triggers') + with op.batch_alter_table('workflow_trigger_logs', schema=None) as batch_op: + batch_op.drop_index('workflow_trigger_log_workflow_run_idx') + batch_op.drop_index('workflow_trigger_log_workflow_id_idx') + batch_op.drop_index('workflow_trigger_log_tenant_app_idx') + batch_op.drop_index('workflow_trigger_log_status_idx') + batch_op.drop_index('workflow_trigger_log_created_at_idx') + + op.drop_table('workflow_trigger_logs') + with op.batch_alter_table('workflow_schedule_plans', schema=None) as batch_op: + batch_op.drop_index('workflow_schedule_plan_next_idx') + + op.drop_table('workflow_schedule_plans') + with op.batch_alter_table('workflow_plugin_triggers', schema=None) as batch_op: + batch_op.drop_index('workflow_plugin_trigger_tenant_subscription_idx') + + op.drop_table('workflow_plugin_triggers') + with op.batch_alter_table('trigger_subscriptions', schema=None) as batch_op: + batch_op.drop_index('idx_trigger_providers_tenant_provider') + batch_op.drop_index('idx_trigger_providers_tenant_endpoint') + batch_op.drop_index('idx_trigger_providers_endpoint') + + op.drop_table('trigger_subscriptions') + op.drop_table('trigger_oauth_tenant_clients') + op.drop_table('trigger_oauth_system_clients') + with op.batch_alter_table('app_triggers', schema=None) as batch_op: + batch_op.drop_index('app_trigger_tenant_app_idx') + + op.drop_table('app_triggers') + # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_11_15_2102-09cfdda155d1_mysql_adaptation.py b/api/migrations/versions/2025_11_15_2102-09cfdda155d1_mysql_adaptation.py new file mode 100644 index 0000000000..a3f6c3cb19 --- /dev/null +++ b/api/migrations/versions/2025_11_15_2102-09cfdda155d1_mysql_adaptation.py @@ -0,0 +1,131 @@ +"""empty message + +Revision ID: 09cfdda155d1 +Revises: 669ffd70119c +Create Date: 2025-11-15 21:02:32.472885 + +""" +from alembic import op +import models as models +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql, mysql + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + +# revision identifiers, used by Alembic. +revision = '09cfdda155d1' +down_revision = '669ffd70119c' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() + if _is_pg(conn): + with op.batch_alter_table('datasource_providers', schema=None) as batch_op: + batch_op.alter_column('provider', + existing_type=sa.VARCHAR(length=255), + type_=sa.String(length=128), + existing_nullable=False) + + with op.batch_alter_table('external_knowledge_bindings', schema=None) as batch_op: + batch_op.alter_column('external_knowledge_id', + existing_type=sa.TEXT(), + type_=sa.String(length=512), + existing_nullable=False) + + with op.batch_alter_table('tenant_plugin_auto_upgrade_strategies', schema=None) as batch_op: + batch_op.alter_column('exclude_plugins', + existing_type=postgresql.ARRAY(sa.VARCHAR(length=255)), + type_=sa.JSON(), + existing_nullable=False, + postgresql_using='to_jsonb(exclude_plugins)::json') + + batch_op.alter_column('include_plugins', + existing_type=postgresql.ARRAY(sa.VARCHAR(length=255)), + type_=sa.JSON(), + existing_nullable=False, + postgresql_using='to_jsonb(include_plugins)::json') + + with op.batch_alter_table('tool_oauth_tenant_clients', schema=None) as batch_op: + batch_op.alter_column('plugin_id', + existing_type=sa.VARCHAR(length=512), + type_=sa.String(length=255), + existing_nullable=False) + + with op.batch_alter_table('trigger_oauth_system_clients', schema=None) as batch_op: + batch_op.alter_column('plugin_id', + existing_type=sa.VARCHAR(length=512), + type_=sa.String(length=255), + existing_nullable=False) + else: + with op.batch_alter_table('trigger_oauth_system_clients', schema=None) as batch_op: + batch_op.alter_column('plugin_id', + existing_type=mysql.VARCHAR(length=512), + type_=sa.String(length=255), + existing_nullable=False) + + with op.batch_alter_table('workflows', schema=None) as batch_op: + batch_op.alter_column('updated_at', + existing_type=mysql.TIMESTAMP(), + type_=sa.DateTime(), + existing_nullable=False) + + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() + if _is_pg(conn): + with op.batch_alter_table('trigger_oauth_system_clients', schema=None) as batch_op: + batch_op.alter_column('plugin_id', + existing_type=sa.String(length=255), + type_=sa.VARCHAR(length=512), + existing_nullable=False) + + with op.batch_alter_table('tool_oauth_tenant_clients', schema=None) as batch_op: + batch_op.alter_column('plugin_id', + existing_type=sa.String(length=255), + type_=sa.VARCHAR(length=512), + existing_nullable=False) + + with op.batch_alter_table('tenant_plugin_auto_upgrade_strategies', schema=None) as batch_op: + batch_op.alter_column('include_plugins', + existing_type=sa.JSON(), + type_=postgresql.ARRAY(sa.VARCHAR(length=255)), + existing_nullable=False) + batch_op.alter_column('exclude_plugins', + existing_type=sa.JSON(), + type_=postgresql.ARRAY(sa.VARCHAR(length=255)), + existing_nullable=False) + + with op.batch_alter_table('external_knowledge_bindings', schema=None) as batch_op: + batch_op.alter_column('external_knowledge_id', + existing_type=sa.String(length=512), + type_=sa.TEXT(), + existing_nullable=False) + + with op.batch_alter_table('datasource_providers', schema=None) as batch_op: + batch_op.alter_column('provider', + existing_type=sa.String(length=128), + type_=sa.VARCHAR(length=255), + existing_nullable=False) + + else: + with op.batch_alter_table('workflows', schema=None) as batch_op: + batch_op.alter_column('updated_at', + existing_type=sa.DateTime(), + type_=mysql.TIMESTAMP(), + existing_nullable=False) + + with op.batch_alter_table('trigger_oauth_system_clients', schema=None) as batch_op: + batch_op.alter_column('plugin_id', + existing_type=sa.String(length=255), + type_=mysql.VARCHAR(length=512), + existing_nullable=False) + + # ### end Alembic commands ### diff --git a/api/migrations/versions/23db93619b9d_add_message_files_into_agent_thought.py b/api/migrations/versions/23db93619b9d_add_message_files_into_agent_thought.py index f3eef4681e..fae506906b 100644 --- a/api/migrations/versions/23db93619b9d_add_message_files_into_agent_thought.py +++ b/api/migrations/versions/23db93619b9d_add_message_files_into_agent_thought.py @@ -8,6 +8,12 @@ Create Date: 2024-01-18 08:46:37.302657 import sqlalchemy as sa from alembic import op +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '23db93619b9d' down_revision = '8ae9bc661daa' @@ -17,8 +23,14 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: - batch_op.add_column(sa.Column('message_files', sa.Text(), nullable=True)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: + batch_op.add_column(sa.Column('message_files', sa.Text(), nullable=True)) + else: + with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: + batch_op.add_column(sa.Column('message_files', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/246ba09cbbdb_add_app_anntation_setting.py b/api/migrations/versions/246ba09cbbdb_add_app_anntation_setting.py index 9816e92dd1..2676ef0b94 100644 --- a/api/migrations/versions/246ba09cbbdb_add_app_anntation_setting.py +++ b/api/migrations/versions/246ba09cbbdb_add_app_anntation_setting.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '246ba09cbbdb' down_revision = '714aafe25d39' @@ -18,17 +24,33 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('app_annotation_settings', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('score_threshold', sa.Float(), server_default=sa.text('0'), nullable=False), - sa.Column('collection_binding_id', postgresql.UUID(), nullable=False), - sa.Column('created_user_id', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_user_id', postgresql.UUID(), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='app_annotation_settings_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('app_annotation_settings', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('score_threshold', sa.Float(), server_default=sa.text('0'), nullable=False), + sa.Column('collection_binding_id', postgresql.UUID(), nullable=False), + sa.Column('created_user_id', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_user_id', postgresql.UUID(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='app_annotation_settings_pkey') + ) + else: + op.create_table('app_annotation_settings', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('score_threshold', sa.Float(), server_default=sa.text('0'), nullable=False), + sa.Column('collection_binding_id', models.types.StringUUID(), nullable=False), + sa.Column('created_user_id', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_user_id', models.types.StringUUID(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='app_annotation_settings_pkey') + ) + with op.batch_alter_table('app_annotation_settings', schema=None) as batch_op: batch_op.create_index('app_annotation_settings_app_idx', ['app_id'], unique=False) @@ -40,8 +62,14 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('annotation_reply', sa.TEXT(), autoincrement=False, nullable=True)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('annotation_reply', sa.TEXT(), autoincrement=False, nullable=True)) + else: + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('annotation_reply', models.types.LongText(), autoincrement=False, nullable=True)) with op.batch_alter_table('app_annotation_settings', schema=None) as batch_op: batch_op.drop_index('app_annotation_settings_app_idx') diff --git a/api/migrations/versions/2a3aebbbf4bb_add_app_tracing.py b/api/migrations/versions/2a3aebbbf4bb_add_app_tracing.py index 99b7010612..3362a3a09f 100644 --- a/api/migrations/versions/2a3aebbbf4bb_add_app_tracing.py +++ b/api/migrations/versions/2a3aebbbf4bb_add_app_tracing.py @@ -10,6 +10,10 @@ from alembic import op import models as models + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '2a3aebbbf4bb' down_revision = 'c031d46af369' @@ -19,8 +23,14 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('apps', schema=None) as batch_op: - batch_op.add_column(sa.Column('tracing', sa.Text(), nullable=True)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('apps', schema=None) as batch_op: + batch_op.add_column(sa.Column('tracing', sa.Text(), nullable=True)) + else: + with op.batch_alter_table('apps', schema=None) as batch_op: + batch_op.add_column(sa.Column('tracing', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/2e9819ca5b28_add_tenant_id_in_api_token.py b/api/migrations/versions/2e9819ca5b28_add_tenant_id_in_api_token.py index b06a3530b8..40bd727f66 100644 --- a/api/migrations/versions/2e9819ca5b28_add_tenant_id_in_api_token.py +++ b/api/migrations/versions/2e9819ca5b28_add_tenant_id_in_api_token.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '2e9819ca5b28' down_revision = 'ab23c11305d4' @@ -18,19 +24,35 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('api_tokens', schema=None) as batch_op: - batch_op.add_column(sa.Column('tenant_id', postgresql.UUID(), nullable=True)) - batch_op.create_index('api_token_tenant_idx', ['tenant_id', 'type'], unique=False) - batch_op.drop_column('dataset_id') + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('api_tokens', schema=None) as batch_op: + batch_op.add_column(sa.Column('tenant_id', postgresql.UUID(), nullable=True)) + batch_op.create_index('api_token_tenant_idx', ['tenant_id', 'type'], unique=False) + batch_op.drop_column('dataset_id') + else: + with op.batch_alter_table('api_tokens', schema=None) as batch_op: + batch_op.add_column(sa.Column('tenant_id', models.types.StringUUID(), nullable=True)) + batch_op.create_index('api_token_tenant_idx', ['tenant_id', 'type'], unique=False) + batch_op.drop_column('dataset_id') # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('api_tokens', schema=None) as batch_op: - batch_op.add_column(sa.Column('dataset_id', postgresql.UUID(), autoincrement=False, nullable=True)) - batch_op.drop_index('api_token_tenant_idx') - batch_op.drop_column('tenant_id') + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('api_tokens', schema=None) as batch_op: + batch_op.add_column(sa.Column('dataset_id', postgresql.UUID(), autoincrement=False, nullable=True)) + batch_op.drop_index('api_token_tenant_idx') + batch_op.drop_column('tenant_id') + else: + with op.batch_alter_table('api_tokens', schema=None) as batch_op: + batch_op.add_column(sa.Column('dataset_id', models.types.StringUUID(), autoincrement=False, nullable=True)) + batch_op.drop_index('api_token_tenant_idx') + batch_op.drop_column('tenant_id') # ### end Alembic commands ### diff --git a/api/migrations/versions/380c6aa5a70d_add_tool_labels_to_agent_thought.py b/api/migrations/versions/380c6aa5a70d_add_tool_labels_to_agent_thought.py index 6c13818463..42e403f8d1 100644 --- a/api/migrations/versions/380c6aa5a70d_add_tool_labels_to_agent_thought.py +++ b/api/migrations/versions/380c6aa5a70d_add_tool_labels_to_agent_thought.py @@ -8,6 +8,12 @@ Create Date: 2024-01-24 10:58:15.644445 import sqlalchemy as sa from alembic import op +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '380c6aa5a70d' down_revision = 'dfb3b7f477da' @@ -17,8 +23,14 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: - batch_op.add_column(sa.Column('tool_labels_str', sa.Text(), server_default=sa.text("'{}'::text"), nullable=False)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: + batch_op.add_column(sa.Column('tool_labels_str', sa.Text(), server_default=sa.text("'{}'::text"), nullable=False)) + else: + with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: + batch_op.add_column(sa.Column('tool_labels_str', models.types.LongText(), default=sa.text("'{}'"), nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/3b18fea55204_add_tool_label_bings.py b/api/migrations/versions/3b18fea55204_add_tool_label_bings.py index bf54c247ea..ffba6c9f36 100644 --- a/api/migrations/versions/3b18fea55204_add_tool_label_bings.py +++ b/api/migrations/versions/3b18fea55204_add_tool_label_bings.py @@ -10,6 +10,10 @@ from alembic import op import models.types + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '3b18fea55204' down_revision = '7bdef072e63a' @@ -19,13 +23,24 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('tool_label_bindings', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tool_id', sa.String(length=64), nullable=False), - sa.Column('tool_type', sa.String(length=40), nullable=False), - sa.Column('label_name', sa.String(length=40), nullable=False), - sa.PrimaryKeyConstraint('id', name='tool_label_bind_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('tool_label_bindings', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tool_id', sa.String(length=64), nullable=False), + sa.Column('tool_type', sa.String(length=40), nullable=False), + sa.Column('label_name', sa.String(length=40), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_label_bind_pkey') + ) + else: + op.create_table('tool_label_bindings', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tool_id', sa.String(length=64), nullable=False), + sa.Column('tool_type', sa.String(length=40), nullable=False), + sa.Column('label_name', sa.String(length=40), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_label_bind_pkey') + ) with op.batch_alter_table('tool_workflow_providers', schema=None) as batch_op: batch_op.add_column(sa.Column('privacy_policy', sa.String(length=255), server_default='', nullable=True)) diff --git a/api/migrations/versions/3c7cac9521c6_add_tags_and_binding_table.py b/api/migrations/versions/3c7cac9521c6_add_tags_and_binding_table.py index 5f11880683..6b2263b0b7 100644 --- a/api/migrations/versions/3c7cac9521c6_add_tags_and_binding_table.py +++ b/api/migrations/versions/3c7cac9521c6_add_tags_and_binding_table.py @@ -6,9 +6,15 @@ Create Date: 2024-04-11 06:17:34.278594 """ import sqlalchemy as sa -from alembic import op +from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '3c7cac9521c6' down_revision = 'c3311b089690' @@ -18,28 +24,54 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('tag_bindings', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=True), - sa.Column('tag_id', postgresql.UUID(), nullable=True), - sa.Column('target_id', postgresql.UUID(), nullable=True), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='tag_binding_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('tag_bindings', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=True), + sa.Column('tag_id', postgresql.UUID(), nullable=True), + sa.Column('target_id', postgresql.UUID(), nullable=True), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='tag_binding_pkey') + ) + else: + op.create_table('tag_bindings', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=True), + sa.Column('tag_id', models.types.StringUUID(), nullable=True), + sa.Column('target_id', models.types.StringUUID(), nullable=True), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tag_binding_pkey') + ) + with op.batch_alter_table('tag_bindings', schema=None) as batch_op: batch_op.create_index('tag_bind_tag_id_idx', ['tag_id'], unique=False) batch_op.create_index('tag_bind_target_id_idx', ['target_id'], unique=False) - op.create_table('tags', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=True), - sa.Column('type', sa.String(length=16), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='tag_pkey') - ) + if _is_pg(conn): + op.create_table('tags', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=True), + sa.Column('type', sa.String(length=16), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='tag_pkey') + ) + else: + op.create_table('tags', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=True), + sa.Column('type', sa.String(length=16), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tag_pkey') + ) + with op.batch_alter_table('tags', schema=None) as batch_op: batch_op.create_index('tag_name_idx', ['name'], unique=False) batch_op.create_index('tag_type_idx', ['type'], unique=False) diff --git a/api/migrations/versions/3ef9b2b6bee6_add_assistant_app.py b/api/migrations/versions/3ef9b2b6bee6_add_assistant_app.py index 4fbc570303..553d1d8743 100644 --- a/api/migrations/versions/3ef9b2b6bee6_add_assistant_app.py +++ b/api/migrations/versions/3ef9b2b6bee6_add_assistant_app.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '3ef9b2b6bee6' down_revision = '89c7899ca936' @@ -18,44 +24,96 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('tool_api_providers', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('name', sa.String(length=40), nullable=False), - sa.Column('schema', sa.Text(), nullable=False), - sa.Column('schema_type_str', sa.String(length=40), nullable=False), - sa.Column('user_id', postgresql.UUID(), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('description_str', sa.Text(), nullable=False), - sa.Column('tools_str', sa.Text(), nullable=False), - sa.PrimaryKeyConstraint('id', name='tool_api_provider_pkey') - ) - op.create_table('tool_builtin_providers', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=True), - sa.Column('user_id', postgresql.UUID(), nullable=False), - sa.Column('provider', sa.String(length=40), nullable=False), - sa.Column('encrypted_credentials', sa.Text(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='tool_builtin_provider_pkey'), - sa.UniqueConstraint('tenant_id', 'provider', name='unique_builtin_tool_provider') - ) - op.create_table('tool_published_apps', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('user_id', postgresql.UUID(), nullable=False), - sa.Column('description', sa.Text(), nullable=False), - sa.Column('llm_description', sa.Text(), nullable=False), - sa.Column('query_description', sa.Text(), nullable=False), - sa.Column('query_name', sa.String(length=40), nullable=False), - sa.Column('tool_name', sa.String(length=40), nullable=False), - sa.Column('author', sa.String(length=40), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.ForeignKeyConstraint(['app_id'], ['apps.id'], ), - sa.PrimaryKeyConstraint('id', name='published_app_tool_pkey'), - sa.UniqueConstraint('app_id', 'user_id', name='unique_published_app_tool') - ) + conn = op.get_bind() + + if _is_pg(conn): + # PostgreSQL: Keep original syntax + op.create_table('tool_api_providers', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('name', sa.String(length=40), nullable=False), + sa.Column('schema', sa.Text(), nullable=False), + sa.Column('schema_type_str', sa.String(length=40), nullable=False), + sa.Column('user_id', postgresql.UUID(), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('description_str', sa.Text(), nullable=False), + sa.Column('tools_str', sa.Text(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_api_provider_pkey') + ) + else: + # MySQL: Use compatible syntax + op.create_table('tool_api_providers', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=40), nullable=False), + sa.Column('schema', models.types.LongText(), nullable=False), + sa.Column('schema_type_str', sa.String(length=40), nullable=False), + sa.Column('user_id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('description_str', models.types.LongText(), nullable=False), + sa.Column('tools_str', models.types.LongText(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_api_provider_pkey') + ) + if _is_pg(conn): + # PostgreSQL: Keep original syntax + op.create_table('tool_builtin_providers', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=True), + sa.Column('user_id', postgresql.UUID(), nullable=False), + sa.Column('provider', sa.String(length=40), nullable=False), + sa.Column('encrypted_credentials', sa.Text(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_builtin_provider_pkey'), + sa.UniqueConstraint('tenant_id', 'provider', name='unique_builtin_tool_provider') + ) + else: + # MySQL: Use compatible syntax + op.create_table('tool_builtin_providers', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=True), + sa.Column('user_id', models.types.StringUUID(), nullable=False), + sa.Column('provider', sa.String(length=40), nullable=False), + sa.Column('encrypted_credentials', models.types.LongText(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_builtin_provider_pkey'), + sa.UniqueConstraint('tenant_id', 'provider', name='unique_builtin_tool_provider') + ) + if _is_pg(conn): + # PostgreSQL: Keep original syntax + op.create_table('tool_published_apps', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('user_id', postgresql.UUID(), nullable=False), + sa.Column('description', sa.Text(), nullable=False), + sa.Column('llm_description', sa.Text(), nullable=False), + sa.Column('query_description', sa.Text(), nullable=False), + sa.Column('query_name', sa.String(length=40), nullable=False), + sa.Column('tool_name', sa.String(length=40), nullable=False), + sa.Column('author', sa.String(length=40), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.ForeignKeyConstraint(['app_id'], ['apps.id'], ), + sa.PrimaryKeyConstraint('id', name='published_app_tool_pkey'), + sa.UniqueConstraint('app_id', 'user_id', name='unique_published_app_tool') + ) + else: + # MySQL: Use compatible syntax + op.create_table('tool_published_apps', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('user_id', models.types.StringUUID(), nullable=False), + sa.Column('description', models.types.LongText(), nullable=False), + sa.Column('llm_description', models.types.LongText(), nullable=False), + sa.Column('query_description', models.types.LongText(), nullable=False), + sa.Column('query_name', sa.String(length=40), nullable=False), + sa.Column('tool_name', sa.String(length=40), nullable=False), + sa.Column('author', sa.String(length=40), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.ForeignKeyConstraint(['app_id'], ['apps.id'], ), + sa.PrimaryKeyConstraint('id', name='published_app_tool_pkey'), + sa.UniqueConstraint('app_id', 'user_id', name='unique_published_app_tool') + ) # ### end Alembic commands ### diff --git a/api/migrations/versions/42e85ed5564d_conversation_columns_set_nullable.py b/api/migrations/versions/42e85ed5564d_conversation_columns_set_nullable.py index f388b99b90..76056a9460 100644 --- a/api/migrations/versions/42e85ed5564d_conversation_columns_set_nullable.py +++ b/api/migrations/versions/42e85ed5564d_conversation_columns_set_nullable.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '42e85ed5564d' down_revision = 'f9107f83abab' @@ -18,31 +24,59 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('conversations', schema=None) as batch_op: - batch_op.alter_column('app_model_config_id', - existing_type=postgresql.UUID(), - nullable=True) - batch_op.alter_column('model_provider', - existing_type=sa.VARCHAR(length=255), - nullable=True) - batch_op.alter_column('model_id', - existing_type=sa.VARCHAR(length=255), - nullable=True) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('conversations', schema=None) as batch_op: + batch_op.alter_column('app_model_config_id', + existing_type=postgresql.UUID(), + nullable=True) + batch_op.alter_column('model_provider', + existing_type=sa.VARCHAR(length=255), + nullable=True) + batch_op.alter_column('model_id', + existing_type=sa.VARCHAR(length=255), + nullable=True) + else: + with op.batch_alter_table('conversations', schema=None) as batch_op: + batch_op.alter_column('app_model_config_id', + existing_type=models.types.StringUUID(), + nullable=True) + batch_op.alter_column('model_provider', + existing_type=sa.VARCHAR(length=255), + nullable=True) + batch_op.alter_column('model_id', + existing_type=sa.VARCHAR(length=255), + nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('conversations', schema=None) as batch_op: - batch_op.alter_column('model_id', - existing_type=sa.VARCHAR(length=255), - nullable=False) - batch_op.alter_column('model_provider', - existing_type=sa.VARCHAR(length=255), - nullable=False) - batch_op.alter_column('app_model_config_id', - existing_type=postgresql.UUID(), - nullable=False) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('conversations', schema=None) as batch_op: + batch_op.alter_column('model_id', + existing_type=sa.VARCHAR(length=255), + nullable=False) + batch_op.alter_column('model_provider', + existing_type=sa.VARCHAR(length=255), + nullable=False) + batch_op.alter_column('app_model_config_id', + existing_type=postgresql.UUID(), + nullable=False) + else: + with op.batch_alter_table('conversations', schema=None) as batch_op: + batch_op.alter_column('model_id', + existing_type=sa.VARCHAR(length=255), + nullable=False) + batch_op.alter_column('model_provider', + existing_type=sa.VARCHAR(length=255), + nullable=False) + batch_op.alter_column('app_model_config_id', + existing_type=models.types.StringUUID(), + nullable=False) # ### end Alembic commands ### diff --git a/api/migrations/versions/4823da1d26cf_add_tool_file.py b/api/migrations/versions/4823da1d26cf_add_tool_file.py index 1a473a10fe..9ef9c17a3a 100644 --- a/api/migrations/versions/4823da1d26cf_add_tool_file.py +++ b/api/migrations/versions/4823da1d26cf_add_tool_file.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '4823da1d26cf' down_revision = '053da0c1d756' @@ -18,16 +24,30 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('tool_files', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('user_id', postgresql.UUID(), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('conversation_id', postgresql.UUID(), nullable=False), - sa.Column('file_key', sa.String(length=255), nullable=False), - sa.Column('mimetype', sa.String(length=255), nullable=False), - sa.Column('original_url', sa.String(length=255), nullable=True), - sa.PrimaryKeyConstraint('id', name='tool_file_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('tool_files', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('user_id', postgresql.UUID(), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('conversation_id', postgresql.UUID(), nullable=False), + sa.Column('file_key', sa.String(length=255), nullable=False), + sa.Column('mimetype', sa.String(length=255), nullable=False), + sa.Column('original_url', sa.String(length=255), nullable=True), + sa.PrimaryKeyConstraint('id', name='tool_file_pkey') + ) + else: + op.create_table('tool_files', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('user_id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('conversation_id', models.types.StringUUID(), nullable=False), + sa.Column('file_key', sa.String(length=255), nullable=False), + sa.Column('mimetype', sa.String(length=255), nullable=False), + sa.Column('original_url', sa.String(length=255), nullable=True), + sa.PrimaryKeyConstraint('id', name='tool_file_pkey') + ) # ### end Alembic commands ### diff --git a/api/migrations/versions/4829e54d2fee_change_message_chain_id_to_nullable.py b/api/migrations/versions/4829e54d2fee_change_message_chain_id_to_nullable.py index 2405021856..ef066587b7 100644 --- a/api/migrations/versions/4829e54d2fee_change_message_chain_id_to_nullable.py +++ b/api/migrations/versions/4829e54d2fee_change_message_chain_id_to_nullable.py @@ -8,6 +8,12 @@ Create Date: 2024-01-12 03:42:27.362415 from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '4829e54d2fee' down_revision = '114eed84c228' @@ -17,19 +23,39 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: - batch_op.alter_column('message_chain_id', - existing_type=postgresql.UUID(), - nullable=True) + conn = op.get_bind() + + if _is_pg(conn): + # PostgreSQL: Keep original syntax + with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: + batch_op.alter_column('message_chain_id', + existing_type=postgresql.UUID(), + nullable=True) + else: + # MySQL: Use compatible syntax + with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: + batch_op.alter_column('message_chain_id', + existing_type=models.types.StringUUID(), + nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: - batch_op.alter_column('message_chain_id', - existing_type=postgresql.UUID(), - nullable=False) + conn = op.get_bind() + + if _is_pg(conn): + # PostgreSQL: Keep original syntax + with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: + batch_op.alter_column('message_chain_id', + existing_type=postgresql.UUID(), + nullable=False) + else: + # MySQL: Use compatible syntax + with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: + batch_op.alter_column('message_chain_id', + existing_type=models.types.StringUUID(), + nullable=False) # ### end Alembic commands ### diff --git a/api/migrations/versions/4bcffcd64aa4_update_dataset_model_field_null_.py b/api/migrations/versions/4bcffcd64aa4_update_dataset_model_field_null_.py index 178bd24e3c..bee290e8dc 100644 --- a/api/migrations/versions/4bcffcd64aa4_update_dataset_model_field_null_.py +++ b/api/migrations/versions/4bcffcd64aa4_update_dataset_model_field_null_.py @@ -8,6 +8,10 @@ Create Date: 2023-08-28 20:58:50.077056 import sqlalchemy as sa from alembic import op + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '4bcffcd64aa4' down_revision = '853f9b9cd3b6' @@ -17,29 +21,55 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('datasets', schema=None) as batch_op: - batch_op.alter_column('embedding_model', - existing_type=sa.VARCHAR(length=255), - nullable=True, - existing_server_default=sa.text("'text-embedding-ada-002'::character varying")) - batch_op.alter_column('embedding_model_provider', - existing_type=sa.VARCHAR(length=255), - nullable=True, - existing_server_default=sa.text("'openai'::character varying")) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.alter_column('embedding_model', + existing_type=sa.VARCHAR(length=255), + nullable=True, + existing_server_default=sa.text("'text-embedding-ada-002'::character varying")) + batch_op.alter_column('embedding_model_provider', + existing_type=sa.VARCHAR(length=255), + nullable=True, + existing_server_default=sa.text("'openai'::character varying")) + else: + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.alter_column('embedding_model', + existing_type=sa.VARCHAR(length=255), + nullable=True, + existing_server_default=sa.text("'text-embedding-ada-002'")) + batch_op.alter_column('embedding_model_provider', + existing_type=sa.VARCHAR(length=255), + nullable=True, + existing_server_default=sa.text("'openai'")) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('datasets', schema=None) as batch_op: - batch_op.alter_column('embedding_model_provider', - existing_type=sa.VARCHAR(length=255), - nullable=False, - existing_server_default=sa.text("'openai'::character varying")) - batch_op.alter_column('embedding_model', - existing_type=sa.VARCHAR(length=255), - nullable=False, - existing_server_default=sa.text("'text-embedding-ada-002'::character varying")) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.alter_column('embedding_model_provider', + existing_type=sa.VARCHAR(length=255), + nullable=False, + existing_server_default=sa.text("'openai'::character varying")) + batch_op.alter_column('embedding_model', + existing_type=sa.VARCHAR(length=255), + nullable=False, + existing_server_default=sa.text("'text-embedding-ada-002'::character varying")) + else: + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.alter_column('embedding_model_provider', + existing_type=sa.VARCHAR(length=255), + nullable=False, + existing_server_default=sa.text("'openai'")) + batch_op.alter_column('embedding_model', + existing_type=sa.VARCHAR(length=255), + nullable=False, + existing_server_default=sa.text("'text-embedding-ada-002'")) # ### end Alembic commands ### diff --git a/api/migrations/versions/4e99a8df00ff_add_load_balancing.py b/api/migrations/versions/4e99a8df00ff_add_load_balancing.py index 3be4ba4f2a..a2ab39bb28 100644 --- a/api/migrations/versions/4e99a8df00ff_add_load_balancing.py +++ b/api/migrations/versions/4e99a8df00ff_add_load_balancing.py @@ -10,6 +10,10 @@ from alembic import op import models.types + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '4e99a8df00ff' down_revision = '64a70a7aab8b' @@ -19,34 +23,67 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('load_balancing_model_configs', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('provider_name', sa.String(length=255), nullable=False), - sa.Column('model_name', sa.String(length=255), nullable=False), - sa.Column('model_type', sa.String(length=40), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('encrypted_config', sa.Text(), nullable=True), - sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='load_balancing_model_config_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('load_balancing_model_configs', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_name', sa.String(length=255), nullable=False), + sa.Column('model_name', sa.String(length=255), nullable=False), + sa.Column('model_type', sa.String(length=40), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('encrypted_config', sa.Text(), nullable=True), + sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='load_balancing_model_config_pkey') + ) + else: + op.create_table('load_balancing_model_configs', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_name', sa.String(length=255), nullable=False), + sa.Column('model_name', sa.String(length=255), nullable=False), + sa.Column('model_type', sa.String(length=40), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('encrypted_config', models.types.LongText(), nullable=True), + sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='load_balancing_model_config_pkey') + ) + with op.batch_alter_table('load_balancing_model_configs', schema=None) as batch_op: batch_op.create_index('load_balancing_model_config_tenant_provider_model_idx', ['tenant_id', 'provider_name', 'model_type'], unique=False) - op.create_table('provider_model_settings', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('provider_name', sa.String(length=255), nullable=False), - sa.Column('model_name', sa.String(length=255), nullable=False), - sa.Column('model_type', sa.String(length=40), nullable=False), - sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), - sa.Column('load_balancing_enabled', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='provider_model_setting_pkey') - ) + if _is_pg(conn): + op.create_table('provider_model_settings', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_name', sa.String(length=255), nullable=False), + sa.Column('model_name', sa.String(length=255), nullable=False), + sa.Column('model_type', sa.String(length=40), nullable=False), + sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('load_balancing_enabled', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='provider_model_setting_pkey') + ) + else: + op.create_table('provider_model_settings', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_name', sa.String(length=255), nullable=False), + sa.Column('model_name', sa.String(length=255), nullable=False), + sa.Column('model_type', sa.String(length=40), nullable=False), + sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('load_balancing_enabled', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='provider_model_setting_pkey') + ) + with op.batch_alter_table('provider_model_settings', schema=None) as batch_op: batch_op.create_index('provider_model_setting_tenant_provider_model_idx', ['tenant_id', 'provider_name', 'model_type'], unique=False) diff --git a/api/migrations/versions/5022897aaceb_add_model_name_in_embedding.py b/api/migrations/versions/5022897aaceb_add_model_name_in_embedding.py index c0f4af5a00..5e4bceaef1 100644 --- a/api/migrations/versions/5022897aaceb_add_model_name_in_embedding.py +++ b/api/migrations/versions/5022897aaceb_add_model_name_in_embedding.py @@ -8,6 +8,10 @@ Create Date: 2023-08-11 14:38:15.499460 import sqlalchemy as sa from alembic import op + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '5022897aaceb' down_revision = 'bf0aec5ba2cf' @@ -17,10 +21,20 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('embeddings', schema=None) as batch_op: - batch_op.add_column(sa.Column('model_name', sa.String(length=40), server_default=sa.text("'text-embedding-ada-002'::character varying"), nullable=False)) - batch_op.drop_constraint('embedding_hash_idx', type_='unique') - batch_op.create_unique_constraint('embedding_hash_idx', ['model_name', 'hash']) + conn = op.get_bind() + + if _is_pg(conn): + # PostgreSQL: Keep original syntax + with op.batch_alter_table('embeddings', schema=None) as batch_op: + batch_op.add_column(sa.Column('model_name', sa.String(length=40), server_default=sa.text("'text-embedding-ada-002'::character varying"), nullable=False)) + batch_op.drop_constraint('embedding_hash_idx', type_='unique') + batch_op.create_unique_constraint('embedding_hash_idx', ['model_name', 'hash']) + else: + # MySQL: Use compatible syntax + with op.batch_alter_table('embeddings', schema=None) as batch_op: + batch_op.add_column(sa.Column('model_name', sa.String(length=40), server_default=sa.text("'text-embedding-ada-002'"), nullable=False)) + batch_op.drop_constraint('embedding_hash_idx', type_='unique') + batch_op.create_unique_constraint('embedding_hash_idx', ['model_name', 'hash']) # ### end Alembic commands ### diff --git a/api/migrations/versions/53bf8af60645_update_model.py b/api/migrations/versions/53bf8af60645_update_model.py index 3d0928d013..bb4af075c1 100644 --- a/api/migrations/versions/53bf8af60645_update_model.py +++ b/api/migrations/versions/53bf8af60645_update_model.py @@ -10,6 +10,10 @@ from alembic import op import models as models + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '53bf8af60645' down_revision = '8e5588e6412e' @@ -19,23 +23,43 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('embeddings', schema=None) as batch_op: - batch_op.alter_column('provider_name', - existing_type=sa.VARCHAR(length=40), - type_=sa.String(length=255), - existing_nullable=False, - existing_server_default=sa.text("''::character varying")) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('embeddings', schema=None) as batch_op: + batch_op.alter_column('provider_name', + existing_type=sa.VARCHAR(length=40), + type_=sa.String(length=255), + existing_nullable=False, + existing_server_default=sa.text("''::character varying")) + else: + with op.batch_alter_table('embeddings', schema=None) as batch_op: + batch_op.alter_column('provider_name', + existing_type=sa.VARCHAR(length=40), + type_=sa.String(length=255), + existing_nullable=False, + existing_server_default=sa.text("''")) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('embeddings', schema=None) as batch_op: - batch_op.alter_column('provider_name', - existing_type=sa.String(length=255), - type_=sa.VARCHAR(length=40), - existing_nullable=False, - existing_server_default=sa.text("''::character varying")) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('embeddings', schema=None) as batch_op: + batch_op.alter_column('provider_name', + existing_type=sa.String(length=255), + type_=sa.VARCHAR(length=40), + existing_nullable=False, + existing_server_default=sa.text("''::character varying")) + else: + with op.batch_alter_table('embeddings', schema=None) as batch_op: + batch_op.alter_column('provider_name', + existing_type=sa.String(length=255), + type_=sa.VARCHAR(length=40), + existing_nullable=False, + existing_server_default=sa.text("''")) # ### end Alembic commands ### diff --git a/api/migrations/versions/563cf8bf777b_enable_tool_file_without_conversation_id.py b/api/migrations/versions/563cf8bf777b_enable_tool_file_without_conversation_id.py index 299f442de9..b080e7680b 100644 --- a/api/migrations/versions/563cf8bf777b_enable_tool_file_without_conversation_id.py +++ b/api/migrations/versions/563cf8bf777b_enable_tool_file_without_conversation_id.py @@ -8,6 +8,12 @@ Create Date: 2024-03-14 04:54:56.679506 from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '563cf8bf777b' down_revision = 'b5429b71023c' @@ -17,19 +23,35 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('tool_files', schema=None) as batch_op: - batch_op.alter_column('conversation_id', - existing_type=postgresql.UUID(), - nullable=True) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('tool_files', schema=None) as batch_op: + batch_op.alter_column('conversation_id', + existing_type=postgresql.UUID(), + nullable=True) + else: + with op.batch_alter_table('tool_files', schema=None) as batch_op: + batch_op.alter_column('conversation_id', + existing_type=models.types.StringUUID(), + nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('tool_files', schema=None) as batch_op: - batch_op.alter_column('conversation_id', - existing_type=postgresql.UUID(), - nullable=False) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('tool_files', schema=None) as batch_op: + batch_op.alter_column('conversation_id', + existing_type=postgresql.UUID(), + nullable=False) + else: + with op.batch_alter_table('tool_files', schema=None) as batch_op: + batch_op.alter_column('conversation_id', + existing_type=models.types.StringUUID(), + nullable=False) # ### end Alembic commands ### diff --git a/api/migrations/versions/614f77cecc48_add_last_active_at.py b/api/migrations/versions/614f77cecc48_add_last_active_at.py index 182f8f89f1..6d5c5bf61f 100644 --- a/api/migrations/versions/614f77cecc48_add_last_active_at.py +++ b/api/migrations/versions/614f77cecc48_add_last_active_at.py @@ -8,6 +8,10 @@ Create Date: 2023-06-15 13:33:00.357467 import sqlalchemy as sa from alembic import op + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '614f77cecc48' down_revision = 'a45f4dfde53b' @@ -17,8 +21,14 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('accounts', schema=None) as batch_op: - batch_op.add_column(sa.Column('last_active_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('accounts', schema=None) as batch_op: + batch_op.add_column(sa.Column('last_active_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False)) + else: + with op.batch_alter_table('accounts', schema=None) as batch_op: + batch_op.add_column(sa.Column('last_active_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/64b051264f32_init.py b/api/migrations/versions/64b051264f32_init.py index b0fb3deac6..ec0ae0fee2 100644 --- a/api/migrations/versions/64b051264f32_init.py +++ b/api/migrations/versions/64b051264f32_init.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '64b051264f32' down_revision = None @@ -18,263 +24,519 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') + conn = op.get_bind() + + if _is_pg(conn): + op.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') + else: + pass - op.create_table('account_integrates', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('account_id', postgresql.UUID(), nullable=False), - sa.Column('provider', sa.String(length=16), nullable=False), - sa.Column('open_id', sa.String(length=255), nullable=False), - sa.Column('encrypted_token', sa.String(length=255), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='account_integrate_pkey'), - sa.UniqueConstraint('account_id', 'provider', name='unique_account_provider'), - sa.UniqueConstraint('provider', 'open_id', name='unique_provider_open_id') - ) - op.create_table('accounts', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('email', sa.String(length=255), nullable=False), - sa.Column('password', sa.String(length=255), nullable=True), - sa.Column('password_salt', sa.String(length=255), nullable=True), - sa.Column('avatar', sa.String(length=255), nullable=True), - sa.Column('interface_language', sa.String(length=255), nullable=True), - sa.Column('interface_theme', sa.String(length=255), nullable=True), - sa.Column('timezone', sa.String(length=255), nullable=True), - sa.Column('last_login_at', sa.DateTime(), nullable=True), - sa.Column('last_login_ip', sa.String(length=255), nullable=True), - sa.Column('status', sa.String(length=16), server_default=sa.text("'active'::character varying"), nullable=False), - sa.Column('initialized_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='account_pkey') - ) + if _is_pg(conn): + op.create_table('account_integrates', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('account_id', postgresql.UUID(), nullable=False), + sa.Column('provider', sa.String(length=16), nullable=False), + sa.Column('open_id', sa.String(length=255), nullable=False), + sa.Column('encrypted_token', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='account_integrate_pkey'), + sa.UniqueConstraint('account_id', 'provider', name='unique_account_provider'), + sa.UniqueConstraint('provider', 'open_id', name='unique_provider_open_id') + ) + else: + op.create_table('account_integrates', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('account_id', models.types.StringUUID(), nullable=False), + sa.Column('provider', sa.String(length=16), nullable=False), + sa.Column('open_id', sa.String(length=255), nullable=False), + sa.Column('encrypted_token', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='account_integrate_pkey'), + sa.UniqueConstraint('account_id', 'provider', name='unique_account_provider'), + sa.UniqueConstraint('provider', 'open_id', name='unique_provider_open_id') + ) + if _is_pg(conn): + op.create_table('accounts', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('email', sa.String(length=255), nullable=False), + sa.Column('password', sa.String(length=255), nullable=True), + sa.Column('password_salt', sa.String(length=255), nullable=True), + sa.Column('avatar', sa.String(length=255), nullable=True), + sa.Column('interface_language', sa.String(length=255), nullable=True), + sa.Column('interface_theme', sa.String(length=255), nullable=True), + sa.Column('timezone', sa.String(length=255), nullable=True), + sa.Column('last_login_at', sa.DateTime(), nullable=True), + sa.Column('last_login_ip', sa.String(length=255), nullable=True), + sa.Column('status', sa.String(length=16), server_default=sa.text("'active'::character varying"), nullable=False), + sa.Column('initialized_at', sa.DateTime(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='account_pkey') + ) + else: + op.create_table('accounts', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('email', sa.String(length=255), nullable=False), + sa.Column('password', sa.String(length=255), nullable=True), + sa.Column('password_salt', sa.String(length=255), nullable=True), + sa.Column('avatar', sa.String(length=255), nullable=True), + sa.Column('interface_language', sa.String(length=255), nullable=True), + sa.Column('interface_theme', sa.String(length=255), nullable=True), + sa.Column('timezone', sa.String(length=255), nullable=True), + sa.Column('last_login_at', sa.DateTime(), nullable=True), + sa.Column('last_login_ip', sa.String(length=255), nullable=True), + sa.Column('status', sa.String(length=16), server_default=sa.text("'active'"), nullable=False), + sa.Column('initialized_at', sa.DateTime(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='account_pkey') + ) with op.batch_alter_table('accounts', schema=None) as batch_op: batch_op.create_index('account_email_idx', ['email'], unique=False) - op.create_table('api_requests', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('api_token_id', postgresql.UUID(), nullable=False), - sa.Column('path', sa.String(length=255), nullable=False), - sa.Column('request', sa.Text(), nullable=True), - sa.Column('response', sa.Text(), nullable=True), - sa.Column('ip', sa.String(length=255), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='api_request_pkey') - ) + if _is_pg(conn): + op.create_table('api_requests', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('api_token_id', postgresql.UUID(), nullable=False), + sa.Column('path', sa.String(length=255), nullable=False), + sa.Column('request', sa.Text(), nullable=True), + sa.Column('response', sa.Text(), nullable=True), + sa.Column('ip', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='api_request_pkey') + ) + else: + op.create_table('api_requests', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('api_token_id', models.types.StringUUID(), nullable=False), + sa.Column('path', sa.String(length=255), nullable=False), + sa.Column('request', models.types.LongText(), nullable=True), + sa.Column('response', models.types.LongText(), nullable=True), + sa.Column('ip', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='api_request_pkey') + ) with op.batch_alter_table('api_requests', schema=None) as batch_op: batch_op.create_index('api_request_token_idx', ['tenant_id', 'api_token_id'], unique=False) - op.create_table('api_tokens', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=True), - sa.Column('dataset_id', postgresql.UUID(), nullable=True), - sa.Column('type', sa.String(length=16), nullable=False), - sa.Column('token', sa.String(length=255), nullable=False), - sa.Column('last_used_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='api_token_pkey') - ) + if _is_pg(conn): + op.create_table('api_tokens', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=True), + sa.Column('dataset_id', postgresql.UUID(), nullable=True), + sa.Column('type', sa.String(length=16), nullable=False), + sa.Column('token', sa.String(length=255), nullable=False), + sa.Column('last_used_at', sa.DateTime(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='api_token_pkey') + ) + else: + op.create_table('api_tokens', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=True), + sa.Column('dataset_id', models.types.StringUUID(), nullable=True), + sa.Column('type', sa.String(length=16), nullable=False), + sa.Column('token', sa.String(length=255), nullable=False), + sa.Column('last_used_at', sa.DateTime(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='api_token_pkey') + ) with op.batch_alter_table('api_tokens', schema=None) as batch_op: batch_op.create_index('api_token_app_id_type_idx', ['app_id', 'type'], unique=False) batch_op.create_index('api_token_token_idx', ['token', 'type'], unique=False) - op.create_table('app_dataset_joins', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('dataset_id', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='app_dataset_join_pkey') - ) + if _is_pg(conn): + op.create_table('app_dataset_joins', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('dataset_id', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='app_dataset_join_pkey') + ) + else: + op.create_table('app_dataset_joins', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='app_dataset_join_pkey') + ) with op.batch_alter_table('app_dataset_joins', schema=None) as batch_op: batch_op.create_index('app_dataset_join_app_dataset_idx', ['dataset_id', 'app_id'], unique=False) - op.create_table('app_model_configs', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('provider', sa.String(length=255), nullable=False), - sa.Column('model_id', sa.String(length=255), nullable=False), - sa.Column('configs', sa.JSON(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('opening_statement', sa.Text(), nullable=True), - sa.Column('suggested_questions', sa.Text(), nullable=True), - sa.Column('suggested_questions_after_answer', sa.Text(), nullable=True), - sa.Column('more_like_this', sa.Text(), nullable=True), - sa.Column('model', sa.Text(), nullable=True), - sa.Column('user_input_form', sa.Text(), nullable=True), - sa.Column('pre_prompt', sa.Text(), nullable=True), - sa.Column('agent_mode', sa.Text(), nullable=True), - sa.PrimaryKeyConstraint('id', name='app_model_config_pkey') - ) + if _is_pg(conn): + op.create_table('app_model_configs', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('model_id', sa.String(length=255), nullable=False), + sa.Column('configs', sa.JSON(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('opening_statement', sa.Text(), nullable=True), + sa.Column('suggested_questions', sa.Text(), nullable=True), + sa.Column('suggested_questions_after_answer', sa.Text(), nullable=True), + sa.Column('more_like_this', sa.Text(), nullable=True), + sa.Column('model', sa.Text(), nullable=True), + sa.Column('user_input_form', sa.Text(), nullable=True), + sa.Column('pre_prompt', sa.Text(), nullable=True), + sa.Column('agent_mode', sa.Text(), nullable=True), + sa.PrimaryKeyConstraint('id', name='app_model_config_pkey') + ) + else: + op.create_table('app_model_configs', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('model_id', sa.String(length=255), nullable=False), + sa.Column('configs', sa.JSON(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('opening_statement', models.types.LongText(), nullable=True), + sa.Column('suggested_questions', models.types.LongText(), nullable=True), + sa.Column('suggested_questions_after_answer', models.types.LongText(), nullable=True), + sa.Column('more_like_this', models.types.LongText(), nullable=True), + sa.Column('model', models.types.LongText(), nullable=True), + sa.Column('user_input_form', models.types.LongText(), nullable=True), + sa.Column('pre_prompt', models.types.LongText(), nullable=True), + sa.Column('agent_mode', models.types.LongText(), nullable=True), + sa.PrimaryKeyConstraint('id', name='app_model_config_pkey') + ) with op.batch_alter_table('app_model_configs', schema=None) as batch_op: batch_op.create_index('app_app_id_idx', ['app_id'], unique=False) - op.create_table('apps', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('mode', sa.String(length=255), nullable=False), - sa.Column('icon', sa.String(length=255), nullable=True), - sa.Column('icon_background', sa.String(length=255), nullable=True), - sa.Column('app_model_config_id', postgresql.UUID(), nullable=True), - sa.Column('status', sa.String(length=255), server_default=sa.text("'normal'::character varying"), nullable=False), - sa.Column('enable_site', sa.Boolean(), nullable=False), - sa.Column('enable_api', sa.Boolean(), nullable=False), - sa.Column('api_rpm', sa.Integer(), nullable=False), - sa.Column('api_rph', sa.Integer(), nullable=False), - sa.Column('is_demo', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.Column('is_public', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='app_pkey') - ) + if _is_pg(conn): + op.create_table('apps', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('mode', sa.String(length=255), nullable=False), + sa.Column('icon', sa.String(length=255), nullable=True), + sa.Column('icon_background', sa.String(length=255), nullable=True), + sa.Column('app_model_config_id', postgresql.UUID(), nullable=True), + sa.Column('status', sa.String(length=255), server_default=sa.text("'normal'::character varying"), nullable=False), + sa.Column('enable_site', sa.Boolean(), nullable=False), + sa.Column('enable_api', sa.Boolean(), nullable=False), + sa.Column('api_rpm', sa.Integer(), nullable=False), + sa.Column('api_rph', sa.Integer(), nullable=False), + sa.Column('is_demo', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('is_public', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='app_pkey') + ) + else: + op.create_table('apps', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('mode', sa.String(length=255), nullable=False), + sa.Column('icon', sa.String(length=255), nullable=True), + sa.Column('icon_background', sa.String(length=255), nullable=True), + sa.Column('app_model_config_id', models.types.StringUUID(), nullable=True), + sa.Column('status', sa.String(length=255), server_default=sa.text("'normal'"), nullable=False), + sa.Column('enable_site', sa.Boolean(), nullable=False), + sa.Column('enable_api', sa.Boolean(), nullable=False), + sa.Column('api_rpm', sa.Integer(), nullable=False), + sa.Column('api_rph', sa.Integer(), nullable=False), + sa.Column('is_demo', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('is_public', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='app_pkey') + ) with op.batch_alter_table('apps', schema=None) as batch_op: batch_op.create_index('app_tenant_id_idx', ['tenant_id'], unique=False) - op.execute('CREATE SEQUENCE task_id_sequence;') - op.execute('CREATE SEQUENCE taskset_id_sequence;') + if _is_pg(conn): + op.execute('CREATE SEQUENCE task_id_sequence;') + op.execute('CREATE SEQUENCE taskset_id_sequence;') + else: + pass - op.create_table('celery_taskmeta', - sa.Column('id', sa.Integer(), nullable=False, - server_default=sa.text('nextval(\'task_id_sequence\')')), - sa.Column('task_id', sa.String(length=155), nullable=True), - sa.Column('status', sa.String(length=50), nullable=True), - sa.Column('result', sa.PickleType(), nullable=True), - sa.Column('date_done', sa.DateTime(), nullable=True), - sa.Column('traceback', sa.Text(), nullable=True), - sa.Column('name', sa.String(length=155), nullable=True), - sa.Column('args', sa.LargeBinary(), nullable=True), - sa.Column('kwargs', sa.LargeBinary(), nullable=True), - sa.Column('worker', sa.String(length=155), nullable=True), - sa.Column('retries', sa.Integer(), nullable=True), - sa.Column('queue', sa.String(length=155), nullable=True), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('task_id') - ) - op.create_table('celery_tasksetmeta', - sa.Column('id', sa.Integer(), nullable=False, - server_default=sa.text('nextval(\'taskset_id_sequence\')')), - sa.Column('taskset_id', sa.String(length=155), nullable=True), - sa.Column('result', sa.PickleType(), nullable=True), - sa.Column('date_done', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('taskset_id') - ) - op.create_table('conversations', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('app_model_config_id', postgresql.UUID(), nullable=False), - sa.Column('model_provider', sa.String(length=255), nullable=False), - sa.Column('override_model_configs', sa.Text(), nullable=True), - sa.Column('model_id', sa.String(length=255), nullable=False), - sa.Column('mode', sa.String(length=255), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('summary', sa.Text(), nullable=True), - sa.Column('inputs', sa.JSON(), nullable=True), - sa.Column('introduction', sa.Text(), nullable=True), - sa.Column('system_instruction', sa.Text(), nullable=True), - sa.Column('system_instruction_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), - sa.Column('status', sa.String(length=255), nullable=False), - sa.Column('from_source', sa.String(length=255), nullable=False), - sa.Column('from_end_user_id', postgresql.UUID(), nullable=True), - sa.Column('from_account_id', postgresql.UUID(), nullable=True), - sa.Column('read_at', sa.DateTime(), nullable=True), - sa.Column('read_account_id', postgresql.UUID(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='conversation_pkey') - ) + if _is_pg(conn): + op.create_table('celery_taskmeta', + sa.Column('id', sa.Integer(), nullable=False, + server_default=sa.text('nextval(\'task_id_sequence\')')), + sa.Column('task_id', sa.String(length=155), nullable=True), + sa.Column('status', sa.String(length=50), nullable=True), + sa.Column('result', sa.PickleType(), nullable=True), + sa.Column('date_done', sa.DateTime(), nullable=True), + sa.Column('traceback', sa.Text(), nullable=True), + sa.Column('name', sa.String(length=155), nullable=True), + sa.Column('args', sa.LargeBinary(), nullable=True), + sa.Column('kwargs', sa.LargeBinary(), nullable=True), + sa.Column('worker', sa.String(length=155), nullable=True), + sa.Column('retries', sa.Integer(), nullable=True), + sa.Column('queue', sa.String(length=155), nullable=True), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('task_id') + ) + else: + op.create_table('celery_taskmeta', + sa.Column('id', sa.Integer(), nullable=False, autoincrement=True), + sa.Column('task_id', sa.String(length=155), nullable=True), + sa.Column('status', sa.String(length=50), nullable=True), + sa.Column('result', models.types.BinaryData(), nullable=True), + sa.Column('date_done', sa.DateTime(), nullable=True), + sa.Column('traceback', models.types.LongText(), nullable=True), + sa.Column('name', sa.String(length=155), nullable=True), + sa.Column('args', models.types.BinaryData(), nullable=True), + sa.Column('kwargs', models.types.BinaryData(), nullable=True), + sa.Column('worker', sa.String(length=155), nullable=True), + sa.Column('retries', sa.Integer(), nullable=True), + sa.Column('queue', sa.String(length=155), nullable=True), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('task_id') + ) + if _is_pg(conn): + op.create_table('celery_tasksetmeta', + sa.Column('id', sa.Integer(), nullable=False, + server_default=sa.text('nextval(\'taskset_id_sequence\')')), + sa.Column('taskset_id', sa.String(length=155), nullable=True), + sa.Column('result', sa.PickleType(), nullable=True), + sa.Column('date_done', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('taskset_id') + ) + else: + op.create_table('celery_tasksetmeta', + sa.Column('id', sa.Integer(), nullable=False, autoincrement=True), + sa.Column('taskset_id', sa.String(length=155), nullable=True), + sa.Column('result', models.types.BinaryData(), nullable=True), + sa.Column('date_done', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('taskset_id') + ) + if _is_pg(conn): + op.create_table('conversations', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('app_model_config_id', postgresql.UUID(), nullable=False), + sa.Column('model_provider', sa.String(length=255), nullable=False), + sa.Column('override_model_configs', sa.Text(), nullable=True), + sa.Column('model_id', sa.String(length=255), nullable=False), + sa.Column('mode', sa.String(length=255), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('summary', sa.Text(), nullable=True), + sa.Column('inputs', sa.JSON(), nullable=True), + sa.Column('introduction', sa.Text(), nullable=True), + sa.Column('system_instruction', sa.Text(), nullable=True), + sa.Column('system_instruction_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), + sa.Column('status', sa.String(length=255), nullable=False), + sa.Column('from_source', sa.String(length=255), nullable=False), + sa.Column('from_end_user_id', postgresql.UUID(), nullable=True), + sa.Column('from_account_id', postgresql.UUID(), nullable=True), + sa.Column('read_at', sa.DateTime(), nullable=True), + sa.Column('read_account_id', postgresql.UUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='conversation_pkey') + ) + else: + op.create_table('conversations', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('app_model_config_id', models.types.StringUUID(), nullable=False), + sa.Column('model_provider', sa.String(length=255), nullable=False), + sa.Column('override_model_configs', models.types.LongText(), nullable=True), + sa.Column('model_id', sa.String(length=255), nullable=False), + sa.Column('mode', sa.String(length=255), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('summary', models.types.LongText(), nullable=True), + sa.Column('inputs', sa.JSON(), nullable=True), + sa.Column('introduction', models.types.LongText(), nullable=True), + sa.Column('system_instruction', models.types.LongText(), nullable=True), + sa.Column('system_instruction_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), + sa.Column('status', sa.String(length=255), nullable=False), + sa.Column('from_source', sa.String(length=255), nullable=False), + sa.Column('from_end_user_id', models.types.StringUUID(), nullable=True), + sa.Column('from_account_id', models.types.StringUUID(), nullable=True), + sa.Column('read_at', sa.DateTime(), nullable=True), + sa.Column('read_account_id', models.types.StringUUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='conversation_pkey') + ) with op.batch_alter_table('conversations', schema=None) as batch_op: batch_op.create_index('conversation_app_from_user_idx', ['app_id', 'from_source', 'from_end_user_id'], unique=False) - op.create_table('dataset_keyword_tables', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('dataset_id', postgresql.UUID(), nullable=False), - sa.Column('keyword_table', sa.Text(), nullable=False), - sa.PrimaryKeyConstraint('id', name='dataset_keyword_table_pkey'), - sa.UniqueConstraint('dataset_id') - ) + if _is_pg(conn): + op.create_table('dataset_keyword_tables', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('dataset_id', postgresql.UUID(), nullable=False), + sa.Column('keyword_table', sa.Text(), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_keyword_table_pkey'), + sa.UniqueConstraint('dataset_id') + ) + else: + op.create_table('dataset_keyword_tables', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('keyword_table', models.types.LongText(), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_keyword_table_pkey'), + sa.UniqueConstraint('dataset_id') + ) with op.batch_alter_table('dataset_keyword_tables', schema=None) as batch_op: batch_op.create_index('dataset_keyword_table_dataset_id_idx', ['dataset_id'], unique=False) - op.create_table('dataset_process_rules', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('dataset_id', postgresql.UUID(), nullable=False), - sa.Column('mode', sa.String(length=255), server_default=sa.text("'automatic'::character varying"), nullable=False), - sa.Column('rules', sa.Text(), nullable=True), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='dataset_process_rule_pkey') - ) + if _is_pg(conn): + op.create_table('dataset_process_rules', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('dataset_id', postgresql.UUID(), nullable=False), + sa.Column('mode', sa.String(length=255), server_default=sa.text("'automatic'::character varying"), nullable=False), + sa.Column('rules', sa.Text(), nullable=True), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_process_rule_pkey') + ) + else: + op.create_table('dataset_process_rules', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('mode', sa.String(length=255), server_default=sa.text("'automatic'"), nullable=False), + sa.Column('rules', models.types.LongText(), nullable=True), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_process_rule_pkey') + ) with op.batch_alter_table('dataset_process_rules', schema=None) as batch_op: batch_op.create_index('dataset_process_rule_dataset_id_idx', ['dataset_id'], unique=False) - op.create_table('dataset_queries', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('dataset_id', postgresql.UUID(), nullable=False), - sa.Column('content', sa.Text(), nullable=False), - sa.Column('source', sa.String(length=255), nullable=False), - sa.Column('source_app_id', postgresql.UUID(), nullable=True), - sa.Column('created_by_role', sa.String(), nullable=False), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='dataset_query_pkey') - ) + if _is_pg(conn): + op.create_table('dataset_queries', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('dataset_id', postgresql.UUID(), nullable=False), + sa.Column('content', sa.Text(), nullable=False), + sa.Column('source', sa.String(length=255), nullable=False), + sa.Column('source_app_id', postgresql.UUID(), nullable=True), + sa.Column('created_by_role', sa.String(), nullable=False), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_query_pkey') + ) + else: + op.create_table('dataset_queries', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('content', models.types.LongText(), nullable=False), + sa.Column('source', sa.String(length=255), nullable=False), + sa.Column('source_app_id', models.types.StringUUID(), nullable=True), + sa.Column('created_by_role', sa.String(length=255), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_query_pkey') + ) with op.batch_alter_table('dataset_queries', schema=None) as batch_op: batch_op.create_index('dataset_query_dataset_id_idx', ['dataset_id'], unique=False) - op.create_table('datasets', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('description', sa.Text(), nullable=True), - sa.Column('provider', sa.String(length=255), server_default=sa.text("'vendor'::character varying"), nullable=False), - sa.Column('permission', sa.String(length=255), server_default=sa.text("'only_me'::character varying"), nullable=False), - sa.Column('data_source_type', sa.String(length=255), nullable=True), - sa.Column('indexing_technique', sa.String(length=255), nullable=True), - sa.Column('index_struct', sa.Text(), nullable=True), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_by', postgresql.UUID(), nullable=True), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='dataset_pkey') - ) + if _is_pg(conn): + op.create_table('datasets', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('description', sa.Text(), nullable=True), + sa.Column('provider', sa.String(length=255), server_default=sa.text("'vendor'::character varying"), nullable=False), + sa.Column('permission', sa.String(length=255), server_default=sa.text("'only_me'::character varying"), nullable=False), + sa.Column('data_source_type', sa.String(length=255), nullable=True), + sa.Column('indexing_technique', sa.String(length=255), nullable=True), + sa.Column('index_struct', sa.Text(), nullable=True), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_by', postgresql.UUID(), nullable=True), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_pkey') + ) + else: + op.create_table('datasets', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('description', models.types.LongText(), nullable=True), + sa.Column('provider', sa.String(length=255), server_default=sa.text("'vendor'"), nullable=False), + sa.Column('permission', sa.String(length=255), server_default=sa.text("'only_me'"), nullable=False), + sa.Column('data_source_type', sa.String(length=255), nullable=True), + sa.Column('indexing_technique', sa.String(length=255), nullable=True), + sa.Column('index_struct', models.types.LongText(), nullable=True), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_pkey') + ) with op.batch_alter_table('datasets', schema=None) as batch_op: batch_op.create_index('dataset_tenant_idx', ['tenant_id'], unique=False) - op.create_table('dify_setups', - sa.Column('version', sa.String(length=255), nullable=False), - sa.Column('setup_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('version', name='dify_setup_pkey') - ) - op.create_table('document_segments', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('dataset_id', postgresql.UUID(), nullable=False), - sa.Column('document_id', postgresql.UUID(), nullable=False), - sa.Column('position', sa.Integer(), nullable=False), - sa.Column('content', sa.Text(), nullable=False), - sa.Column('word_count', sa.Integer(), nullable=False), - sa.Column('tokens', sa.Integer(), nullable=False), - sa.Column('keywords', sa.JSON(), nullable=True), - sa.Column('index_node_id', sa.String(length=255), nullable=True), - sa.Column('index_node_hash', sa.String(length=255), nullable=True), - sa.Column('hit_count', sa.Integer(), nullable=False), - sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), - sa.Column('disabled_at', sa.DateTime(), nullable=True), - sa.Column('disabled_by', postgresql.UUID(), nullable=True), - sa.Column('status', sa.String(length=255), server_default=sa.text("'waiting'::character varying"), nullable=False), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('indexing_at', sa.DateTime(), nullable=True), - sa.Column('completed_at', sa.DateTime(), nullable=True), - sa.Column('error', sa.Text(), nullable=True), - sa.Column('stopped_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id', name='document_segment_pkey') - ) + if _is_pg(conn): + op.create_table('dify_setups', + sa.Column('version', sa.String(length=255), nullable=False), + sa.Column('setup_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('version', name='dify_setup_pkey') + ) + else: + op.create_table('dify_setups', + sa.Column('version', sa.String(length=255), nullable=False), + sa.Column('setup_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('version', name='dify_setup_pkey') + ) + if _is_pg(conn): + op.create_table('document_segments', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('dataset_id', postgresql.UUID(), nullable=False), + sa.Column('document_id', postgresql.UUID(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('content', sa.Text(), nullable=False), + sa.Column('word_count', sa.Integer(), nullable=False), + sa.Column('tokens', sa.Integer(), nullable=False), + sa.Column('keywords', sa.JSON(), nullable=True), + sa.Column('index_node_id', sa.String(length=255), nullable=True), + sa.Column('index_node_hash', sa.String(length=255), nullable=True), + sa.Column('hit_count', sa.Integer(), nullable=False), + sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('disabled_at', sa.DateTime(), nullable=True), + sa.Column('disabled_by', postgresql.UUID(), nullable=True), + sa.Column('status', sa.String(length=255), server_default=sa.text("'waiting'::character varying"), nullable=False), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('indexing_at', sa.DateTime(), nullable=True), + sa.Column('completed_at', sa.DateTime(), nullable=True), + sa.Column('error', sa.Text(), nullable=True), + sa.Column('stopped_at', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id', name='document_segment_pkey') + ) + else: + op.create_table('document_segments', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('document_id', models.types.StringUUID(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('content', models.types.LongText(), nullable=False), + sa.Column('word_count', sa.Integer(), nullable=False), + sa.Column('tokens', sa.Integer(), nullable=False), + sa.Column('keywords', sa.JSON(), nullable=True), + sa.Column('index_node_id', sa.String(length=255), nullable=True), + sa.Column('index_node_hash', sa.String(length=255), nullable=True), + sa.Column('hit_count', sa.Integer(), nullable=False), + sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('disabled_at', sa.DateTime(), nullable=True), + sa.Column('disabled_by', models.types.StringUUID(), nullable=True), + sa.Column('status', sa.String(length=255), server_default=sa.text("'waiting'"), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('indexing_at', sa.DateTime(), nullable=True), + sa.Column('completed_at', sa.DateTime(), nullable=True), + sa.Column('error', models.types.LongText(), nullable=True), + sa.Column('stopped_at', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id', name='document_segment_pkey') + ) with op.batch_alter_table('document_segments', schema=None) as batch_op: batch_op.create_index('document_segment_dataset_id_idx', ['dataset_id'], unique=False) batch_op.create_index('document_segment_dataset_node_idx', ['dataset_id', 'index_node_id'], unique=False) @@ -282,359 +544,692 @@ def upgrade(): batch_op.create_index('document_segment_tenant_dataset_idx', ['dataset_id', 'tenant_id'], unique=False) batch_op.create_index('document_segment_tenant_document_idx', ['document_id', 'tenant_id'], unique=False) - op.create_table('documents', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('dataset_id', postgresql.UUID(), nullable=False), - sa.Column('position', sa.Integer(), nullable=False), - sa.Column('data_source_type', sa.String(length=255), nullable=False), - sa.Column('data_source_info', sa.Text(), nullable=True), - sa.Column('dataset_process_rule_id', postgresql.UUID(), nullable=True), - sa.Column('batch', sa.String(length=255), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('created_from', sa.String(length=255), nullable=False), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_api_request_id', postgresql.UUID(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('processing_started_at', sa.DateTime(), nullable=True), - sa.Column('file_id', sa.Text(), nullable=True), - sa.Column('word_count', sa.Integer(), nullable=True), - sa.Column('parsing_completed_at', sa.DateTime(), nullable=True), - sa.Column('cleaning_completed_at', sa.DateTime(), nullable=True), - sa.Column('splitting_completed_at', sa.DateTime(), nullable=True), - sa.Column('tokens', sa.Integer(), nullable=True), - sa.Column('indexing_latency', sa.Float(), nullable=True), - sa.Column('completed_at', sa.DateTime(), nullable=True), - sa.Column('is_paused', sa.Boolean(), server_default=sa.text('false'), nullable=True), - sa.Column('paused_by', postgresql.UUID(), nullable=True), - sa.Column('paused_at', sa.DateTime(), nullable=True), - sa.Column('error', sa.Text(), nullable=True), - sa.Column('stopped_at', sa.DateTime(), nullable=True), - sa.Column('indexing_status', sa.String(length=255), server_default=sa.text("'waiting'::character varying"), nullable=False), - sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), - sa.Column('disabled_at', sa.DateTime(), nullable=True), - sa.Column('disabled_by', postgresql.UUID(), nullable=True), - sa.Column('archived', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.Column('archived_reason', sa.String(length=255), nullable=True), - sa.Column('archived_by', postgresql.UUID(), nullable=True), - sa.Column('archived_at', sa.DateTime(), nullable=True), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('doc_type', sa.String(length=40), nullable=True), - sa.Column('doc_metadata', sa.JSON(), nullable=True), - sa.PrimaryKeyConstraint('id', name='document_pkey') - ) + if _is_pg(conn): + op.create_table('documents', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('dataset_id', postgresql.UUID(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('data_source_type', sa.String(length=255), nullable=False), + sa.Column('data_source_info', sa.Text(), nullable=True), + sa.Column('dataset_process_rule_id', postgresql.UUID(), nullable=True), + sa.Column('batch', sa.String(length=255), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('created_from', sa.String(length=255), nullable=False), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_api_request_id', postgresql.UUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('processing_started_at', sa.DateTime(), nullable=True), + sa.Column('file_id', sa.Text(), nullable=True), + sa.Column('word_count', sa.Integer(), nullable=True), + sa.Column('parsing_completed_at', sa.DateTime(), nullable=True), + sa.Column('cleaning_completed_at', sa.DateTime(), nullable=True), + sa.Column('splitting_completed_at', sa.DateTime(), nullable=True), + sa.Column('tokens', sa.Integer(), nullable=True), + sa.Column('indexing_latency', sa.Float(), nullable=True), + sa.Column('completed_at', sa.DateTime(), nullable=True), + sa.Column('is_paused', sa.Boolean(), server_default=sa.text('false'), nullable=True), + sa.Column('paused_by', postgresql.UUID(), nullable=True), + sa.Column('paused_at', sa.DateTime(), nullable=True), + sa.Column('error', sa.Text(), nullable=True), + sa.Column('stopped_at', sa.DateTime(), nullable=True), + sa.Column('indexing_status', sa.String(length=255), server_default=sa.text("'waiting'::character varying"), nullable=False), + sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('disabled_at', sa.DateTime(), nullable=True), + sa.Column('disabled_by', postgresql.UUID(), nullable=True), + sa.Column('archived', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('archived_reason', sa.String(length=255), nullable=True), + sa.Column('archived_by', postgresql.UUID(), nullable=True), + sa.Column('archived_at', sa.DateTime(), nullable=True), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('doc_type', sa.String(length=40), nullable=True), + sa.Column('doc_metadata', sa.JSON(), nullable=True), + sa.PrimaryKeyConstraint('id', name='document_pkey') + ) + else: + op.create_table('documents', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('data_source_type', sa.String(length=255), nullable=False), + sa.Column('data_source_info', models.types.LongText(), nullable=True), + sa.Column('dataset_process_rule_id', models.types.StringUUID(), nullable=True), + sa.Column('batch', sa.String(length=255), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('created_from', sa.String(length=255), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_api_request_id', models.types.StringUUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('processing_started_at', sa.DateTime(), nullable=True), + sa.Column('file_id', models.types.LongText(), nullable=True), + sa.Column('word_count', sa.Integer(), nullable=True), + sa.Column('parsing_completed_at', sa.DateTime(), nullable=True), + sa.Column('cleaning_completed_at', sa.DateTime(), nullable=True), + sa.Column('splitting_completed_at', sa.DateTime(), nullable=True), + sa.Column('tokens', sa.Integer(), nullable=True), + sa.Column('indexing_latency', sa.Float(), nullable=True), + sa.Column('completed_at', sa.DateTime(), nullable=True), + sa.Column('is_paused', sa.Boolean(), server_default=sa.text('false'), nullable=True), + sa.Column('paused_by', models.types.StringUUID(), nullable=True), + sa.Column('paused_at', sa.DateTime(), nullable=True), + sa.Column('error', models.types.LongText(), nullable=True), + sa.Column('stopped_at', sa.DateTime(), nullable=True), + sa.Column('indexing_status', sa.String(length=255), server_default=sa.text("'waiting'"), nullable=False), + sa.Column('enabled', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('disabled_at', sa.DateTime(), nullable=True), + sa.Column('disabled_by', models.types.StringUUID(), nullable=True), + sa.Column('archived', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('archived_reason', sa.String(length=255), nullable=True), + sa.Column('archived_by', models.types.StringUUID(), nullable=True), + sa.Column('archived_at', sa.DateTime(), nullable=True), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('doc_type', sa.String(length=40), nullable=True), + sa.Column('doc_metadata', sa.JSON(), nullable=True), + sa.PrimaryKeyConstraint('id', name='document_pkey') + ) with op.batch_alter_table('documents', schema=None) as batch_op: batch_op.create_index('document_dataset_id_idx', ['dataset_id'], unique=False) batch_op.create_index('document_is_paused_idx', ['is_paused'], unique=False) - op.create_table('embeddings', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('hash', sa.String(length=64), nullable=False), - sa.Column('embedding', sa.LargeBinary(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='embedding_pkey'), - sa.UniqueConstraint('hash', name='embedding_hash_idx') - ) - op.create_table('end_users', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=True), - sa.Column('type', sa.String(length=255), nullable=False), - sa.Column('external_user_id', sa.String(length=255), nullable=True), - sa.Column('name', sa.String(length=255), nullable=True), - sa.Column('is_anonymous', sa.Boolean(), server_default=sa.text('true'), nullable=False), - sa.Column('session_id', sa.String(length=255), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='end_user_pkey') - ) + if _is_pg(conn): + op.create_table('embeddings', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('hash', sa.String(length=64), nullable=False), + sa.Column('embedding', sa.LargeBinary(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='embedding_pkey'), + sa.UniqueConstraint('hash', name='embedding_hash_idx') + ) + else: + op.create_table('embeddings', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('hash', sa.String(length=64), nullable=False), + sa.Column('embedding', models.types.BinaryData(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='embedding_pkey'), + sa.UniqueConstraint('hash', name='embedding_hash_idx') + ) + if _is_pg(conn): + op.create_table('end_users', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=True), + sa.Column('type', sa.String(length=255), nullable=False), + sa.Column('external_user_id', sa.String(length=255), nullable=True), + sa.Column('name', sa.String(length=255), nullable=True), + sa.Column('is_anonymous', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('session_id', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='end_user_pkey') + ) + else: + op.create_table('end_users', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=True), + sa.Column('type', sa.String(length=255), nullable=False), + sa.Column('external_user_id', sa.String(length=255), nullable=True), + sa.Column('name', sa.String(length=255), nullable=True), + sa.Column('is_anonymous', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('session_id', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='end_user_pkey') + ) with op.batch_alter_table('end_users', schema=None) as batch_op: batch_op.create_index('end_user_session_id_idx', ['session_id', 'type'], unique=False) batch_op.create_index('end_user_tenant_session_id_idx', ['tenant_id', 'session_id', 'type'], unique=False) - op.create_table('installed_apps', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('app_owner_tenant_id', postgresql.UUID(), nullable=False), - sa.Column('position', sa.Integer(), nullable=False), - sa.Column('is_pinned', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.Column('last_used_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='installed_app_pkey'), - sa.UniqueConstraint('tenant_id', 'app_id', name='unique_tenant_app') - ) + if _is_pg(conn): + op.create_table('installed_apps', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('app_owner_tenant_id', postgresql.UUID(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('is_pinned', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('last_used_at', sa.DateTime(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='installed_app_pkey'), + sa.UniqueConstraint('tenant_id', 'app_id', name='unique_tenant_app') + ) + else: + op.create_table('installed_apps', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('app_owner_tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('is_pinned', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('last_used_at', sa.DateTime(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='installed_app_pkey'), + sa.UniqueConstraint('tenant_id', 'app_id', name='unique_tenant_app') + ) with op.batch_alter_table('installed_apps', schema=None) as batch_op: batch_op.create_index('installed_app_app_id_idx', ['app_id'], unique=False) batch_op.create_index('installed_app_tenant_id_idx', ['tenant_id'], unique=False) - op.create_table('invitation_codes', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('batch', sa.String(length=255), nullable=False), - sa.Column('code', sa.String(length=32), nullable=False), - sa.Column('status', sa.String(length=16), server_default=sa.text("'unused'::character varying"), nullable=False), - sa.Column('used_at', sa.DateTime(), nullable=True), - sa.Column('used_by_tenant_id', postgresql.UUID(), nullable=True), - sa.Column('used_by_account_id', postgresql.UUID(), nullable=True), - sa.Column('deprecated_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='invitation_code_pkey') - ) + if _is_pg(conn): + op.create_table('invitation_codes', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('batch', sa.String(length=255), nullable=False), + sa.Column('code', sa.String(length=32), nullable=False), + sa.Column('status', sa.String(length=16), server_default=sa.text("'unused'::character varying"), nullable=False), + sa.Column('used_at', sa.DateTime(), nullable=True), + sa.Column('used_by_tenant_id', postgresql.UUID(), nullable=True), + sa.Column('used_by_account_id', postgresql.UUID(), nullable=True), + sa.Column('deprecated_at', sa.DateTime(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='invitation_code_pkey') + ) + else: + op.create_table('invitation_codes', + sa.Column('id', sa.Integer(), nullable=False, autoincrement=True), + sa.Column('batch', sa.String(length=255), nullable=False), + sa.Column('code', sa.String(length=32), nullable=False), + sa.Column('status', sa.String(length=16), server_default=sa.text("'unused'"), nullable=False), + sa.Column('used_at', sa.DateTime(), nullable=True), + sa.Column('used_by_tenant_id', models.types.StringUUID(), nullable=True), + sa.Column('used_by_account_id', models.types.StringUUID(), nullable=True), + sa.Column('deprecated_at', sa.DateTime(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='invitation_code_pkey') + ) with op.batch_alter_table('invitation_codes', schema=None) as batch_op: batch_op.create_index('invitation_codes_batch_idx', ['batch'], unique=False) batch_op.create_index('invitation_codes_code_idx', ['code', 'status'], unique=False) - op.create_table('message_agent_thoughts', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('message_id', postgresql.UUID(), nullable=False), - sa.Column('message_chain_id', postgresql.UUID(), nullable=False), - sa.Column('position', sa.Integer(), nullable=False), - sa.Column('thought', sa.Text(), nullable=True), - sa.Column('tool', sa.Text(), nullable=True), - sa.Column('tool_input', sa.Text(), nullable=True), - sa.Column('observation', sa.Text(), nullable=True), - sa.Column('tool_process_data', sa.Text(), nullable=True), - sa.Column('message', sa.Text(), nullable=True), - sa.Column('message_token', sa.Integer(), nullable=True), - sa.Column('message_unit_price', sa.Numeric(), nullable=True), - sa.Column('answer', sa.Text(), nullable=True), - sa.Column('answer_token', sa.Integer(), nullable=True), - sa.Column('answer_unit_price', sa.Numeric(), nullable=True), - sa.Column('tokens', sa.Integer(), nullable=True), - sa.Column('total_price', sa.Numeric(), nullable=True), - sa.Column('currency', sa.String(), nullable=True), - sa.Column('latency', sa.Float(), nullable=True), - sa.Column('created_by_role', sa.String(), nullable=False), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='message_agent_thought_pkey') - ) + if _is_pg(conn): + op.create_table('message_agent_thoughts', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('message_id', postgresql.UUID(), nullable=False), + sa.Column('message_chain_id', postgresql.UUID(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('thought', sa.Text(), nullable=True), + sa.Column('tool', sa.Text(), nullable=True), + sa.Column('tool_input', sa.Text(), nullable=True), + sa.Column('observation', sa.Text(), nullable=True), + sa.Column('tool_process_data', sa.Text(), nullable=True), + sa.Column('message', sa.Text(), nullable=True), + sa.Column('message_token', sa.Integer(), nullable=True), + sa.Column('message_unit_price', sa.Numeric(), nullable=True), + sa.Column('answer', sa.Text(), nullable=True), + sa.Column('answer_token', sa.Integer(), nullable=True), + sa.Column('answer_unit_price', sa.Numeric(), nullable=True), + sa.Column('tokens', sa.Integer(), nullable=True), + sa.Column('total_price', sa.Numeric(), nullable=True), + sa.Column('currency', sa.String(), nullable=True), + sa.Column('latency', sa.Float(), nullable=True), + sa.Column('created_by_role', sa.String(), nullable=False), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='message_agent_thought_pkey') + ) + else: + op.create_table('message_agent_thoughts', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('message_id', models.types.StringUUID(), nullable=False), + sa.Column('message_chain_id', models.types.StringUUID(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('thought', models.types.LongText(), nullable=True), + sa.Column('tool', models.types.LongText(), nullable=True), + sa.Column('tool_input', models.types.LongText(), nullable=True), + sa.Column('observation', models.types.LongText(), nullable=True), + sa.Column('tool_process_data', models.types.LongText(), nullable=True), + sa.Column('message', models.types.LongText(), nullable=True), + sa.Column('message_token', sa.Integer(), nullable=True), + sa.Column('message_unit_price', sa.Numeric(), nullable=True), + sa.Column('answer', models.types.LongText(), nullable=True), + sa.Column('answer_token', sa.Integer(), nullable=True), + sa.Column('answer_unit_price', sa.Numeric(), nullable=True), + sa.Column('tokens', sa.Integer(), nullable=True), + sa.Column('total_price', sa.Numeric(), nullable=True), + sa.Column('currency', sa.String(length=255), nullable=True), + sa.Column('latency', sa.Float(), nullable=True), + sa.Column('created_by_role', sa.String(length=255), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='message_agent_thought_pkey') + ) with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: batch_op.create_index('message_agent_thought_message_chain_id_idx', ['message_chain_id'], unique=False) batch_op.create_index('message_agent_thought_message_id_idx', ['message_id'], unique=False) - op.create_table('message_chains', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('message_id', postgresql.UUID(), nullable=False), - sa.Column('type', sa.String(length=255), nullable=False), - sa.Column('input', sa.Text(), nullable=True), - sa.Column('output', sa.Text(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='message_chain_pkey') - ) + if _is_pg(conn): + op.create_table('message_chains', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('message_id', postgresql.UUID(), nullable=False), + sa.Column('type', sa.String(length=255), nullable=False), + sa.Column('input', sa.Text(), nullable=True), + sa.Column('output', sa.Text(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='message_chain_pkey') + ) + else: + op.create_table('message_chains', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('message_id', models.types.StringUUID(), nullable=False), + sa.Column('type', sa.String(length=255), nullable=False), + sa.Column('input', models.types.LongText(), nullable=True), + sa.Column('output', models.types.LongText(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='message_chain_pkey') + ) with op.batch_alter_table('message_chains', schema=None) as batch_op: batch_op.create_index('message_chain_message_id_idx', ['message_id'], unique=False) - op.create_table('message_feedbacks', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('conversation_id', postgresql.UUID(), nullable=False), - sa.Column('message_id', postgresql.UUID(), nullable=False), - sa.Column('rating', sa.String(length=255), nullable=False), - sa.Column('content', sa.Text(), nullable=True), - sa.Column('from_source', sa.String(length=255), nullable=False), - sa.Column('from_end_user_id', postgresql.UUID(), nullable=True), - sa.Column('from_account_id', postgresql.UUID(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='message_feedback_pkey') - ) + if _is_pg(conn): + op.create_table('message_feedbacks', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('conversation_id', postgresql.UUID(), nullable=False), + sa.Column('message_id', postgresql.UUID(), nullable=False), + sa.Column('rating', sa.String(length=255), nullable=False), + sa.Column('content', sa.Text(), nullable=True), + sa.Column('from_source', sa.String(length=255), nullable=False), + sa.Column('from_end_user_id', postgresql.UUID(), nullable=True), + sa.Column('from_account_id', postgresql.UUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='message_feedback_pkey') + ) + else: + op.create_table('message_feedbacks', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('conversation_id', models.types.StringUUID(), nullable=False), + sa.Column('message_id', models.types.StringUUID(), nullable=False), + sa.Column('rating', sa.String(length=255), nullable=False), + sa.Column('content', models.types.LongText(), nullable=True), + sa.Column('from_source', sa.String(length=255), nullable=False), + sa.Column('from_end_user_id', models.types.StringUUID(), nullable=True), + sa.Column('from_account_id', models.types.StringUUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='message_feedback_pkey') + ) with op.batch_alter_table('message_feedbacks', schema=None) as batch_op: batch_op.create_index('message_feedback_app_idx', ['app_id'], unique=False) batch_op.create_index('message_feedback_conversation_idx', ['conversation_id', 'from_source', 'rating'], unique=False) batch_op.create_index('message_feedback_message_idx', ['message_id', 'from_source'], unique=False) - op.create_table('operation_logs', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('account_id', postgresql.UUID(), nullable=False), - sa.Column('action', sa.String(length=255), nullable=False), - sa.Column('content', sa.JSON(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('created_ip', sa.String(length=255), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='operation_log_pkey') - ) + if _is_pg(conn): + op.create_table('operation_logs', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('account_id', postgresql.UUID(), nullable=False), + sa.Column('action', sa.String(length=255), nullable=False), + sa.Column('content', sa.JSON(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('created_ip', sa.String(length=255), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='operation_log_pkey') + ) + else: + op.create_table('operation_logs', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('account_id', models.types.StringUUID(), nullable=False), + sa.Column('action', sa.String(length=255), nullable=False), + sa.Column('content', sa.JSON(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('created_ip', sa.String(length=255), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='operation_log_pkey') + ) with op.batch_alter_table('operation_logs', schema=None) as batch_op: batch_op.create_index('operation_log_account_action_idx', ['tenant_id', 'account_id', 'action'], unique=False) - op.create_table('pinned_conversations', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('conversation_id', postgresql.UUID(), nullable=False), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='pinned_conversation_pkey') - ) + if _is_pg(conn): + op.create_table('pinned_conversations', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('conversation_id', postgresql.UUID(), nullable=False), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='pinned_conversation_pkey') + ) + else: + op.create_table('pinned_conversations', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('conversation_id', models.types.StringUUID(), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='pinned_conversation_pkey') + ) with op.batch_alter_table('pinned_conversations', schema=None) as batch_op: batch_op.create_index('pinned_conversation_conversation_idx', ['app_id', 'conversation_id', 'created_by'], unique=False) - op.create_table('providers', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('provider_name', sa.String(length=40), nullable=False), - sa.Column('provider_type', sa.String(length=40), nullable=False, server_default=sa.text("'custom'::character varying")), - sa.Column('encrypted_config', sa.Text(), nullable=True), - sa.Column('is_valid', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.Column('last_used', sa.DateTime(), nullable=True), - sa.Column('quota_type', sa.String(length=40), nullable=True, server_default=sa.text("''::character varying")), - sa.Column('quota_limit', sa.Integer(), nullable=True), - sa.Column('quota_used', sa.Integer(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='provider_pkey'), - sa.UniqueConstraint('tenant_id', 'provider_name', 'provider_type', 'quota_type', name='unique_provider_name_type_quota') - ) + if _is_pg(conn): + op.create_table('providers', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('provider_name', sa.String(length=40), nullable=False), + sa.Column('provider_type', sa.String(length=40), nullable=False, server_default=sa.text("'custom'::character varying")), + sa.Column('encrypted_config', sa.Text(), nullable=True), + sa.Column('is_valid', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('last_used', sa.DateTime(), nullable=True), + sa.Column('quota_type', sa.String(length=40), nullable=True, server_default=sa.text("''::character varying")), + sa.Column('quota_limit', sa.Integer(), nullable=True), + sa.Column('quota_used', sa.Integer(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='provider_pkey'), + sa.UniqueConstraint('tenant_id', 'provider_name', 'provider_type', 'quota_type', name='unique_provider_name_type_quota') + ) + else: + op.create_table('providers', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_name', sa.String(length=40), nullable=False), + sa.Column('provider_type', sa.String(length=40), nullable=False, server_default=sa.text("'custom'")), + sa.Column('encrypted_config', models.types.LongText(), nullable=True), + sa.Column('is_valid', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('last_used', sa.DateTime(), nullable=True), + sa.Column('quota_type', sa.String(length=40), nullable=True, server_default=sa.text("''")), + sa.Column('quota_limit', sa.Integer(), nullable=True), + sa.Column('quota_used', sa.Integer(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='provider_pkey'), + sa.UniqueConstraint('tenant_id', 'provider_name', 'provider_type', 'quota_type', name='unique_provider_name_type_quota') + ) with op.batch_alter_table('providers', schema=None) as batch_op: batch_op.create_index('provider_tenant_id_provider_idx', ['tenant_id', 'provider_name'], unique=False) - op.create_table('recommended_apps', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('description', sa.JSON(), nullable=False), - sa.Column('copyright', sa.String(length=255), nullable=False), - sa.Column('privacy_policy', sa.String(length=255), nullable=False), - sa.Column('category', sa.String(length=255), nullable=False), - sa.Column('position', sa.Integer(), nullable=False), - sa.Column('is_listed', sa.Boolean(), nullable=False), - sa.Column('install_count', sa.Integer(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='recommended_app_pkey') - ) + if _is_pg(conn): + op.create_table('recommended_apps', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('description', sa.JSON(), nullable=False), + sa.Column('copyright', sa.String(length=255), nullable=False), + sa.Column('privacy_policy', sa.String(length=255), nullable=False), + sa.Column('category', sa.String(length=255), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('is_listed', sa.Boolean(), nullable=False), + sa.Column('install_count', sa.Integer(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='recommended_app_pkey') + ) + else: + op.create_table('recommended_apps', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('description', sa.JSON(), nullable=False), + sa.Column('copyright', sa.String(length=255), nullable=False), + sa.Column('privacy_policy', sa.String(length=255), nullable=False), + sa.Column('category', sa.String(length=255), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('is_listed', sa.Boolean(), nullable=False), + sa.Column('install_count', sa.Integer(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='recommended_app_pkey') + ) with op.batch_alter_table('recommended_apps', schema=None) as batch_op: batch_op.create_index('recommended_app_app_id_idx', ['app_id'], unique=False) batch_op.create_index('recommended_app_is_listed_idx', ['is_listed'], unique=False) - op.create_table('saved_messages', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('message_id', postgresql.UUID(), nullable=False), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='saved_message_pkey') - ) + if _is_pg(conn): + op.create_table('saved_messages', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('message_id', postgresql.UUID(), nullable=False), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='saved_message_pkey') + ) + else: + op.create_table('saved_messages', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('message_id', models.types.StringUUID(), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='saved_message_pkey') + ) with op.batch_alter_table('saved_messages', schema=None) as batch_op: batch_op.create_index('saved_message_message_idx', ['app_id', 'message_id', 'created_by'], unique=False) - op.create_table('sessions', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('session_id', sa.String(length=255), nullable=True), - sa.Column('data', sa.LargeBinary(), nullable=True), - sa.Column('expiry', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('session_id') - ) - op.create_table('sites', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('title', sa.String(length=255), nullable=False), - sa.Column('icon', sa.String(length=255), nullable=True), - sa.Column('icon_background', sa.String(length=255), nullable=True), - sa.Column('description', sa.String(length=255), nullable=True), - sa.Column('default_language', sa.String(length=255), nullable=False), - sa.Column('copyright', sa.String(length=255), nullable=True), - sa.Column('privacy_policy', sa.String(length=255), nullable=True), - sa.Column('customize_domain', sa.String(length=255), nullable=True), - sa.Column('customize_token_strategy', sa.String(length=255), nullable=False), - sa.Column('prompt_public', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.Column('status', sa.String(length=255), server_default=sa.text("'normal'::character varying"), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('code', sa.String(length=255), nullable=True), - sa.PrimaryKeyConstraint('id', name='site_pkey') - ) + if _is_pg(conn): + op.create_table('sessions', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('session_id', sa.String(length=255), nullable=True), + sa.Column('data', sa.LargeBinary(), nullable=True), + sa.Column('expiry', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('session_id') + ) + else: + op.create_table('sessions', + sa.Column('id', sa.Integer(), nullable=False, autoincrement=True), + sa.Column('session_id', sa.String(length=255), nullable=True), + sa.Column('data', models.types.BinaryData(), nullable=True), + sa.Column('expiry', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('session_id') + ) + if _is_pg(conn): + op.create_table('sites', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('title', sa.String(length=255), nullable=False), + sa.Column('icon', sa.String(length=255), nullable=True), + sa.Column('icon_background', sa.String(length=255), nullable=True), + sa.Column('description', sa.String(length=255), nullable=True), + sa.Column('default_language', sa.String(length=255), nullable=False), + sa.Column('copyright', sa.String(length=255), nullable=True), + sa.Column('privacy_policy', sa.String(length=255), nullable=True), + sa.Column('customize_domain', sa.String(length=255), nullable=True), + sa.Column('customize_token_strategy', sa.String(length=255), nullable=False), + sa.Column('prompt_public', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('status', sa.String(length=255), server_default=sa.text("'normal'::character varying"), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('code', sa.String(length=255), nullable=True), + sa.PrimaryKeyConstraint('id', name='site_pkey') + ) + else: + op.create_table('sites', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('title', sa.String(length=255), nullable=False), + sa.Column('icon', sa.String(length=255), nullable=True), + sa.Column('icon_background', sa.String(length=255), nullable=True), + sa.Column('description', sa.String(length=255), nullable=True), + sa.Column('default_language', sa.String(length=255), nullable=False), + sa.Column('copyright', sa.String(length=255), nullable=True), + sa.Column('privacy_policy', sa.String(length=255), nullable=True), + sa.Column('customize_domain', sa.String(length=255), nullable=True), + sa.Column('customize_token_strategy', sa.String(length=255), nullable=False), + sa.Column('prompt_public', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('status', sa.String(length=255), server_default=sa.text("'normal'"), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('code', sa.String(length=255), nullable=True), + sa.PrimaryKeyConstraint('id', name='site_pkey') + ) with op.batch_alter_table('sites', schema=None) as batch_op: batch_op.create_index('site_app_id_idx', ['app_id'], unique=False) batch_op.create_index('site_code_idx', ['code', 'status'], unique=False) - op.create_table('tenant_account_joins', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('account_id', postgresql.UUID(), nullable=False), - sa.Column('role', sa.String(length=16), server_default='normal', nullable=False), - sa.Column('invited_by', postgresql.UUID(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='tenant_account_join_pkey'), - sa.UniqueConstraint('tenant_id', 'account_id', name='unique_tenant_account_join') - ) + if _is_pg(conn): + op.create_table('tenant_account_joins', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('account_id', postgresql.UUID(), nullable=False), + sa.Column('role', sa.String(length=16), server_default='normal', nullable=False), + sa.Column('invited_by', postgresql.UUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='tenant_account_join_pkey'), + sa.UniqueConstraint('tenant_id', 'account_id', name='unique_tenant_account_join') + ) + else: + op.create_table('tenant_account_joins', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('account_id', models.types.StringUUID(), nullable=False), + sa.Column('role', sa.String(length=16), server_default='normal', nullable=False), + sa.Column('invited_by', models.types.StringUUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tenant_account_join_pkey'), + sa.UniqueConstraint('tenant_id', 'account_id', name='unique_tenant_account_join') + ) with op.batch_alter_table('tenant_account_joins', schema=None) as batch_op: batch_op.create_index('tenant_account_join_account_id_idx', ['account_id'], unique=False) batch_op.create_index('tenant_account_join_tenant_id_idx', ['tenant_id'], unique=False) - op.create_table('tenants', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('encrypt_public_key', sa.Text(), nullable=True), - sa.Column('plan', sa.String(length=255), server_default=sa.text("'basic'::character varying"), nullable=False), - sa.Column('status', sa.String(length=255), server_default=sa.text("'normal'::character varying"), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='tenant_pkey') - ) - op.create_table('upload_files', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('storage_type', sa.String(length=255), nullable=False), - sa.Column('key', sa.String(length=255), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('size', sa.Integer(), nullable=False), - sa.Column('extension', sa.String(length=255), nullable=False), - sa.Column('mime_type', sa.String(length=255), nullable=True), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('used', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.Column('used_by', postgresql.UUID(), nullable=True), - sa.Column('used_at', sa.DateTime(), nullable=True), - sa.Column('hash', sa.String(length=255), nullable=True), - sa.PrimaryKeyConstraint('id', name='upload_file_pkey') - ) + if _is_pg(conn): + op.create_table('tenants', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('encrypt_public_key', sa.Text(), nullable=True), + sa.Column('plan', sa.String(length=255), server_default=sa.text("'basic'::character varying"), nullable=False), + sa.Column('status', sa.String(length=255), server_default=sa.text("'normal'::character varying"), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='tenant_pkey') + ) + else: + op.create_table('tenants', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('encrypt_public_key', models.types.LongText(), nullable=True), + sa.Column('plan', sa.String(length=255), server_default=sa.text("'basic'"), nullable=False), + sa.Column('status', sa.String(length=255), server_default=sa.text("'normal'"), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tenant_pkey') + ) + if _is_pg(conn): + op.create_table('upload_files', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('storage_type', sa.String(length=255), nullable=False), + sa.Column('key', sa.String(length=255), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('size', sa.Integer(), nullable=False), + sa.Column('extension', sa.String(length=255), nullable=False), + sa.Column('mime_type', sa.String(length=255), nullable=True), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('used', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('used_by', postgresql.UUID(), nullable=True), + sa.Column('used_at', sa.DateTime(), nullable=True), + sa.Column('hash', sa.String(length=255), nullable=True), + sa.PrimaryKeyConstraint('id', name='upload_file_pkey') + ) + else: + op.create_table('upload_files', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('storage_type', sa.String(length=255), nullable=False), + sa.Column('key', sa.String(length=255), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('size', sa.Integer(), nullable=False), + sa.Column('extension', sa.String(length=255), nullable=False), + sa.Column('mime_type', sa.String(length=255), nullable=True), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('used', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('used_by', models.types.StringUUID(), nullable=True), + sa.Column('used_at', sa.DateTime(), nullable=True), + sa.Column('hash', sa.String(length=255), nullable=True), + sa.PrimaryKeyConstraint('id', name='upload_file_pkey') + ) with op.batch_alter_table('upload_files', schema=None) as batch_op: batch_op.create_index('upload_file_tenant_idx', ['tenant_id'], unique=False) - op.create_table('message_annotations', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('conversation_id', postgresql.UUID(), nullable=False), - sa.Column('message_id', postgresql.UUID(), nullable=False), - sa.Column('content', sa.Text(), nullable=False), - sa.Column('account_id', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='message_annotation_pkey') - ) + if _is_pg(conn): + op.create_table('message_annotations', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('conversation_id', postgresql.UUID(), nullable=False), + sa.Column('message_id', postgresql.UUID(), nullable=False), + sa.Column('content', sa.Text(), nullable=False), + sa.Column('account_id', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='message_annotation_pkey') + ) + else: + op.create_table('message_annotations', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('conversation_id', models.types.StringUUID(), nullable=False), + sa.Column('message_id', models.types.StringUUID(), nullable=False), + sa.Column('content', models.types.LongText(), nullable=False), + sa.Column('account_id', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='message_annotation_pkey') + ) with op.batch_alter_table('message_annotations', schema=None) as batch_op: batch_op.create_index('message_annotation_app_idx', ['app_id'], unique=False) batch_op.create_index('message_annotation_conversation_idx', ['conversation_id'], unique=False) batch_op.create_index('message_annotation_message_idx', ['message_id'], unique=False) - op.create_table('messages', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('model_provider', sa.String(length=255), nullable=False), - sa.Column('model_id', sa.String(length=255), nullable=False), - sa.Column('override_model_configs', sa.Text(), nullable=True), - sa.Column('conversation_id', postgresql.UUID(), nullable=False), - sa.Column('inputs', sa.JSON(), nullable=True), - sa.Column('query', sa.Text(), nullable=False), - sa.Column('message', sa.JSON(), nullable=False), - sa.Column('message_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), - sa.Column('message_unit_price', sa.Numeric(precision=10, scale=4), nullable=False), - sa.Column('answer', sa.Text(), nullable=False), - sa.Column('answer_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), - sa.Column('answer_unit_price', sa.Numeric(precision=10, scale=4), nullable=False), - sa.Column('provider_response_latency', sa.Float(), server_default=sa.text('0'), nullable=False), - sa.Column('total_price', sa.Numeric(precision=10, scale=7), nullable=True), - sa.Column('currency', sa.String(length=255), nullable=False), - sa.Column('from_source', sa.String(length=255), nullable=False), - sa.Column('from_end_user_id', postgresql.UUID(), nullable=True), - sa.Column('from_account_id', postgresql.UUID(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('agent_based', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.PrimaryKeyConstraint('id', name='message_pkey') - ) + if _is_pg(conn): + op.create_table('messages', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('model_provider', sa.String(length=255), nullable=False), + sa.Column('model_id', sa.String(length=255), nullable=False), + sa.Column('override_model_configs', sa.Text(), nullable=True), + sa.Column('conversation_id', postgresql.UUID(), nullable=False), + sa.Column('inputs', sa.JSON(), nullable=True), + sa.Column('query', sa.Text(), nullable=False), + sa.Column('message', sa.JSON(), nullable=False), + sa.Column('message_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), + sa.Column('message_unit_price', sa.Numeric(precision=10, scale=4), nullable=False), + sa.Column('answer', sa.Text(), nullable=False), + sa.Column('answer_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), + sa.Column('answer_unit_price', sa.Numeric(precision=10, scale=4), nullable=False), + sa.Column('provider_response_latency', sa.Float(), server_default=sa.text('0'), nullable=False), + sa.Column('total_price', sa.Numeric(precision=10, scale=7), nullable=True), + sa.Column('currency', sa.String(length=255), nullable=False), + sa.Column('from_source', sa.String(length=255), nullable=False), + sa.Column('from_end_user_id', postgresql.UUID(), nullable=True), + sa.Column('from_account_id', postgresql.UUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('agent_based', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.PrimaryKeyConstraint('id', name='message_pkey') + ) + else: + op.create_table('messages', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('model_provider', sa.String(length=255), nullable=False), + sa.Column('model_id', sa.String(length=255), nullable=False), + sa.Column('override_model_configs', models.types.LongText(), nullable=True), + sa.Column('conversation_id', models.types.StringUUID(), nullable=False), + sa.Column('inputs', sa.JSON(), nullable=True), + sa.Column('query', models.types.LongText(), nullable=False), + sa.Column('message', sa.JSON(), nullable=False), + sa.Column('message_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), + sa.Column('message_unit_price', sa.Numeric(precision=10, scale=4), nullable=False), + sa.Column('answer', models.types.LongText(), nullable=False), + sa.Column('answer_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), + sa.Column('answer_unit_price', sa.Numeric(precision=10, scale=4), nullable=False), + sa.Column('provider_response_latency', sa.Float(), server_default=sa.text('0'), nullable=False), + sa.Column('total_price', sa.Numeric(precision=10, scale=7), nullable=True), + sa.Column('currency', sa.String(length=255), nullable=False), + sa.Column('from_source', sa.String(length=255), nullable=False), + sa.Column('from_end_user_id', models.types.StringUUID(), nullable=True), + sa.Column('from_account_id', models.types.StringUUID(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('agent_based', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.PrimaryKeyConstraint('id', name='message_pkey') + ) with op.batch_alter_table('messages', schema=None) as batch_op: batch_op.create_index('message_account_idx', ['app_id', 'from_source', 'from_account_id'], unique=False) batch_op.create_index('message_app_id_idx', ['app_id', 'created_at'], unique=False) @@ -764,8 +1359,12 @@ def downgrade(): op.drop_table('celery_tasksetmeta') op.drop_table('celery_taskmeta') - op.execute('DROP SEQUENCE taskset_id_sequence;') - op.execute('DROP SEQUENCE task_id_sequence;') + conn = op.get_bind() + if _is_pg(conn): + op.execute('DROP SEQUENCE taskset_id_sequence;') + op.execute('DROP SEQUENCE task_id_sequence;') + else: + pass with op.batch_alter_table('apps', schema=None) as batch_op: batch_op.drop_index('app_tenant_id_idx') @@ -793,5 +1392,9 @@ def downgrade(): op.drop_table('accounts') op.drop_table('account_integrates') - op.execute('DROP EXTENSION IF EXISTS "uuid-ossp";') + conn = op.get_bind() + if _is_pg(conn): + op.execute('DROP EXTENSION IF EXISTS "uuid-ossp";') + else: + pass # ### end Alembic commands ### diff --git a/api/migrations/versions/6dcb43972bdc_add_dataset_retriever_resource.py b/api/migrations/versions/6dcb43972bdc_add_dataset_retriever_resource.py index da27dd4426..78fed540bc 100644 --- a/api/migrations/versions/6dcb43972bdc_add_dataset_retriever_resource.py +++ b/api/migrations/versions/6dcb43972bdc_add_dataset_retriever_resource.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '6dcb43972bdc' down_revision = '4bcffcd64aa4' @@ -18,27 +24,53 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('dataset_retriever_resources', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('message_id', postgresql.UUID(), nullable=False), - sa.Column('position', sa.Integer(), nullable=False), - sa.Column('dataset_id', postgresql.UUID(), nullable=False), - sa.Column('dataset_name', sa.Text(), nullable=False), - sa.Column('document_id', postgresql.UUID(), nullable=False), - sa.Column('document_name', sa.Text(), nullable=False), - sa.Column('data_source_type', sa.Text(), nullable=False), - sa.Column('segment_id', postgresql.UUID(), nullable=False), - sa.Column('score', sa.Float(), nullable=True), - sa.Column('content', sa.Text(), nullable=False), - sa.Column('hit_count', sa.Integer(), nullable=True), - sa.Column('word_count', sa.Integer(), nullable=True), - sa.Column('segment_position', sa.Integer(), nullable=True), - sa.Column('index_node_hash', sa.Text(), nullable=True), - sa.Column('retriever_from', sa.Text(), nullable=False), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), - sa.PrimaryKeyConstraint('id', name='dataset_retriever_resource_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('dataset_retriever_resources', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('message_id', postgresql.UUID(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('dataset_id', postgresql.UUID(), nullable=False), + sa.Column('dataset_name', sa.Text(), nullable=False), + sa.Column('document_id', postgresql.UUID(), nullable=False), + sa.Column('document_name', sa.Text(), nullable=False), + sa.Column('data_source_type', sa.Text(), nullable=False), + sa.Column('segment_id', postgresql.UUID(), nullable=False), + sa.Column('score', sa.Float(), nullable=True), + sa.Column('content', sa.Text(), nullable=False), + sa.Column('hit_count', sa.Integer(), nullable=True), + sa.Column('word_count', sa.Integer(), nullable=True), + sa.Column('segment_position', sa.Integer(), nullable=True), + sa.Column('index_node_hash', sa.Text(), nullable=True), + sa.Column('retriever_from', sa.Text(), nullable=False), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_retriever_resource_pkey') + ) + else: + op.create_table('dataset_retriever_resources', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('message_id', models.types.StringUUID(), nullable=False), + sa.Column('position', sa.Integer(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_name', models.types.LongText(), nullable=False), + sa.Column('document_id', models.types.StringUUID(), nullable=False), + sa.Column('document_name', models.types.LongText(), nullable=False), + sa.Column('data_source_type', models.types.LongText(), nullable=False), + sa.Column('segment_id', models.types.StringUUID(), nullable=False), + sa.Column('score', sa.Float(), nullable=True), + sa.Column('content', models.types.LongText(), nullable=False), + sa.Column('hit_count', sa.Integer(), nullable=True), + sa.Column('word_count', sa.Integer(), nullable=True), + sa.Column('segment_position', sa.Integer(), nullable=True), + sa.Column('index_node_hash', models.types.LongText(), nullable=True), + sa.Column('retriever_from', models.types.LongText(), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_retriever_resource_pkey') + ) + with op.batch_alter_table('dataset_retriever_resources', schema=None) as batch_op: batch_op.create_index('dataset_retriever_resource_message_id_idx', ['message_id'], unique=False) diff --git a/api/migrations/versions/6e2cfb077b04_add_dataset_collection_binding.py b/api/migrations/versions/6e2cfb077b04_add_dataset_collection_binding.py index 4fa322f693..1ace8ea5a0 100644 --- a/api/migrations/versions/6e2cfb077b04_add_dataset_collection_binding.py +++ b/api/migrations/versions/6e2cfb077b04_add_dataset_collection_binding.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '6e2cfb077b04' down_revision = '77e83833755c' @@ -18,19 +24,36 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('dataset_collection_bindings', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('provider_name', sa.String(length=40), nullable=False), - sa.Column('model_name', sa.String(length=40), nullable=False), - sa.Column('collection_name', sa.String(length=64), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='dataset_collection_bindings_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('dataset_collection_bindings', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('provider_name', sa.String(length=40), nullable=False), + sa.Column('model_name', sa.String(length=40), nullable=False), + sa.Column('collection_name', sa.String(length=64), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_collection_bindings_pkey') + ) + else: + op.create_table('dataset_collection_bindings', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('provider_name', sa.String(length=40), nullable=False), + sa.Column('model_name', sa.String(length=40), nullable=False), + sa.Column('collection_name', sa.String(length=64), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_collection_bindings_pkey') + ) + with op.batch_alter_table('dataset_collection_bindings', schema=None) as batch_op: batch_op.create_index('provider_model_name_idx', ['provider_name', 'model_name'], unique=False) - with op.batch_alter_table('datasets', schema=None) as batch_op: - batch_op.add_column(sa.Column('collection_binding_id', postgresql.UUID(), nullable=True)) + if _is_pg(conn): + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.add_column(sa.Column('collection_binding_id', postgresql.UUID(), nullable=True)) + else: + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.add_column(sa.Column('collection_binding_id', models.types.StringUUID(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/714aafe25d39_add_anntation_history_match_response.py b/api/migrations/versions/714aafe25d39_add_anntation_history_match_response.py index 498b46e3c4..457338ef42 100644 --- a/api/migrations/versions/714aafe25d39_add_anntation_history_match_response.py +++ b/api/migrations/versions/714aafe25d39_add_anntation_history_match_response.py @@ -8,6 +8,12 @@ Create Date: 2023-12-14 06:38:02.972527 import sqlalchemy as sa from alembic import op +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '714aafe25d39' down_revision = 'f2a6fc85e260' @@ -17,9 +23,16 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('app_annotation_hit_histories', schema=None) as batch_op: - batch_op.add_column(sa.Column('annotation_question', sa.Text(), nullable=False)) - batch_op.add_column(sa.Column('annotation_content', sa.Text(), nullable=False)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('app_annotation_hit_histories', schema=None) as batch_op: + batch_op.add_column(sa.Column('annotation_question', sa.Text(), nullable=False)) + batch_op.add_column(sa.Column('annotation_content', sa.Text(), nullable=False)) + else: + with op.batch_alter_table('app_annotation_hit_histories', schema=None) as batch_op: + batch_op.add_column(sa.Column('annotation_question', models.types.LongText(), nullable=False)) + batch_op.add_column(sa.Column('annotation_content', models.types.LongText(), nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/77e83833755c_add_app_config_retriever_resource.py b/api/migrations/versions/77e83833755c_add_app_config_retriever_resource.py index c5d8c3d88d..7bcd1a1be3 100644 --- a/api/migrations/versions/77e83833755c_add_app_config_retriever_resource.py +++ b/api/migrations/versions/77e83833755c_add_app_config_retriever_resource.py @@ -8,6 +8,12 @@ Create Date: 2023-09-06 17:26:40.311927 import sqlalchemy as sa from alembic import op +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '77e83833755c' down_revision = '6dcb43972bdc' @@ -17,8 +23,14 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('retriever_resource', sa.Text(), nullable=True)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('retriever_resource', sa.Text(), nullable=True)) + else: + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('retriever_resource', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/7b45942e39bb_add_api_key_auth_binding.py b/api/migrations/versions/7b45942e39bb_add_api_key_auth_binding.py index 2ba0e13caa..f1932fe76c 100644 --- a/api/migrations/versions/7b45942e39bb_add_api_key_auth_binding.py +++ b/api/migrations/versions/7b45942e39bb_add_api_key_auth_binding.py @@ -10,6 +10,10 @@ from alembic import op import models.types + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '7b45942e39bb' down_revision = '4e99a8df00ff' @@ -19,44 +23,75 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('data_source_api_key_auth_bindings', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('category', sa.String(length=255), nullable=False), - sa.Column('provider', sa.String(length=255), nullable=False), - sa.Column('credentials', sa.Text(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('disabled', sa.Boolean(), server_default=sa.text('false'), nullable=True), - sa.PrimaryKeyConstraint('id', name='data_source_api_key_auth_binding_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + # PostgreSQL: Keep original syntax + op.create_table('data_source_api_key_auth_bindings', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('category', sa.String(length=255), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('credentials', sa.Text(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('disabled', sa.Boolean(), server_default=sa.text('false'), nullable=True), + sa.PrimaryKeyConstraint('id', name='data_source_api_key_auth_binding_pkey') + ) + else: + # MySQL: Use compatible syntax + op.create_table('data_source_api_key_auth_bindings', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('category', sa.String(length=255), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('credentials', models.types.LongText(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('disabled', sa.Boolean(), server_default=sa.text('false'), nullable=True), + sa.PrimaryKeyConstraint('id', name='data_source_api_key_auth_binding_pkey') + ) + with op.batch_alter_table('data_source_api_key_auth_bindings', schema=None) as batch_op: batch_op.create_index('data_source_api_key_auth_binding_provider_idx', ['provider'], unique=False) batch_op.create_index('data_source_api_key_auth_binding_tenant_id_idx', ['tenant_id'], unique=False) with op.batch_alter_table('data_source_bindings', schema=None) as batch_op: batch_op.drop_index('source_binding_tenant_id_idx') - batch_op.drop_index('source_info_idx') + if _is_pg(conn): + batch_op.drop_index('source_info_idx', postgresql_using='gin') + else: + pass op.rename_table('data_source_bindings', 'data_source_oauth_bindings') with op.batch_alter_table('data_source_oauth_bindings', schema=None) as batch_op: batch_op.create_index('source_binding_tenant_id_idx', ['tenant_id'], unique=False) - batch_op.create_index('source_info_idx', ['source_info'], unique=False, postgresql_using='gin') + if _is_pg(conn): + batch_op.create_index('source_info_idx', ['source_info'], unique=False, postgresql_using='gin') + else: + pass # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() with op.batch_alter_table('data_source_oauth_bindings', schema=None) as batch_op: - batch_op.drop_index('source_info_idx', postgresql_using='gin') + if _is_pg(conn): + batch_op.drop_index('source_info_idx', postgresql_using='gin') + else: + pass batch_op.drop_index('source_binding_tenant_id_idx') op.rename_table('data_source_oauth_bindings', 'data_source_bindings') with op.batch_alter_table('data_source_bindings', schema=None) as batch_op: - batch_op.create_index('source_info_idx', ['source_info'], unique=False) + if _is_pg(conn): + batch_op.create_index('source_info_idx', ['source_info'], unique=False, postgresql_using='gin') + else: + pass batch_op.create_index('source_binding_tenant_id_idx', ['tenant_id'], unique=False) with op.batch_alter_table('data_source_api_key_auth_bindings', schema=None) as batch_op: diff --git a/api/migrations/versions/7bdef072e63a_add_workflow_tool.py b/api/migrations/versions/7bdef072e63a_add_workflow_tool.py index f09a682f28..a0f4522cb3 100644 --- a/api/migrations/versions/7bdef072e63a_add_workflow_tool.py +++ b/api/migrations/versions/7bdef072e63a_add_workflow_tool.py @@ -10,6 +10,10 @@ from alembic import op import models.types + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '7bdef072e63a' down_revision = '5fda94355fce' @@ -19,21 +23,42 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('tool_workflow_providers', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('name', sa.String(length=40), nullable=False), - sa.Column('icon', sa.String(length=255), nullable=False), - sa.Column('app_id', models.types.StringUUID(), nullable=False), - sa.Column('user_id', models.types.StringUUID(), nullable=False), - sa.Column('tenant_id', models.types.StringUUID(), nullable=False), - sa.Column('description', sa.Text(), nullable=False), - sa.Column('parameter_configuration', sa.Text(), server_default='[]', nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='tool_workflow_provider_pkey'), - sa.UniqueConstraint('name', 'tenant_id', name='unique_workflow_tool_provider'), - sa.UniqueConstraint('tenant_id', 'app_id', name='unique_workflow_tool_provider_app_id') - ) + conn = op.get_bind() + + if _is_pg(conn): + # PostgreSQL: Keep original syntax + op.create_table('tool_workflow_providers', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('name', sa.String(length=40), nullable=False), + sa.Column('icon', sa.String(length=255), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('user_id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('description', sa.Text(), nullable=False), + sa.Column('parameter_configuration', sa.Text(), server_default='[]', nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_workflow_provider_pkey'), + sa.UniqueConstraint('name', 'tenant_id', name='unique_workflow_tool_provider'), + sa.UniqueConstraint('tenant_id', 'app_id', name='unique_workflow_tool_provider_app_id') + ) + else: + # MySQL: Use compatible syntax + op.create_table('tool_workflow_providers', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=40), nullable=False), + sa.Column('icon', sa.String(length=255), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('user_id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('description', models.types.LongText(), nullable=False), + sa.Column('parameter_configuration', models.types.LongText(), default='[]', nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_workflow_provider_pkey'), + sa.UniqueConstraint('name', 'tenant_id', name='unique_workflow_tool_provider'), + sa.UniqueConstraint('tenant_id', 'app_id', name='unique_workflow_tool_provider_app_id') + ) # ### end Alembic commands ### diff --git a/api/migrations/versions/7ce5a52e4eee_add_tool_providers.py b/api/migrations/versions/7ce5a52e4eee_add_tool_providers.py index 881ffec61d..3c0aa082d5 100644 --- a/api/migrations/versions/7ce5a52e4eee_add_tool_providers.py +++ b/api/migrations/versions/7ce5a52e4eee_add_tool_providers.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '7ce5a52e4eee' down_revision = '2beac44e5f5f' @@ -18,19 +24,40 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('tool_providers', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('tool_name', sa.String(length=40), nullable=False), - sa.Column('encrypted_credentials', sa.Text(), nullable=True), - sa.Column('is_enabled', sa.Boolean(), server_default=sa.text('false'), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='tool_provider_pkey'), - sa.UniqueConstraint('tenant_id', 'tool_name', name='unique_tool_provider_tool_name') - ) - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('sensitive_word_avoidance', sa.Text(), nullable=True)) + conn = op.get_bind() + + if _is_pg(conn): + # PostgreSQL: Keep original syntax + op.create_table('tool_providers', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('tool_name', sa.String(length=40), nullable=False), + sa.Column('encrypted_credentials', sa.Text(), nullable=True), + sa.Column('is_enabled', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_provider_pkey'), + sa.UniqueConstraint('tenant_id', 'tool_name', name='unique_tool_provider_tool_name') + ) + else: + # MySQL: Use compatible syntax + op.create_table('tool_providers', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('tool_name', sa.String(length=40), nullable=False), + sa.Column('encrypted_credentials', models.types.LongText(), nullable=True), + sa.Column('is_enabled', sa.Boolean(), server_default=sa.text('false'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_provider_pkey'), + sa.UniqueConstraint('tenant_id', 'tool_name', name='unique_tool_provider_tool_name') + ) + if _is_pg(conn): + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('sensitive_word_avoidance', sa.Text(), nullable=True)) + else: + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('sensitive_word_avoidance', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/7e6a8693e07a_add_table_dataset_permissions.py b/api/migrations/versions/7e6a8693e07a_add_table_dataset_permissions.py index 865572f3a7..f8883d51ff 100644 --- a/api/migrations/versions/7e6a8693e07a_add_table_dataset_permissions.py +++ b/api/migrations/versions/7e6a8693e07a_add_table_dataset_permissions.py @@ -10,6 +10,10 @@ from alembic import op import models.types + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '7e6a8693e07a' down_revision = 'b2602e131636' @@ -19,14 +23,27 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('dataset_permissions', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('dataset_id', models.types.StringUUID(), nullable=False), - sa.Column('account_id', models.types.StringUUID(), nullable=False), - sa.Column('has_permission', sa.Boolean(), server_default=sa.text('true'), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='dataset_permission_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('dataset_permissions', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('account_id', models.types.StringUUID(), nullable=False), + sa.Column('has_permission', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_permission_pkey') + ) + else: + op.create_table('dataset_permissions', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('dataset_id', models.types.StringUUID(), nullable=False), + sa.Column('account_id', models.types.StringUUID(), nullable=False), + sa.Column('has_permission', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='dataset_permission_pkey') + ) + with op.batch_alter_table('dataset_permissions', schema=None) as batch_op: batch_op.create_index('idx_dataset_permissions_account_id', ['account_id'], unique=False) batch_op.create_index('idx_dataset_permissions_dataset_id', ['dataset_id'], unique=False) diff --git a/api/migrations/versions/88072f0caa04_add_custom_config_in_tenant.py b/api/migrations/versions/88072f0caa04_add_custom_config_in_tenant.py index f7625bff8c..beea90b384 100644 --- a/api/migrations/versions/88072f0caa04_add_custom_config_in_tenant.py +++ b/api/migrations/versions/88072f0caa04_add_custom_config_in_tenant.py @@ -8,6 +8,12 @@ Create Date: 2023-12-14 07:36:50.705362 import sqlalchemy as sa from alembic import op +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '88072f0caa04' down_revision = '246ba09cbbdb' @@ -17,8 +23,14 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('tenants', schema=None) as batch_op: - batch_op.add_column(sa.Column('custom_config', sa.Text(), nullable=True)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('tenants', schema=None) as batch_op: + batch_op.add_column(sa.Column('custom_config', sa.Text(), nullable=True)) + else: + with op.batch_alter_table('tenants', schema=None) as batch_op: + batch_op.add_column(sa.Column('custom_config', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/89c7899ca936_.py b/api/migrations/versions/89c7899ca936_.py index 0fad39fa57..2420710e74 100644 --- a/api/migrations/versions/89c7899ca936_.py +++ b/api/migrations/versions/89c7899ca936_.py @@ -8,6 +8,12 @@ Create Date: 2024-01-21 04:10:23.192853 import sqlalchemy as sa from alembic import op +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '89c7899ca936' down_revision = '187385f442fc' @@ -17,21 +23,39 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('sites', schema=None) as batch_op: - batch_op.alter_column('description', - existing_type=sa.VARCHAR(length=255), - type_=sa.Text(), - existing_nullable=True) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('sites', schema=None) as batch_op: + batch_op.alter_column('description', + existing_type=sa.VARCHAR(length=255), + type_=sa.Text(), + existing_nullable=True) + else: + with op.batch_alter_table('sites', schema=None) as batch_op: + batch_op.alter_column('description', + existing_type=sa.VARCHAR(length=255), + type_=models.types.LongText(), + existing_nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('sites', schema=None) as batch_op: - batch_op.alter_column('description', - existing_type=sa.Text(), - type_=sa.VARCHAR(length=255), - existing_nullable=True) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('sites', schema=None) as batch_op: + batch_op.alter_column('description', + existing_type=sa.Text(), + type_=sa.VARCHAR(length=255), + existing_nullable=True) + else: + with op.batch_alter_table('sites', schema=None) as batch_op: + batch_op.alter_column('description', + existing_type=models.types.LongText(), + type_=sa.VARCHAR(length=255), + existing_nullable=True) # ### end Alembic commands ### diff --git a/api/migrations/versions/8d2d099ceb74_add_qa_model_support.py b/api/migrations/versions/8d2d099ceb74_add_qa_model_support.py index 849103b071..14e9cde727 100644 --- a/api/migrations/versions/8d2d099ceb74_add_qa_model_support.py +++ b/api/migrations/versions/8d2d099ceb74_add_qa_model_support.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '8d2d099ceb74' down_revision = '7ce5a52e4eee' @@ -18,13 +24,24 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('document_segments', schema=None) as batch_op: - batch_op.add_column(sa.Column('answer', sa.Text(), nullable=True)) - batch_op.add_column(sa.Column('updated_by', postgresql.UUID(), nullable=True)) - batch_op.add_column(sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('document_segments', schema=None) as batch_op: + batch_op.add_column(sa.Column('answer', sa.Text(), nullable=True)) + batch_op.add_column(sa.Column('updated_by', postgresql.UUID(), nullable=True)) + batch_op.add_column(sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False)) - with op.batch_alter_table('documents', schema=None) as batch_op: - batch_op.add_column(sa.Column('doc_form', sa.String(length=255), server_default=sa.text("'text_model'::character varying"), nullable=False)) + with op.batch_alter_table('documents', schema=None) as batch_op: + batch_op.add_column(sa.Column('doc_form', sa.String(length=255), server_default=sa.text("'text_model'::character varying"), nullable=False)) + else: + with op.batch_alter_table('document_segments', schema=None) as batch_op: + batch_op.add_column(sa.Column('answer', models.types.LongText(), nullable=True)) + batch_op.add_column(sa.Column('updated_by', models.types.StringUUID(), nullable=True)) + batch_op.add_column(sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False)) + + with op.batch_alter_table('documents', schema=None) as batch_op: + batch_op.add_column(sa.Column('doc_form', sa.String(length=255), server_default=sa.text("'text_model'"), nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/8e5588e6412e_add_environment_variable_to_workflow_.py b/api/migrations/versions/8e5588e6412e_add_environment_variable_to_workflow_.py index ec2336da4d..f550f79b8e 100644 --- a/api/migrations/versions/8e5588e6412e_add_environment_variable_to_workflow_.py +++ b/api/migrations/versions/8e5588e6412e_add_environment_variable_to_workflow_.py @@ -10,6 +10,10 @@ from alembic import op import models as models + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '8e5588e6412e' down_revision = '6e957a32015b' @@ -19,8 +23,14 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('workflows', schema=None) as batch_op: - batch_op.add_column(sa.Column('environment_variables', sa.Text(), server_default='{}', nullable=False)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('workflows', schema=None) as batch_op: + batch_op.add_column(sa.Column('environment_variables', sa.Text(), server_default='{}', nullable=False)) + else: + with op.batch_alter_table('workflows', schema=None) as batch_op: + batch_op.add_column(sa.Column('environment_variables', models.types.LongText(), default='{}', nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/8ec536f3c800_rename_api_provider_credentails.py b/api/migrations/versions/8ec536f3c800_rename_api_provider_credentails.py index 6cafc198aa..111e81240b 100644 --- a/api/migrations/versions/8ec536f3c800_rename_api_provider_credentails.py +++ b/api/migrations/versions/8ec536f3c800_rename_api_provider_credentails.py @@ -8,6 +8,12 @@ Create Date: 2024-01-07 03:57:35.257545 import sqlalchemy as sa from alembic import op +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '8ec536f3c800' down_revision = 'ad472b61a054' @@ -17,8 +23,14 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.add_column(sa.Column('credentials_str', sa.Text(), nullable=False)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('credentials_str', sa.Text(), nullable=False)) + else: + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('credentials_str', models.types.LongText(), nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/8fe468ba0ca5_add_gpt4v_supports.py b/api/migrations/versions/8fe468ba0ca5_add_gpt4v_supports.py index 01d5631510..1c1c6cacbb 100644 --- a/api/migrations/versions/8fe468ba0ca5_add_gpt4v_supports.py +++ b/api/migrations/versions/8fe468ba0ca5_add_gpt4v_supports.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '8fe468ba0ca5' down_revision = 'a9836e3baeee' @@ -18,27 +24,52 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('message_files', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('message_id', postgresql.UUID(), nullable=False), - sa.Column('type', sa.String(length=255), nullable=False), - sa.Column('transfer_method', sa.String(length=255), nullable=False), - sa.Column('url', sa.Text(), nullable=True), - sa.Column('upload_file_id', postgresql.UUID(), nullable=True), - sa.Column('created_by_role', sa.String(length=255), nullable=False), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='message_file_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('message_files', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('message_id', postgresql.UUID(), nullable=False), + sa.Column('type', sa.String(length=255), nullable=False), + sa.Column('transfer_method', sa.String(length=255), nullable=False), + sa.Column('url', sa.Text(), nullable=True), + sa.Column('upload_file_id', postgresql.UUID(), nullable=True), + sa.Column('created_by_role', sa.String(length=255), nullable=False), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='message_file_pkey') + ) + else: + op.create_table('message_files', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('message_id', models.types.StringUUID(), nullable=False), + sa.Column('type', sa.String(length=255), nullable=False), + sa.Column('transfer_method', sa.String(length=255), nullable=False), + sa.Column('url', models.types.LongText(), nullable=True), + sa.Column('upload_file_id', models.types.StringUUID(), nullable=True), + sa.Column('created_by_role', sa.String(length=255), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='message_file_pkey') + ) + with op.batch_alter_table('message_files', schema=None) as batch_op: batch_op.create_index('message_file_created_by_idx', ['created_by'], unique=False) batch_op.create_index('message_file_message_idx', ['message_id'], unique=False) - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('file_upload', sa.Text(), nullable=True)) + if _is_pg(conn): + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('file_upload', sa.Text(), nullable=True)) + else: + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('file_upload', models.types.LongText(), nullable=True)) - with op.batch_alter_table('upload_files', schema=None) as batch_op: - batch_op.add_column(sa.Column('created_by_role', sa.String(length=255), server_default=sa.text("'account'::character varying"), nullable=False)) + if _is_pg(conn): + with op.batch_alter_table('upload_files', schema=None) as batch_op: + batch_op.add_column(sa.Column('created_by_role', sa.String(length=255), server_default=sa.text("'account'::character varying"), nullable=False)) + else: + with op.batch_alter_table('upload_files', schema=None) as batch_op: + batch_op.add_column(sa.Column('created_by_role', sa.String(length=255), server_default=sa.text("'account'"), nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/968fff4c0ab9_add_api_based_extension.py b/api/migrations/versions/968fff4c0ab9_add_api_based_extension.py index 207a9c841f..c0ea28fe50 100644 --- a/api/migrations/versions/968fff4c0ab9_add_api_based_extension.py +++ b/api/migrations/versions/968fff4c0ab9_add_api_based_extension.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '968fff4c0ab9' down_revision = 'b3a09c049e8e' @@ -18,16 +24,28 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - - op.create_table('api_based_extensions', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('name', sa.String(length=255), nullable=False), - sa.Column('api_endpoint', sa.String(length=255), nullable=False), - sa.Column('api_key', sa.Text(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='api_based_extension_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('api_based_extensions', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('api_endpoint', sa.String(length=255), nullable=False), + sa.Column('api_key', sa.Text(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='api_based_extension_pkey') + ) + else: + op.create_table('api_based_extensions', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.Column('api_endpoint', sa.String(length=255), nullable=False), + sa.Column('api_key', models.types.LongText(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='api_based_extension_pkey') + ) with op.batch_alter_table('api_based_extensions', schema=None) as batch_op: batch_op.create_index('api_based_extension_tenant_idx', ['tenant_id'], unique=False) diff --git a/api/migrations/versions/9f4e3427ea84_add_created_by_role.py b/api/migrations/versions/9f4e3427ea84_add_created_by_role.py index c7a98b4ac6..5d29d354f3 100644 --- a/api/migrations/versions/9f4e3427ea84_add_created_by_role.py +++ b/api/migrations/versions/9f4e3427ea84_add_created_by_role.py @@ -8,6 +8,10 @@ Create Date: 2023-05-17 17:29:01.060435 import sqlalchemy as sa from alembic import op + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = '9f4e3427ea84' down_revision = '64b051264f32' @@ -17,15 +21,30 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('pinned_conversations', schema=None) as batch_op: - batch_op.add_column(sa.Column('created_by_role', sa.String(length=255), server_default=sa.text("'end_user'::character varying"), nullable=False)) - batch_op.drop_index('pinned_conversation_conversation_idx') - batch_op.create_index('pinned_conversation_conversation_idx', ['app_id', 'conversation_id', 'created_by_role', 'created_by'], unique=False) + conn = op.get_bind() + + if _is_pg(conn): + # PostgreSQL: Keep original syntax + with op.batch_alter_table('pinned_conversations', schema=None) as batch_op: + batch_op.add_column(sa.Column('created_by_role', sa.String(length=255), server_default=sa.text("'end_user'::character varying"), nullable=False)) + batch_op.drop_index('pinned_conversation_conversation_idx') + batch_op.create_index('pinned_conversation_conversation_idx', ['app_id', 'conversation_id', 'created_by_role', 'created_by'], unique=False) - with op.batch_alter_table('saved_messages', schema=None) as batch_op: - batch_op.add_column(sa.Column('created_by_role', sa.String(length=255), server_default=sa.text("'end_user'::character varying"), nullable=False)) - batch_op.drop_index('saved_message_message_idx') - batch_op.create_index('saved_message_message_idx', ['app_id', 'message_id', 'created_by_role', 'created_by'], unique=False) + with op.batch_alter_table('saved_messages', schema=None) as batch_op: + batch_op.add_column(sa.Column('created_by_role', sa.String(length=255), server_default=sa.text("'end_user'::character varying"), nullable=False)) + batch_op.drop_index('saved_message_message_idx') + batch_op.create_index('saved_message_message_idx', ['app_id', 'message_id', 'created_by_role', 'created_by'], unique=False) + else: + # MySQL: Use compatible syntax + with op.batch_alter_table('pinned_conversations', schema=None) as batch_op: + batch_op.add_column(sa.Column('created_by_role', sa.String(length=255), server_default=sa.text("'end_user'"), nullable=False)) + batch_op.drop_index('pinned_conversation_conversation_idx') + batch_op.create_index('pinned_conversation_conversation_idx', ['app_id', 'conversation_id', 'created_by_role', 'created_by'], unique=False) + + with op.batch_alter_table('saved_messages', schema=None) as batch_op: + batch_op.add_column(sa.Column('created_by_role', sa.String(length=255), server_default=sa.text("'end_user'"), nullable=False)) + batch_op.drop_index('saved_message_message_idx') + batch_op.create_index('saved_message_message_idx', ['app_id', 'message_id', 'created_by_role', 'created_by'], unique=False) # ### end Alembic commands ### diff --git a/api/migrations/versions/a45f4dfde53b_add_language_to_recommend_apps.py b/api/migrations/versions/a45f4dfde53b_add_language_to_recommend_apps.py index 3014978110..7e1e328317 100644 --- a/api/migrations/versions/a45f4dfde53b_add_language_to_recommend_apps.py +++ b/api/migrations/versions/a45f4dfde53b_add_language_to_recommend_apps.py @@ -8,6 +8,10 @@ Create Date: 2023-05-25 17:50:32.052335 import sqlalchemy as sa from alembic import op + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'a45f4dfde53b' down_revision = '9f4e3427ea84' @@ -17,10 +21,18 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('recommended_apps', schema=None) as batch_op: - batch_op.add_column(sa.Column('language', sa.String(length=255), server_default=sa.text("'en-US'::character varying"), nullable=False)) - batch_op.drop_index('recommended_app_is_listed_idx') - batch_op.create_index('recommended_app_is_listed_idx', ['is_listed', 'language'], unique=False) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('recommended_apps', schema=None) as batch_op: + batch_op.add_column(sa.Column('language', sa.String(length=255), server_default=sa.text("'en-US'::character varying"), nullable=False)) + batch_op.drop_index('recommended_app_is_listed_idx') + batch_op.create_index('recommended_app_is_listed_idx', ['is_listed', 'language'], unique=False) + else: + with op.batch_alter_table('recommended_apps', schema=None) as batch_op: + batch_op.add_column(sa.Column('language', sa.String(length=255), server_default=sa.text("'en-US'"), nullable=False)) + batch_op.drop_index('recommended_app_is_listed_idx') + batch_op.create_index('recommended_app_is_listed_idx', ['is_listed', 'language'], unique=False) # ### end Alembic commands ### diff --git a/api/migrations/versions/a5b56fb053ef_app_config_add_speech_to_text.py b/api/migrations/versions/a5b56fb053ef_app_config_add_speech_to_text.py index acb6812434..616cb2f163 100644 --- a/api/migrations/versions/a5b56fb053ef_app_config_add_speech_to_text.py +++ b/api/migrations/versions/a5b56fb053ef_app_config_add_speech_to_text.py @@ -8,6 +8,12 @@ Create Date: 2023-07-06 17:55:20.894149 import sqlalchemy as sa from alembic import op +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'a5b56fb053ef' down_revision = 'd3d503a3471c' @@ -17,8 +23,14 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('speech_to_text', sa.Text(), nullable=True)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('speech_to_text', sa.Text(), nullable=True)) + else: + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('speech_to_text', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/a8d7385a7b66_add_embeddings_provider_name.py b/api/migrations/versions/a8d7385a7b66_add_embeddings_provider_name.py index 1ee01381d8..77311061b0 100644 --- a/api/migrations/versions/a8d7385a7b66_add_embeddings_provider_name.py +++ b/api/migrations/versions/a8d7385a7b66_add_embeddings_provider_name.py @@ -8,6 +8,10 @@ Create Date: 2024-04-02 12:17:22.641525 import sqlalchemy as sa from alembic import op + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'a8d7385a7b66' down_revision = '17b5ab037c40' @@ -17,10 +21,18 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('embeddings', schema=None) as batch_op: - batch_op.add_column(sa.Column('provider_name', sa.String(length=40), server_default=sa.text("''::character varying"), nullable=False)) - batch_op.drop_constraint('embedding_hash_idx', type_='unique') - batch_op.create_unique_constraint('embedding_hash_idx', ['model_name', 'hash', 'provider_name']) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('embeddings', schema=None) as batch_op: + batch_op.add_column(sa.Column('provider_name', sa.String(length=40), server_default=sa.text("''::character varying"), nullable=False)) + batch_op.drop_constraint('embedding_hash_idx', type_='unique') + batch_op.create_unique_constraint('embedding_hash_idx', ['model_name', 'hash', 'provider_name']) + else: + with op.batch_alter_table('embeddings', schema=None) as batch_op: + batch_op.add_column(sa.Column('provider_name', sa.String(length=40), server_default=sa.text("''"), nullable=False)) + batch_op.drop_constraint('embedding_hash_idx', type_='unique') + batch_op.create_unique_constraint('embedding_hash_idx', ['model_name', 'hash', 'provider_name']) # ### end Alembic commands ### diff --git a/api/migrations/versions/a9836e3baeee_add_external_data_tools_in_app_model_.py b/api/migrations/versions/a9836e3baeee_add_external_data_tools_in_app_model_.py index 5dcb630aed..900ff78036 100644 --- a/api/migrations/versions/a9836e3baeee_add_external_data_tools_in_app_model_.py +++ b/api/migrations/versions/a9836e3baeee_add_external_data_tools_in_app_model_.py @@ -8,6 +8,12 @@ Create Date: 2023-11-02 04:04:57.609485 import sqlalchemy as sa from alembic import op +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'a9836e3baeee' down_revision = '968fff4c0ab9' @@ -17,8 +23,14 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('external_data_tools', sa.Text(), nullable=True)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('external_data_tools', sa.Text(), nullable=True)) + else: + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('external_data_tools', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/b24be59fbb04_.py b/api/migrations/versions/b24be59fbb04_.py index 29ba859f2b..b0a6d10d8c 100644 --- a/api/migrations/versions/b24be59fbb04_.py +++ b/api/migrations/versions/b24be59fbb04_.py @@ -8,6 +8,12 @@ Create Date: 2024-01-17 01:31:12.670556 import sqlalchemy as sa from alembic import op +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'b24be59fbb04' down_revision = 'de95f5c77138' @@ -17,8 +23,14 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('text_to_speech', sa.Text(), nullable=True)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('text_to_speech', sa.Text(), nullable=True)) + else: + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('text_to_speech', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/b289e2408ee2_add_workflow.py b/api/migrations/versions/b289e2408ee2_add_workflow.py index 966f86c05f..ea50930eed 100644 --- a/api/migrations/versions/b289e2408ee2_add_workflow.py +++ b/api/migrations/versions/b289e2408ee2_add_workflow.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'b289e2408ee2' down_revision = 'a8d7385a7b66' @@ -18,98 +24,190 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('workflow_app_logs', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('workflow_id', postgresql.UUID(), nullable=False), - sa.Column('workflow_run_id', postgresql.UUID(), nullable=False), - sa.Column('created_from', sa.String(length=255), nullable=False), - sa.Column('created_by_role', sa.String(length=255), nullable=False), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='workflow_app_log_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('workflow_app_logs', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('workflow_id', postgresql.UUID(), nullable=False), + sa.Column('workflow_run_id', postgresql.UUID(), nullable=False), + sa.Column('created_from', sa.String(length=255), nullable=False), + sa.Column('created_by_role', sa.String(length=255), nullable=False), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='workflow_app_log_pkey') + ) + else: + op.create_table('workflow_app_logs', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('workflow_id', models.types.StringUUID(), nullable=False), + sa.Column('workflow_run_id', models.types.StringUUID(), nullable=False), + sa.Column('created_from', sa.String(length=255), nullable=False), + sa.Column('created_by_role', sa.String(length=255), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='workflow_app_log_pkey') + ) with op.batch_alter_table('workflow_app_logs', schema=None) as batch_op: batch_op.create_index('workflow_app_log_app_idx', ['tenant_id', 'app_id'], unique=False) - op.create_table('workflow_node_executions', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('workflow_id', postgresql.UUID(), nullable=False), - sa.Column('triggered_from', sa.String(length=255), nullable=False), - sa.Column('workflow_run_id', postgresql.UUID(), nullable=True), - sa.Column('index', sa.Integer(), nullable=False), - sa.Column('predecessor_node_id', sa.String(length=255), nullable=True), - sa.Column('node_id', sa.String(length=255), nullable=False), - sa.Column('node_type', sa.String(length=255), nullable=False), - sa.Column('title', sa.String(length=255), nullable=False), - sa.Column('inputs', sa.Text(), nullable=True), - sa.Column('process_data', sa.Text(), nullable=True), - sa.Column('outputs', sa.Text(), nullable=True), - sa.Column('status', sa.String(length=255), nullable=False), - sa.Column('error', sa.Text(), nullable=True), - sa.Column('elapsed_time', sa.Float(), server_default=sa.text('0'), nullable=False), - sa.Column('execution_metadata', sa.Text(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('created_by_role', sa.String(length=255), nullable=False), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('finished_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id', name='workflow_node_execution_pkey') - ) + if _is_pg(conn): + op.create_table('workflow_node_executions', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('workflow_id', postgresql.UUID(), nullable=False), + sa.Column('triggered_from', sa.String(length=255), nullable=False), + sa.Column('workflow_run_id', postgresql.UUID(), nullable=True), + sa.Column('index', sa.Integer(), nullable=False), + sa.Column('predecessor_node_id', sa.String(length=255), nullable=True), + sa.Column('node_id', sa.String(length=255), nullable=False), + sa.Column('node_type', sa.String(length=255), nullable=False), + sa.Column('title', sa.String(length=255), nullable=False), + sa.Column('inputs', sa.Text(), nullable=True), + sa.Column('process_data', sa.Text(), nullable=True), + sa.Column('outputs', sa.Text(), nullable=True), + sa.Column('status', sa.String(length=255), nullable=False), + sa.Column('error', sa.Text(), nullable=True), + sa.Column('elapsed_time', sa.Float(), server_default=sa.text('0'), nullable=False), + sa.Column('execution_metadata', sa.Text(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('created_by_role', sa.String(length=255), nullable=False), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('finished_at', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id', name='workflow_node_execution_pkey') + ) + else: + op.create_table('workflow_node_executions', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('workflow_id', models.types.StringUUID(), nullable=False), + sa.Column('triggered_from', sa.String(length=255), nullable=False), + sa.Column('workflow_run_id', models.types.StringUUID(), nullable=True), + sa.Column('index', sa.Integer(), nullable=False), + sa.Column('predecessor_node_id', sa.String(length=255), nullable=True), + sa.Column('node_id', sa.String(length=255), nullable=False), + sa.Column('node_type', sa.String(length=255), nullable=False), + sa.Column('title', sa.String(length=255), nullable=False), + sa.Column('inputs', models.types.LongText(), nullable=True), + sa.Column('process_data', models.types.LongText(), nullable=True), + sa.Column('outputs', models.types.LongText(), nullable=True), + sa.Column('status', sa.String(length=255), nullable=False), + sa.Column('error', models.types.LongText(), nullable=True), + sa.Column('elapsed_time', sa.Float(), server_default=sa.text('0'), nullable=False), + sa.Column('execution_metadata', models.types.LongText(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('created_by_role', sa.String(length=255), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('finished_at', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id', name='workflow_node_execution_pkey') + ) with op.batch_alter_table('workflow_node_executions', schema=None) as batch_op: batch_op.create_index('workflow_node_execution_node_run_idx', ['tenant_id', 'app_id', 'workflow_id', 'triggered_from', 'node_id'], unique=False) batch_op.create_index('workflow_node_execution_workflow_run_idx', ['tenant_id', 'app_id', 'workflow_id', 'triggered_from', 'workflow_run_id'], unique=False) - op.create_table('workflow_runs', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('sequence_number', sa.Integer(), nullable=False), - sa.Column('workflow_id', postgresql.UUID(), nullable=False), - sa.Column('type', sa.String(length=255), nullable=False), - sa.Column('triggered_from', sa.String(length=255), nullable=False), - sa.Column('version', sa.String(length=255), nullable=False), - sa.Column('graph', sa.Text(), nullable=True), - sa.Column('inputs', sa.Text(), nullable=True), - sa.Column('status', sa.String(length=255), nullable=False), - sa.Column('outputs', sa.Text(), nullable=True), - sa.Column('error', sa.Text(), nullable=True), - sa.Column('elapsed_time', sa.Float(), server_default=sa.text('0'), nullable=False), - sa.Column('total_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), - sa.Column('total_steps', sa.Integer(), server_default=sa.text('0'), nullable=True), - sa.Column('created_by_role', sa.String(length=255), nullable=False), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('finished_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id', name='workflow_run_pkey') - ) + if _is_pg(conn): + op.create_table('workflow_runs', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('sequence_number', sa.Integer(), nullable=False), + sa.Column('workflow_id', postgresql.UUID(), nullable=False), + sa.Column('type', sa.String(length=255), nullable=False), + sa.Column('triggered_from', sa.String(length=255), nullable=False), + sa.Column('version', sa.String(length=255), nullable=False), + sa.Column('graph', sa.Text(), nullable=True), + sa.Column('inputs', sa.Text(), nullable=True), + sa.Column('status', sa.String(length=255), nullable=False), + sa.Column('outputs', sa.Text(), nullable=True), + sa.Column('error', sa.Text(), nullable=True), + sa.Column('elapsed_time', sa.Float(), server_default=sa.text('0'), nullable=False), + sa.Column('total_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), + sa.Column('total_steps', sa.Integer(), server_default=sa.text('0'), nullable=True), + sa.Column('created_by_role', sa.String(length=255), nullable=False), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('finished_at', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id', name='workflow_run_pkey') + ) + else: + op.create_table('workflow_runs', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('sequence_number', sa.Integer(), nullable=False), + sa.Column('workflow_id', models.types.StringUUID(), nullable=False), + sa.Column('type', sa.String(length=255), nullable=False), + sa.Column('triggered_from', sa.String(length=255), nullable=False), + sa.Column('version', sa.String(length=255), nullable=False), + sa.Column('graph', models.types.LongText(), nullable=True), + sa.Column('inputs', models.types.LongText(), nullable=True), + sa.Column('status', sa.String(length=255), nullable=False), + sa.Column('outputs', models.types.LongText(), nullable=True), + sa.Column('error', models.types.LongText(), nullable=True), + sa.Column('elapsed_time', sa.Float(), server_default=sa.text('0'), nullable=False), + sa.Column('total_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), + sa.Column('total_steps', sa.Integer(), server_default=sa.text('0'), nullable=True), + sa.Column('created_by_role', sa.String(length=255), nullable=False), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('finished_at', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id', name='workflow_run_pkey') + ) with op.batch_alter_table('workflow_runs', schema=None) as batch_op: batch_op.create_index('workflow_run_triggerd_from_idx', ['tenant_id', 'app_id', 'triggered_from'], unique=False) - op.create_table('workflows', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('type', sa.String(length=255), nullable=False), - sa.Column('version', sa.String(length=255), nullable=False), - sa.Column('graph', sa.Text(), nullable=True), - sa.Column('features', sa.Text(), nullable=True), - sa.Column('created_by', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_by', postgresql.UUID(), nullable=True), - sa.Column('updated_at', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id', name='workflow_pkey') - ) + if _is_pg(conn): + op.create_table('workflows', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('type', sa.String(length=255), nullable=False), + sa.Column('version', sa.String(length=255), nullable=False), + sa.Column('graph', sa.Text(), nullable=True), + sa.Column('features', sa.Text(), nullable=True), + sa.Column('created_by', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_by', postgresql.UUID(), nullable=True), + sa.Column('updated_at', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id', name='workflow_pkey') + ) + else: + op.create_table('workflows', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('type', sa.String(length=255), nullable=False), + sa.Column('version', sa.String(length=255), nullable=False), + sa.Column('graph', models.types.LongText(), nullable=True), + sa.Column('features', models.types.LongText(), nullable=True), + sa.Column('created_by', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_by', models.types.StringUUID(), nullable=True), + sa.Column('updated_at', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id', name='workflow_pkey') + ) + with op.batch_alter_table('workflows', schema=None) as batch_op: batch_op.create_index('workflow_version_idx', ['tenant_id', 'app_id', 'version'], unique=False) - with op.batch_alter_table('apps', schema=None) as batch_op: - batch_op.add_column(sa.Column('workflow_id', postgresql.UUID(), nullable=True)) + if _is_pg(conn): + with op.batch_alter_table('apps', schema=None) as batch_op: + batch_op.add_column(sa.Column('workflow_id', postgresql.UUID(), nullable=True)) - with op.batch_alter_table('messages', schema=None) as batch_op: - batch_op.add_column(sa.Column('workflow_run_id', postgresql.UUID(), nullable=True)) + with op.batch_alter_table('messages', schema=None) as batch_op: + batch_op.add_column(sa.Column('workflow_run_id', postgresql.UUID(), nullable=True)) + else: + with op.batch_alter_table('apps', schema=None) as batch_op: + batch_op.add_column(sa.Column('workflow_id', models.types.StringUUID(), nullable=True)) + + with op.batch_alter_table('messages', schema=None) as batch_op: + batch_op.add_column(sa.Column('workflow_run_id', models.types.StringUUID(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/b3a09c049e8e_add_advanced_prompt_templates.py b/api/migrations/versions/b3a09c049e8e_add_advanced_prompt_templates.py index 5682eff030..772395c25b 100644 --- a/api/migrations/versions/b3a09c049e8e_add_advanced_prompt_templates.py +++ b/api/migrations/versions/b3a09c049e8e_add_advanced_prompt_templates.py @@ -8,6 +8,12 @@ Create Date: 2023-10-10 15:23:23.395420 import sqlalchemy as sa from alembic import op +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'b3a09c049e8e' down_revision = '2e9819ca5b28' @@ -17,11 +23,20 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('prompt_type', sa.String(length=255), nullable=False, server_default='simple')) - batch_op.add_column(sa.Column('chat_prompt_config', sa.Text(), nullable=True)) - batch_op.add_column(sa.Column('completion_prompt_config', sa.Text(), nullable=True)) - batch_op.add_column(sa.Column('dataset_configs', sa.Text(), nullable=True)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('prompt_type', sa.String(length=255), nullable=False, server_default='simple')) + batch_op.add_column(sa.Column('chat_prompt_config', sa.Text(), nullable=True)) + batch_op.add_column(sa.Column('completion_prompt_config', sa.Text(), nullable=True)) + batch_op.add_column(sa.Column('dataset_configs', sa.Text(), nullable=True)) + else: + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('prompt_type', sa.String(length=255), nullable=False, server_default='simple')) + batch_op.add_column(sa.Column('chat_prompt_config', models.types.LongText(), nullable=True)) + batch_op.add_column(sa.Column('completion_prompt_config', models.types.LongText(), nullable=True)) + batch_op.add_column(sa.Column('dataset_configs', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/bf0aec5ba2cf_add_provider_order.py b/api/migrations/versions/bf0aec5ba2cf_add_provider_order.py index dfa1517462..32736f41ca 100644 --- a/api/migrations/versions/bf0aec5ba2cf_add_provider_order.py +++ b/api/migrations/versions/bf0aec5ba2cf_add_provider_order.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'bf0aec5ba2cf' down_revision = 'e35ed59becda' @@ -18,25 +24,48 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('provider_orders', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('provider_name', sa.String(length=40), nullable=False), - sa.Column('account_id', postgresql.UUID(), nullable=False), - sa.Column('payment_product_id', sa.String(length=191), nullable=False), - sa.Column('payment_id', sa.String(length=191), nullable=True), - sa.Column('transaction_id', sa.String(length=191), nullable=True), - sa.Column('quantity', sa.Integer(), server_default=sa.text('1'), nullable=False), - sa.Column('currency', sa.String(length=40), nullable=True), - sa.Column('total_amount', sa.Integer(), nullable=True), - sa.Column('payment_status', sa.String(length=40), server_default=sa.text("'wait_pay'::character varying"), nullable=False), - sa.Column('paid_at', sa.DateTime(), nullable=True), - sa.Column('pay_failed_at', sa.DateTime(), nullable=True), - sa.Column('refunded_at', sa.DateTime(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='provider_order_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('provider_orders', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('provider_name', sa.String(length=40), nullable=False), + sa.Column('account_id', postgresql.UUID(), nullable=False), + sa.Column('payment_product_id', sa.String(length=191), nullable=False), + sa.Column('payment_id', sa.String(length=191), nullable=True), + sa.Column('transaction_id', sa.String(length=191), nullable=True), + sa.Column('quantity', sa.Integer(), server_default=sa.text('1'), nullable=False), + sa.Column('currency', sa.String(length=40), nullable=True), + sa.Column('total_amount', sa.Integer(), nullable=True), + sa.Column('payment_status', sa.String(length=40), server_default=sa.text("'wait_pay'::character varying"), nullable=False), + sa.Column('paid_at', sa.DateTime(), nullable=True), + sa.Column('pay_failed_at', sa.DateTime(), nullable=True), + sa.Column('refunded_at', sa.DateTime(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='provider_order_pkey') + ) + else: + op.create_table('provider_orders', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider_name', sa.String(length=40), nullable=False), + sa.Column('account_id', models.types.StringUUID(), nullable=False), + sa.Column('payment_product_id', sa.String(length=191), nullable=False), + sa.Column('payment_id', sa.String(length=191), nullable=True), + sa.Column('transaction_id', sa.String(length=191), nullable=True), + sa.Column('quantity', sa.Integer(), server_default=sa.text('1'), nullable=False), + sa.Column('currency', sa.String(length=40), nullable=True), + sa.Column('total_amount', sa.Integer(), nullable=True), + sa.Column('payment_status', sa.String(length=40), server_default=sa.text("'wait_pay'"), nullable=False), + sa.Column('paid_at', sa.DateTime(), nullable=True), + sa.Column('pay_failed_at', sa.DateTime(), nullable=True), + sa.Column('refunded_at', sa.DateTime(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='provider_order_pkey') + ) with op.batch_alter_table('provider_orders', schema=None) as batch_op: batch_op.create_index('provider_order_tenant_provider_idx', ['tenant_id', 'provider_name'], unique=False) diff --git a/api/migrations/versions/c031d46af369_remove_app_model_config_trace_config_.py b/api/migrations/versions/c031d46af369_remove_app_model_config_trace_config_.py index f87819c367..76be794ff4 100644 --- a/api/migrations/versions/c031d46af369_remove_app_model_config_trace_config_.py +++ b/api/migrations/versions/c031d46af369_remove_app_model_config_trace_config_.py @@ -11,6 +11,10 @@ from sqlalchemy.dialects import postgresql import models.types + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'c031d46af369' down_revision = '04c602f5dc9b' @@ -20,16 +24,30 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('trace_app_config', - sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', models.types.StringUUID(), nullable=False), - sa.Column('tracing_provider', sa.String(length=255), nullable=True), - sa.Column('tracing_config', sa.JSON(), nullable=True), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), - sa.Column('is_active', sa.Boolean(), server_default=sa.text('true'), nullable=False), - sa.PrimaryKeyConstraint('id', name='trace_app_config_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('trace_app_config', + sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('tracing_provider', sa.String(length=255), nullable=True), + sa.Column('tracing_config', sa.JSON(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False), + sa.Column('is_active', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.PrimaryKeyConstraint('id', name='trace_app_config_pkey') + ) + else: + op.create_table('trace_app_config', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('tracing_provider', sa.String(length=255), nullable=True), + sa.Column('tracing_config', sa.JSON(), nullable=True), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.now(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.now(), nullable=False), + sa.Column('is_active', sa.Boolean(), server_default=sa.text('true'), nullable=False), + sa.PrimaryKeyConstraint('id', name='trace_app_config_pkey') + ) with op.batch_alter_table('trace_app_config', schema=None) as batch_op: batch_op.create_index('trace_app_config_app_id_idx', ['app_id'], unique=False) diff --git a/api/migrations/versions/c3311b089690_add_tool_meta.py b/api/migrations/versions/c3311b089690_add_tool_meta.py index e075535b0d..79f80f5553 100644 --- a/api/migrations/versions/c3311b089690_add_tool_meta.py +++ b/api/migrations/versions/c3311b089690_add_tool_meta.py @@ -8,6 +8,12 @@ Create Date: 2024-03-28 11:50:45.364875 import sqlalchemy as sa from alembic import op +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'c3311b089690' down_revision = 'e2eacc9a1b63' @@ -17,8 +23,14 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: - batch_op.add_column(sa.Column('tool_meta_str', sa.Text(), server_default=sa.text("'{}'::text"), nullable=False)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: + batch_op.add_column(sa.Column('tool_meta_str', sa.Text(), server_default=sa.text("'{}'::text"), nullable=False)) + else: + with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: + batch_op.add_column(sa.Column('tool_meta_str', models.types.LongText(), default=sa.text("'{}'"), nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/c71211c8f604_add_tool_invoke_model_log.py b/api/migrations/versions/c71211c8f604_add_tool_invoke_model_log.py index 95fb8f5d0e..e3e818d2a7 100644 --- a/api/migrations/versions/c71211c8f604_add_tool_invoke_model_log.py +++ b/api/migrations/versions/c71211c8f604_add_tool_invoke_model_log.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'c71211c8f604' down_revision = 'f25003750af4' @@ -18,28 +24,54 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('tool_model_invokes', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('user_id', postgresql.UUID(), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('provider', sa.String(length=40), nullable=False), - sa.Column('tool_type', sa.String(length=40), nullable=False), - sa.Column('tool_name', sa.String(length=40), nullable=False), - sa.Column('tool_id', postgresql.UUID(), nullable=False), - sa.Column('model_parameters', sa.Text(), nullable=False), - sa.Column('prompt_messages', sa.Text(), nullable=False), - sa.Column('model_response', sa.Text(), nullable=False), - sa.Column('prompt_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), - sa.Column('answer_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), - sa.Column('answer_unit_price', sa.Numeric(precision=10, scale=4), nullable=False), - sa.Column('answer_price_unit', sa.Numeric(precision=10, scale=7), server_default=sa.text('0.001'), nullable=False), - sa.Column('provider_response_latency', sa.Float(), server_default=sa.text('0'), nullable=False), - sa.Column('total_price', sa.Numeric(precision=10, scale=7), nullable=True), - sa.Column('currency', sa.String(length=255), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='tool_model_invoke_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('tool_model_invokes', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('user_id', postgresql.UUID(), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('provider', sa.String(length=40), nullable=False), + sa.Column('tool_type', sa.String(length=40), nullable=False), + sa.Column('tool_name', sa.String(length=40), nullable=False), + sa.Column('tool_id', postgresql.UUID(), nullable=False), + sa.Column('model_parameters', sa.Text(), nullable=False), + sa.Column('prompt_messages', sa.Text(), nullable=False), + sa.Column('model_response', sa.Text(), nullable=False), + sa.Column('prompt_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), + sa.Column('answer_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), + sa.Column('answer_unit_price', sa.Numeric(precision=10, scale=4), nullable=False), + sa.Column('answer_price_unit', sa.Numeric(precision=10, scale=7), server_default=sa.text('0.001'), nullable=False), + sa.Column('provider_response_latency', sa.Float(), server_default=sa.text('0'), nullable=False), + sa.Column('total_price', sa.Numeric(precision=10, scale=7), nullable=True), + sa.Column('currency', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_model_invoke_pkey') + ) + else: + op.create_table('tool_model_invokes', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('user_id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('provider', sa.String(length=40), nullable=False), + sa.Column('tool_type', sa.String(length=40), nullable=False), + sa.Column('tool_name', sa.String(length=40), nullable=False), + sa.Column('tool_id', models.types.StringUUID(), nullable=False), + sa.Column('model_parameters', models.types.LongText(), nullable=False), + sa.Column('prompt_messages', models.types.LongText(), nullable=False), + sa.Column('model_response', models.types.LongText(), nullable=False), + sa.Column('prompt_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), + sa.Column('answer_tokens', sa.Integer(), server_default=sa.text('0'), nullable=False), + sa.Column('answer_unit_price', sa.Numeric(precision=10, scale=4), nullable=False), + sa.Column('answer_price_unit', sa.Numeric(precision=10, scale=7), server_default=sa.text('0.001'), nullable=False), + sa.Column('provider_response_latency', sa.Float(), server_default=sa.text('0'), nullable=False), + sa.Column('total_price', sa.Numeric(precision=10, scale=7), nullable=True), + sa.Column('currency', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='tool_model_invoke_pkey') + ) # ### end Alembic commands ### diff --git a/api/migrations/versions/cc04d0998d4d_set_model_config_column_nullable.py b/api/migrations/versions/cc04d0998d4d_set_model_config_column_nullable.py index aefbe43f14..2b9f0e90a4 100644 --- a/api/migrations/versions/cc04d0998d4d_set_model_config_column_nullable.py +++ b/api/migrations/versions/cc04d0998d4d_set_model_config_column_nullable.py @@ -9,6 +9,10 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'cc04d0998d4d' down_revision = 'b289e2408ee2' @@ -18,16 +22,30 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.alter_column('provider', - existing_type=sa.VARCHAR(length=255), - nullable=True) - batch_op.alter_column('model_id', - existing_type=sa.VARCHAR(length=255), - nullable=True) - batch_op.alter_column('configs', - existing_type=postgresql.JSON(astext_type=sa.Text()), - nullable=True) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.alter_column('provider', + existing_type=sa.VARCHAR(length=255), + nullable=True) + batch_op.alter_column('model_id', + existing_type=sa.VARCHAR(length=255), + nullable=True) + batch_op.alter_column('configs', + existing_type=postgresql.JSON(astext_type=sa.Text()), + nullable=True) + else: + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.alter_column('provider', + existing_type=sa.VARCHAR(length=255), + nullable=True) + batch_op.alter_column('model_id', + existing_type=sa.VARCHAR(length=255), + nullable=True) + batch_op.alter_column('configs', + existing_type=sa.JSON(), + nullable=True) with op.batch_alter_table('apps', schema=None) as batch_op: batch_op.alter_column('api_rpm', @@ -45,6 +63,8 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() + with op.batch_alter_table('apps', schema=None) as batch_op: batch_op.alter_column('api_rpm', existing_type=sa.Integer(), @@ -56,15 +76,27 @@ def downgrade(): server_default=None, nullable=False) - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.alter_column('configs', - existing_type=postgresql.JSON(astext_type=sa.Text()), - nullable=False) - batch_op.alter_column('model_id', - existing_type=sa.VARCHAR(length=255), - nullable=False) - batch_op.alter_column('provider', - existing_type=sa.VARCHAR(length=255), - nullable=False) + if _is_pg(conn): + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.alter_column('configs', + existing_type=postgresql.JSON(astext_type=sa.Text()), + nullable=False) + batch_op.alter_column('model_id', + existing_type=sa.VARCHAR(length=255), + nullable=False) + batch_op.alter_column('provider', + existing_type=sa.VARCHAR(length=255), + nullable=False) + else: + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.alter_column('configs', + existing_type=sa.JSON(), + nullable=False) + batch_op.alter_column('model_id', + existing_type=sa.VARCHAR(length=255), + nullable=False) + batch_op.alter_column('provider', + existing_type=sa.VARCHAR(length=255), + nullable=False) # ### end Alembic commands ### diff --git a/api/migrations/versions/e1901f623fd0_add_annotation_reply.py b/api/migrations/versions/e1901f623fd0_add_annotation_reply.py index 32902c8eb0..9e02ec5d84 100644 --- a/api/migrations/versions/e1901f623fd0_add_annotation_reply.py +++ b/api/migrations/versions/e1901f623fd0_add_annotation_reply.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'e1901f623fd0' down_revision = 'fca025d3b60f' @@ -18,51 +24,98 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('app_annotation_hit_histories', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('annotation_id', postgresql.UUID(), nullable=False), - sa.Column('source', sa.Text(), nullable=False), - sa.Column('question', sa.Text(), nullable=False), - sa.Column('account_id', postgresql.UUID(), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.PrimaryKeyConstraint('id', name='app_annotation_hit_histories_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('app_annotation_hit_histories', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('annotation_id', postgresql.UUID(), nullable=False), + sa.Column('source', sa.Text(), nullable=False), + sa.Column('question', sa.Text(), nullable=False), + sa.Column('account_id', postgresql.UUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.PrimaryKeyConstraint('id', name='app_annotation_hit_histories_pkey') + ) + else: + op.create_table('app_annotation_hit_histories', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('annotation_id', models.types.StringUUID(), nullable=False), + sa.Column('source', models.types.LongText(), nullable=False), + sa.Column('question', models.types.LongText(), nullable=False), + sa.Column('account_id', models.types.StringUUID(), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.PrimaryKeyConstraint('id', name='app_annotation_hit_histories_pkey') + ) + with op.batch_alter_table('app_annotation_hit_histories', schema=None) as batch_op: batch_op.create_index('app_annotation_hit_histories_account_idx', ['account_id'], unique=False) batch_op.create_index('app_annotation_hit_histories_annotation_idx', ['annotation_id'], unique=False) batch_op.create_index('app_annotation_hit_histories_app_idx', ['app_id'], unique=False) - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('annotation_reply', sa.Text(), nullable=True)) + if _is_pg(conn): + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('annotation_reply', sa.Text(), nullable=True)) + else: + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('annotation_reply', models.types.LongText(), nullable=True)) - with op.batch_alter_table('dataset_collection_bindings', schema=None) as batch_op: - batch_op.add_column(sa.Column('type', sa.String(length=40), server_default=sa.text("'dataset'::character varying"), nullable=False)) + if _is_pg(conn): + with op.batch_alter_table('dataset_collection_bindings', schema=None) as batch_op: + batch_op.add_column(sa.Column('type', sa.String(length=40), server_default=sa.text("'dataset'::character varying"), nullable=False)) + else: + with op.batch_alter_table('dataset_collection_bindings', schema=None) as batch_op: + batch_op.add_column(sa.Column('type', sa.String(length=40), server_default=sa.text("'dataset'"), nullable=False)) - with op.batch_alter_table('message_annotations', schema=None) as batch_op: - batch_op.add_column(sa.Column('question', sa.Text(), nullable=True)) - batch_op.add_column(sa.Column('hit_count', sa.Integer(), server_default=sa.text('0'), nullable=False)) - batch_op.alter_column('conversation_id', - existing_type=postgresql.UUID(), - nullable=True) - batch_op.alter_column('message_id', - existing_type=postgresql.UUID(), - nullable=True) + if _is_pg(conn): + with op.batch_alter_table('message_annotations', schema=None) as batch_op: + batch_op.add_column(sa.Column('question', sa.Text(), nullable=True)) + batch_op.add_column(sa.Column('hit_count', sa.Integer(), server_default=sa.text('0'), nullable=False)) + batch_op.alter_column('conversation_id', + existing_type=postgresql.UUID(), + nullable=True) + batch_op.alter_column('message_id', + existing_type=postgresql.UUID(), + nullable=True) + else: + with op.batch_alter_table('message_annotations', schema=None) as batch_op: + batch_op.add_column(sa.Column('question', models.types.LongText(), nullable=True)) + batch_op.add_column(sa.Column('hit_count', sa.Integer(), server_default=sa.text('0'), nullable=False)) + batch_op.alter_column('conversation_id', + existing_type=models.types.StringUUID(), + nullable=True) + batch_op.alter_column('message_id', + existing_type=models.types.StringUUID(), + nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('message_annotations', schema=None) as batch_op: - batch_op.alter_column('message_id', - existing_type=postgresql.UUID(), - nullable=False) - batch_op.alter_column('conversation_id', - existing_type=postgresql.UUID(), - nullable=False) - batch_op.drop_column('hit_count') - batch_op.drop_column('question') + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('message_annotations', schema=None) as batch_op: + batch_op.alter_column('message_id', + existing_type=postgresql.UUID(), + nullable=False) + batch_op.alter_column('conversation_id', + existing_type=postgresql.UUID(), + nullable=False) + batch_op.drop_column('hit_count') + batch_op.drop_column('question') + else: + with op.batch_alter_table('message_annotations', schema=None) as batch_op: + batch_op.alter_column('message_id', + existing_type=models.types.StringUUID(), + nullable=False) + batch_op.alter_column('conversation_id', + existing_type=models.types.StringUUID(), + nullable=False) + batch_op.drop_column('hit_count') + batch_op.drop_column('question') with op.batch_alter_table('dataset_collection_bindings', schema=None) as batch_op: batch_op.drop_column('type') diff --git a/api/migrations/versions/e2eacc9a1b63_add_status_for_message.py b/api/migrations/versions/e2eacc9a1b63_add_status_for_message.py index 08f994a41f..0eeb68360e 100644 --- a/api/migrations/versions/e2eacc9a1b63_add_status_for_message.py +++ b/api/migrations/versions/e2eacc9a1b63_add_status_for_message.py @@ -8,6 +8,12 @@ Create Date: 2024-03-21 09:31:27.342221 import sqlalchemy as sa from alembic import op +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'e2eacc9a1b63' down_revision = '563cf8bf777b' @@ -17,14 +23,23 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() + with op.batch_alter_table('conversations', schema=None) as batch_op: batch_op.add_column(sa.Column('invoke_from', sa.String(length=255), nullable=True)) - with op.batch_alter_table('messages', schema=None) as batch_op: - batch_op.add_column(sa.Column('status', sa.String(length=255), server_default=sa.text("'normal'::character varying"), nullable=False)) - batch_op.add_column(sa.Column('error', sa.Text(), nullable=True)) - batch_op.add_column(sa.Column('message_metadata', sa.Text(), nullable=True)) - batch_op.add_column(sa.Column('invoke_from', sa.String(length=255), nullable=True)) + if _is_pg(conn): + with op.batch_alter_table('messages', schema=None) as batch_op: + batch_op.add_column(sa.Column('status', sa.String(length=255), server_default=sa.text("'normal'::character varying"), nullable=False)) + batch_op.add_column(sa.Column('error', sa.Text(), nullable=True)) + batch_op.add_column(sa.Column('message_metadata', sa.Text(), nullable=True)) + batch_op.add_column(sa.Column('invoke_from', sa.String(length=255), nullable=True)) + else: + with op.batch_alter_table('messages', schema=None) as batch_op: + batch_op.add_column(sa.Column('status', sa.String(length=255), server_default=sa.text("'normal'"), nullable=False)) + batch_op.add_column(sa.Column('error', models.types.LongText(), nullable=True)) + batch_op.add_column(sa.Column('message_metadata', models.types.LongText(), nullable=True)) + batch_op.add_column(sa.Column('invoke_from', sa.String(length=255), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/e32f6ccb87c6_e08af0a69ccefbb59fa80c778efee300bb780980.py b/api/migrations/versions/e32f6ccb87c6_e08af0a69ccefbb59fa80c778efee300bb780980.py index 3d7dd1fabf..c52605667b 100644 --- a/api/migrations/versions/e32f6ccb87c6_e08af0a69ccefbb59fa80c778efee300bb780980.py +++ b/api/migrations/versions/e32f6ccb87c6_e08af0a69ccefbb59fa80c778efee300bb780980.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'e32f6ccb87c6' down_revision = '614f77cecc48' @@ -18,28 +24,52 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.create_table('data_source_bindings', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('tenant_id', postgresql.UUID(), nullable=False), - sa.Column('access_token', sa.String(length=255), nullable=False), - sa.Column('provider', sa.String(length=255), nullable=False), - sa.Column('source_info', postgresql.JSONB(astext_type=sa.Text()), nullable=False), - sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), - sa.Column('disabled', sa.Boolean(), server_default=sa.text('false'), nullable=True), - sa.PrimaryKeyConstraint('id', name='source_binding_pkey') - ) + conn = op.get_bind() + + if _is_pg(conn): + op.create_table('data_source_bindings', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('tenant_id', postgresql.UUID(), nullable=False), + sa.Column('access_token', sa.String(length=255), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('source_info', postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False), + sa.Column('disabled', sa.Boolean(), server_default=sa.text('false'), nullable=True), + sa.PrimaryKeyConstraint('id', name='source_binding_pkey') + ) + else: + op.create_table('data_source_bindings', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('tenant_id', models.types.StringUUID(), nullable=False), + sa.Column('access_token', sa.String(length=255), nullable=False), + sa.Column('provider', sa.String(length=255), nullable=False), + sa.Column('source_info', models.types.AdjustedJSON(astext_type=sa.Text()), nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), + sa.Column('disabled', sa.Boolean(), server_default=sa.text('false'), nullable=True), + sa.PrimaryKeyConstraint('id', name='source_binding_pkey') + ) + with op.batch_alter_table('data_source_bindings', schema=None) as batch_op: batch_op.create_index('source_binding_tenant_id_idx', ['tenant_id'], unique=False) - batch_op.create_index('source_info_idx', ['source_info'], unique=False, postgresql_using='gin') + if _is_pg(conn): + batch_op.create_index('source_info_idx', ['source_info'], unique=False, postgresql_using='gin') + else: + pass # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() + with op.batch_alter_table('data_source_bindings', schema=None) as batch_op: - batch_op.drop_index('source_info_idx', postgresql_using='gin') + if _is_pg(conn): + batch_op.drop_index('source_info_idx', postgresql_using='gin') + else: + pass batch_op.drop_index('source_binding_tenant_id_idx') op.drop_table('data_source_bindings') diff --git a/api/migrations/versions/e8883b0148c9_add_dataset_model_name.py b/api/migrations/versions/e8883b0148c9_add_dataset_model_name.py index 875683d68e..b7bb0dd4df 100644 --- a/api/migrations/versions/e8883b0148c9_add_dataset_model_name.py +++ b/api/migrations/versions/e8883b0148c9_add_dataset_model_name.py @@ -8,6 +8,10 @@ Create Date: 2023-08-15 20:54:58.936787 import sqlalchemy as sa from alembic import op + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'e8883b0148c9' down_revision = '2c8af9671032' @@ -17,9 +21,18 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('datasets', schema=None) as batch_op: - batch_op.add_column(sa.Column('embedding_model', sa.String(length=255), server_default=sa.text("'text-embedding-ada-002'::character varying"), nullable=False)) - batch_op.add_column(sa.Column('embedding_model_provider', sa.String(length=255), server_default=sa.text("'openai'::character varying"), nullable=False)) + conn = op.get_bind() + + if _is_pg(conn): + # PostgreSQL: Keep original syntax + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.add_column(sa.Column('embedding_model', sa.String(length=255), server_default=sa.text("'text-embedding-ada-002'::character varying"), nullable=False)) + batch_op.add_column(sa.Column('embedding_model_provider', sa.String(length=255), server_default=sa.text("'openai'::character varying"), nullable=False)) + else: + # MySQL: Use compatible syntax + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.add_column(sa.Column('embedding_model', sa.String(length=255), server_default=sa.text("'text-embedding-ada-002'"), nullable=False)) + batch_op.add_column(sa.Column('embedding_model_provider', sa.String(length=255), server_default=sa.text("'openai'"), nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/eeb2e349e6ac_increase_max_model_name_length.py b/api/migrations/versions/eeb2e349e6ac_increase_max_model_name_length.py index 434531b6c8..6125744a1f 100644 --- a/api/migrations/versions/eeb2e349e6ac_increase_max_model_name_length.py +++ b/api/migrations/versions/eeb2e349e6ac_increase_max_model_name_length.py @@ -10,6 +10,10 @@ from alembic import op import models as models + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'eeb2e349e6ac' down_revision = '53bf8af60645' @@ -19,30 +23,50 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() + with op.batch_alter_table('dataset_collection_bindings', schema=None) as batch_op: batch_op.alter_column('model_name', existing_type=sa.VARCHAR(length=40), type_=sa.String(length=255), existing_nullable=False) - with op.batch_alter_table('embeddings', schema=None) as batch_op: - batch_op.alter_column('model_name', - existing_type=sa.VARCHAR(length=40), - type_=sa.String(length=255), - existing_nullable=False, - existing_server_default=sa.text("'text-embedding-ada-002'::character varying")) + if _is_pg(conn): + with op.batch_alter_table('embeddings', schema=None) as batch_op: + batch_op.alter_column('model_name', + existing_type=sa.VARCHAR(length=40), + type_=sa.String(length=255), + existing_nullable=False, + existing_server_default=sa.text("'text-embedding-ada-002'::character varying")) + else: + with op.batch_alter_table('embeddings', schema=None) as batch_op: + batch_op.alter_column('model_name', + existing_type=sa.VARCHAR(length=40), + type_=sa.String(length=255), + existing_nullable=False, + existing_server_default=sa.text("'text-embedding-ada-002'")) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('embeddings', schema=None) as batch_op: - batch_op.alter_column('model_name', - existing_type=sa.String(length=255), - type_=sa.VARCHAR(length=40), - existing_nullable=False, - existing_server_default=sa.text("'text-embedding-ada-002'::character varying")) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('embeddings', schema=None) as batch_op: + batch_op.alter_column('model_name', + existing_type=sa.String(length=255), + type_=sa.VARCHAR(length=40), + existing_nullable=False, + existing_server_default=sa.text("'text-embedding-ada-002'::character varying")) + else: + with op.batch_alter_table('embeddings', schema=None) as batch_op: + batch_op.alter_column('model_name', + existing_type=sa.String(length=255), + type_=sa.VARCHAR(length=40), + existing_nullable=False, + existing_server_default=sa.text("'text-embedding-ada-002'")) with op.batch_alter_table('dataset_collection_bindings', schema=None) as batch_op: batch_op.alter_column('model_name', diff --git a/api/migrations/versions/f25003750af4_add_created_updated_at.py b/api/migrations/versions/f25003750af4_add_created_updated_at.py index 178eaf2380..f2752dfbb7 100644 --- a/api/migrations/versions/f25003750af4_add_created_updated_at.py +++ b/api/migrations/versions/f25003750af4_add_created_updated_at.py @@ -8,6 +8,10 @@ Create Date: 2024-01-07 04:53:24.441861 import sqlalchemy as sa from alembic import op + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'f25003750af4' down_revision = '00bacef91f18' @@ -17,9 +21,18 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.add_column(sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False)) - batch_op.add_column(sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False)) + conn = op.get_bind() + + if _is_pg(conn): + # PostgreSQL: Keep original syntax + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False)) + batch_op.add_column(sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False)) + else: + # MySQL: Use compatible syntax + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False)) + batch_op.add_column(sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/f2a6fc85e260_add_anntation_history_message_id.py b/api/migrations/versions/f2a6fc85e260_add_anntation_history_message_id.py index dc9392a92c..02098e91c1 100644 --- a/api/migrations/versions/f2a6fc85e260_add_anntation_history_message_id.py +++ b/api/migrations/versions/f2a6fc85e260_add_anntation_history_message_id.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'f2a6fc85e260' down_revision = '46976cc39132' @@ -18,9 +24,16 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('app_annotation_hit_histories', schema=None) as batch_op: - batch_op.add_column(sa.Column('message_id', postgresql.UUID(), nullable=False)) - batch_op.create_index('app_annotation_hit_histories_message_idx', ['message_id'], unique=False) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('app_annotation_hit_histories', schema=None) as batch_op: + batch_op.add_column(sa.Column('message_id', postgresql.UUID(), nullable=False)) + batch_op.create_index('app_annotation_hit_histories_message_idx', ['message_id'], unique=False) + else: + with op.batch_alter_table('app_annotation_hit_histories', schema=None) as batch_op: + batch_op.add_column(sa.Column('message_id', models.types.StringUUID(), nullable=False)) + batch_op.create_index('app_annotation_hit_histories_message_idx', ['message_id'], unique=False) # ### end Alembic commands ### diff --git a/api/migrations/versions/f9107f83abab_add_desc_for_apps.py b/api/migrations/versions/f9107f83abab_add_desc_for_apps.py index 3e5ae0d67d..8a3f479217 100644 --- a/api/migrations/versions/f9107f83abab_add_desc_for_apps.py +++ b/api/migrations/versions/f9107f83abab_add_desc_for_apps.py @@ -8,6 +8,12 @@ Create Date: 2024-02-28 08:16:14.090481 import sqlalchemy as sa from alembic import op +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'f9107f83abab' down_revision = 'cc04d0998d4d' @@ -17,8 +23,14 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('apps', schema=None) as batch_op: - batch_op.add_column(sa.Column('description', sa.Text(), server_default=sa.text("''::character varying"), nullable=False)) + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('apps', schema=None) as batch_op: + batch_op.add_column(sa.Column('description', sa.Text(), server_default=sa.text("''::character varying"), nullable=False)) + else: + with op.batch_alter_table('apps', schema=None) as batch_op: + batch_op.add_column(sa.Column('description', models.types.LongText(), default=sa.text("''"), nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/fca025d3b60f_add_dataset_retrival_model.py b/api/migrations/versions/fca025d3b60f_add_dataset_retrival_model.py index 52495be60a..4a13133c1c 100644 --- a/api/migrations/versions/fca025d3b60f_add_dataset_retrival_model.py +++ b/api/migrations/versions/fca025d3b60f_add_dataset_retrival_model.py @@ -9,6 +9,12 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import postgresql +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + # revision identifiers, used by Alembic. revision = 'fca025d3b60f' down_revision = '8fe468ba0ca5' @@ -18,26 +24,48 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() + op.drop_table('sessions') - with op.batch_alter_table('datasets', schema=None) as batch_op: - batch_op.add_column(sa.Column('retrieval_model', postgresql.JSONB(astext_type=sa.Text()), nullable=True)) - batch_op.create_index('retrieval_model_idx', ['retrieval_model'], unique=False, postgresql_using='gin') + if _is_pg(conn): + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.add_column(sa.Column('retrieval_model', postgresql.JSONB(astext_type=sa.Text()), nullable=True)) + batch_op.create_index('retrieval_model_idx', ['retrieval_model'], unique=False, postgresql_using='gin') + else: + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.add_column(sa.Column('retrieval_model', models.types.AdjustedJSON(astext_type=sa.Text()), nullable=True)) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('datasets', schema=None) as batch_op: - batch_op.drop_index('retrieval_model_idx', postgresql_using='gin') - batch_op.drop_column('retrieval_model') + conn = op.get_bind() + + if _is_pg(conn): + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.drop_index('retrieval_model_idx', postgresql_using='gin') + batch_op.drop_column('retrieval_model') + else: + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.drop_column('retrieval_model') - op.create_table('sessions', - sa.Column('id', sa.INTEGER(), autoincrement=True, nullable=False), - sa.Column('session_id', sa.VARCHAR(length=255), autoincrement=False, nullable=True), - sa.Column('data', postgresql.BYTEA(), autoincrement=False, nullable=True), - sa.Column('expiry', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('id', name='sessions_pkey'), - sa.UniqueConstraint('session_id', name='sessions_session_id_key') - ) + if _is_pg(conn): + op.create_table('sessions', + sa.Column('id', sa.INTEGER(), autoincrement=True, nullable=False), + sa.Column('session_id', sa.VARCHAR(length=255), autoincrement=False, nullable=True), + sa.Column('data', postgresql.BYTEA(), autoincrement=False, nullable=True), + sa.Column('expiry', postgresql.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('id', name='sessions_pkey'), + sa.UniqueConstraint('session_id', name='sessions_session_id_key') + ) + else: + op.create_table('sessions', + sa.Column('id', sa.INTEGER(), autoincrement=True, nullable=False), + sa.Column('session_id', sa.VARCHAR(length=255), autoincrement=False, nullable=True), + sa.Column('data', models.types.BinaryData(), autoincrement=False, nullable=True), + sa.Column('expiry', sa.TIMESTAMP(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('id', name='sessions_pkey'), + sa.UniqueConstraint('session_id', name='sessions_session_id_key') + ) # ### end Alembic commands ### diff --git a/api/migrations/versions/fecff1c3da27_remove_extra_tracing_app_config_table .py b/api/migrations/versions/fecff1c3da27_remove_extra_tracing_app_config_table .py deleted file mode 100644 index 6f76a361d9..0000000000 --- a/api/migrations/versions/fecff1c3da27_remove_extra_tracing_app_config_table .py +++ /dev/null @@ -1,50 +0,0 @@ -"""remove extra tracing app config table and add idx_dataset_permissions_tenant_id - -Revision ID: fecff1c3da27 -Revises: 408176b91ad3 -Create Date: 2024-07-19 12:03:21.217463 - -""" -import sqlalchemy as sa -from alembic import op -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = 'fecff1c3da27' -down_revision = '408176b91ad3' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('tracing_app_configs') - - # idx_dataset_permissions_tenant_id - with op.batch_alter_table('dataset_permissions', schema=None) as batch_op: - batch_op.create_index('idx_dataset_permissions_tenant_id', ['tenant_id']) - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - 'tracing_app_configs', - sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), - sa.Column('app_id', postgresql.UUID(), nullable=False), - sa.Column('tracing_provider', sa.String(length=255), nullable=True), - sa.Column('tracing_config', postgresql.JSON(astext_type=sa.Text()), nullable=True), - sa.Column( - 'created_at', postgresql.TIMESTAMP(), server_default=sa.text('now()'), autoincrement=False, nullable=False - ), - sa.Column( - 'updated_at', postgresql.TIMESTAMP(), server_default=sa.text('now()'), autoincrement=False, nullable=False - ), - sa.PrimaryKeyConstraint('id', name='tracing_app_config_pkey') - ) - - with op.batch_alter_table('dataset_permissions', schema=None) as batch_op: - batch_op.drop_index('idx_dataset_permissions_tenant_id') - - # ### end Alembic commands ### diff --git a/api/migrations/versions/fecff1c3da27_remove_extra_tracing_app_config_table.py b/api/migrations/versions/fecff1c3da27_remove_extra_tracing_app_config_table.py new file mode 100644 index 0000000000..ab84ec0d87 --- /dev/null +++ b/api/migrations/versions/fecff1c3da27_remove_extra_tracing_app_config_table.py @@ -0,0 +1,74 @@ +"""remove extra tracing app config table and add idx_dataset_permissions_tenant_id + +Revision ID: fecff1c3da27 +Revises: 408176b91ad3 +Create Date: 2024-07-19 12:03:21.217463 + +""" +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +import models.types + + +def _is_pg(conn): + return conn.dialect.name == "postgresql" + +# revision identifiers, used by Alembic. +revision = 'fecff1c3da27' +down_revision = '408176b91ad3' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('tracing_app_configs') + + # idx_dataset_permissions_tenant_id + with op.batch_alter_table('dataset_permissions', schema=None) as batch_op: + batch_op.create_index('idx_dataset_permissions_tenant_id', ['tenant_id']) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + conn = op.get_bind() + + if _is_pg(conn): + op.create_table( + 'tracing_app_configs', + sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), + sa.Column('app_id', postgresql.UUID(), nullable=False), + sa.Column('tracing_provider', sa.String(length=255), nullable=True), + sa.Column('tracing_config', postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.Column( + 'created_at', postgresql.TIMESTAMP(), server_default=sa.text('now()'), autoincrement=False, nullable=False + ), + sa.Column( + 'updated_at', postgresql.TIMESTAMP(), server_default=sa.text('now()'), autoincrement=False, nullable=False + ), + sa.PrimaryKeyConstraint('id', name='tracing_app_config_pkey') + ) + else: + op.create_table( + 'tracing_app_configs', + sa.Column('id', models.types.StringUUID(), nullable=False), + sa.Column('app_id', models.types.StringUUID(), nullable=False), + sa.Column('tracing_provider', sa.String(length=255), nullable=True), + sa.Column('tracing_config', sa.JSON(), nullable=True), + sa.Column( + 'created_at', sa.TIMESTAMP(), server_default=sa.func.now(), autoincrement=False, nullable=False + ), + sa.Column( + 'updated_at', sa.TIMESTAMP(), server_default=sa.func.now(), autoincrement=False, nullable=False + ), + sa.PrimaryKeyConstraint('id', name='tracing_app_config_pkey') + ) + + with op.batch_alter_table('dataset_permissions', schema=None) as batch_op: + batch_op.drop_index('idx_dataset_permissions_tenant_id') + + # ### end Alembic commands ### diff --git a/api/models/__init__.py b/api/models/__init__.py index 2a228b6c06..b10b6918a4 100644 --- a/api/models/__init__.py +++ b/api/models/__init__.py @@ -103,6 +103,7 @@ from .workflow import ( WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload, WorkflowNodeExecutionTriggeredFrom, + WorkflowPause, WorkflowRun, WorkflowType, ) @@ -199,6 +200,7 @@ __all__ = [ "WorkflowNodeExecutionModel", "WorkflowNodeExecutionOffload", "WorkflowNodeExecutionTriggeredFrom", + "WorkflowPause", "WorkflowRun", "WorkflowRunTriggeredFrom", "WorkflowSchedulePlan", diff --git a/api/models/account.py b/api/models/account.py index 400a2c6362..b1dafed0ed 100644 --- a/api/models/account.py +++ b/api/models/account.py @@ -3,6 +3,7 @@ import json from dataclasses import field from datetime import datetime from typing import Any, Optional +from uuid import uuid4 import sqlalchemy as sa from flask_login import UserMixin @@ -10,10 +11,9 @@ from sqlalchemy import DateTime, String, func, select from sqlalchemy.orm import Mapped, Session, mapped_column from typing_extensions import deprecated -from models.base import TypeBase - +from .base import TypeBase from .engine import db -from .types import StringUUID +from .types import LongText, StringUUID class TenantAccountRole(enum.StrEnum): @@ -88,7 +88,7 @@ class Account(UserMixin, TypeBase): __tablename__ = "accounts" __table_args__ = (sa.PrimaryKeyConstraint("id", name="account_pkey"), sa.Index("account_email_idx", "email")) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) name: Mapped[str] = mapped_column(String(255)) email: Mapped[str] = mapped_column(String(255)) password: Mapped[str | None] = mapped_column(String(255), default=None) @@ -102,15 +102,13 @@ class Account(UserMixin, TypeBase): last_active_at: Mapped[datetime] = mapped_column( DateTime, server_default=func.current_timestamp(), nullable=False, init=False ) - status: Mapped[str] = mapped_column( - String(16), server_default=sa.text("'active'::character varying"), default="active" - ) + status: Mapped[str] = mapped_column(String(16), server_default=sa.text("'active'"), default="active") initialized_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) created_at: Mapped[datetime] = mapped_column( DateTime, server_default=func.current_timestamp(), nullable=False, init=False ) updated_at: Mapped[datetime] = mapped_column( - DateTime, server_default=func.current_timestamp(), nullable=False, init=False + DateTime, server_default=func.current_timestamp(), nullable=False, init=False, onupdate=func.current_timestamp() ) role: TenantAccountRole | None = field(default=None, init=False) @@ -237,20 +235,18 @@ class Tenant(TypeBase): __tablename__ = "tenants" __table_args__ = (sa.PrimaryKeyConstraint("id", name="tenant_pkey"),) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) name: Mapped[str] = mapped_column(String(255)) - encrypt_public_key: Mapped[str | None] = mapped_column(sa.Text, default=None) - plan: Mapped[str] = mapped_column( - String(255), server_default=sa.text("'basic'::character varying"), default="basic" - ) - status: Mapped[str] = mapped_column( - String(255), server_default=sa.text("'normal'::character varying"), default="normal" - ) - custom_config: Mapped[str | None] = mapped_column(sa.Text, default=None) + encrypt_public_key: Mapped[str | None] = mapped_column(LongText, default=None) + plan: Mapped[str] = mapped_column(String(255), server_default=sa.text("'basic'"), default="basic") + status: Mapped[str] = mapped_column(String(255), server_default=sa.text("'normal'"), default="normal") + custom_config: Mapped[str | None] = mapped_column(LongText, default=None) created_at: Mapped[datetime] = mapped_column( DateTime, server_default=func.current_timestamp(), nullable=False, init=False ) - updated_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.current_timestamp(), init=False) + updated_at: Mapped[datetime] = mapped_column( + DateTime, server_default=func.current_timestamp(), init=False, onupdate=func.current_timestamp() + ) def get_accounts(self) -> list[Account]: return list( @@ -279,7 +275,7 @@ class TenantAccountJoin(TypeBase): sa.UniqueConstraint("tenant_id", "account_id", name="unique_tenant_account_join"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) tenant_id: Mapped[str] = mapped_column(StringUUID) account_id: Mapped[str] = mapped_column(StringUUID) current: Mapped[bool] = mapped_column(sa.Boolean, server_default=sa.text("false"), default=False) @@ -289,7 +285,7 @@ class TenantAccountJoin(TypeBase): DateTime, server_default=func.current_timestamp(), nullable=False, init=False ) updated_at: Mapped[datetime] = mapped_column( - DateTime, server_default=func.current_timestamp(), nullable=False, init=False + DateTime, server_default=func.current_timestamp(), nullable=False, init=False, onupdate=func.current_timestamp() ) @@ -301,7 +297,7 @@ class AccountIntegrate(TypeBase): sa.UniqueConstraint("provider", "open_id", name="unique_provider_open_id"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) account_id: Mapped[str] = mapped_column(StringUUID) provider: Mapped[str] = mapped_column(String(16)) open_id: Mapped[str] = mapped_column(String(255)) @@ -310,7 +306,7 @@ class AccountIntegrate(TypeBase): DateTime, server_default=func.current_timestamp(), nullable=False, init=False ) updated_at: Mapped[datetime] = mapped_column( - DateTime, server_default=func.current_timestamp(), nullable=False, init=False + DateTime, server_default=func.current_timestamp(), nullable=False, init=False, onupdate=func.current_timestamp() ) @@ -325,15 +321,13 @@ class InvitationCode(TypeBase): id: Mapped[int] = mapped_column(sa.Integer, init=False) batch: Mapped[str] = mapped_column(String(255)) code: Mapped[str] = mapped_column(String(32)) - status: Mapped[str] = mapped_column( - String(16), server_default=sa.text("'unused'::character varying"), default="unused" - ) + status: Mapped[str] = mapped_column(String(16), server_default=sa.text("'unused'"), default="unused") used_at: Mapped[datetime | None] = mapped_column(DateTime, default=None) used_by_tenant_id: Mapped[str | None] = mapped_column(StringUUID, default=None) used_by_account_id: Mapped[str | None] = mapped_column(StringUUID, default=None) deprecated_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) created_at: Mapped[datetime] = mapped_column( - DateTime, server_default=sa.text("CURRENT_TIMESTAMP(0)"), nullable=False, init=False + DateTime, server_default=sa.func.current_timestamp(), nullable=False, init=False ) @@ -354,7 +348,7 @@ class TenantPluginPermission(TypeBase): sa.UniqueConstraint("tenant_id", name="unique_tenant_plugin"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) install_permission: Mapped[InstallPermission] = mapped_column( String(16), nullable=False, server_default="everyone", default=InstallPermission.EVERYONE @@ -381,7 +375,7 @@ class TenantPluginAutoUpgradeStrategy(TypeBase): sa.UniqueConstraint("tenant_id", name="unique_tenant_plugin_auto_upgrade_strategy"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) strategy_setting: Mapped[StrategySetting] = mapped_column( String(16), nullable=False, server_default="fix_only", default=StrategySetting.FIX_ONLY @@ -389,12 +383,12 @@ class TenantPluginAutoUpgradeStrategy(TypeBase): upgrade_mode: Mapped[UpgradeMode] = mapped_column( String(16), nullable=False, server_default="exclude", default=UpgradeMode.EXCLUDE ) - exclude_plugins: Mapped[list[str]] = mapped_column(sa.ARRAY(String(255)), nullable=False, default_factory=list) - include_plugins: Mapped[list[str]] = mapped_column(sa.ARRAY(String(255)), nullable=False, default_factory=list) + exclude_plugins: Mapped[list[str]] = mapped_column(sa.JSON, nullable=False, default_factory=list) + include_plugins: Mapped[list[str]] = mapped_column(sa.JSON, nullable=False, default_factory=list) upgrade_time_of_day: Mapped[int] = mapped_column(sa.Integer, nullable=False, default=0) created_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, server_default=func.current_timestamp(), init=False ) updated_at: Mapped[datetime] = mapped_column( - DateTime, nullable=False, server_default=func.current_timestamp(), init=False + DateTime, nullable=False, server_default=func.current_timestamp(), init=False, onupdate=func.current_timestamp() ) diff --git a/api/models/api_based_extension.py b/api/models/api_based_extension.py index e86826fc3d..99d33908f8 100644 --- a/api/models/api_based_extension.py +++ b/api/models/api_based_extension.py @@ -1,12 +1,13 @@ import enum from datetime import datetime +from uuid import uuid4 import sqlalchemy as sa -from sqlalchemy import DateTime, String, Text, func +from sqlalchemy import DateTime, String, func from sqlalchemy.orm import Mapped, mapped_column -from .base import Base -from .types import StringUUID +from .base import TypeBase +from .types import LongText, StringUUID class APIBasedExtensionPoint(enum.StrEnum): @@ -16,16 +17,18 @@ class APIBasedExtensionPoint(enum.StrEnum): APP_MODERATION_OUTPUT = "app.moderation.output" -class APIBasedExtension(Base): +class APIBasedExtension(TypeBase): __tablename__ = "api_based_extensions" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="api_based_extension_pkey"), sa.Index("api_based_extension_tenant_idx", "tenant_id"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) - tenant_id = mapped_column(StringUUID, nullable=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) + tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) name: Mapped[str] = mapped_column(String(255), nullable=False) api_endpoint: Mapped[str] = mapped_column(String(255), nullable=False) - api_key = mapped_column(Text, nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + api_key: Mapped[str] = mapped_column(LongText, nullable=False) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) diff --git a/api/models/base.py b/api/models/base.py index 76848825fe..c8a5e20f25 100644 --- a/api/models/base.py +++ b/api/models/base.py @@ -1,6 +1,13 @@ -from sqlalchemy.orm import DeclarativeBase, MappedAsDataclass +from datetime import datetime -from models.engine import metadata +from sqlalchemy import DateTime, func +from sqlalchemy.orm import DeclarativeBase, Mapped, MappedAsDataclass, mapped_column + +from libs.datetime_utils import naive_utc_now +from libs.uuid_utils import uuidv7 + +from .engine import metadata +from .types import StringUUID class Base(DeclarativeBase): @@ -13,3 +20,33 @@ class TypeBase(MappedAsDataclass, DeclarativeBase): """ metadata = metadata + + +class DefaultFieldsMixin: + id: Mapped[str] = mapped_column( + StringUUID, + primary_key=True, + # NOTE: The default serve as fallback mechanisms. + # The application can generate the `id` before saving to optimize + # the insertion process (especially for interdependent models) + # and reduce database roundtrips. + default=lambda: str(uuidv7()), + ) + + created_at: Mapped[datetime] = mapped_column( + DateTime, + nullable=False, + default=naive_utc_now, + server_default=func.current_timestamp(), + ) + + updated_at: Mapped[datetime] = mapped_column( + __name_pos=DateTime, + nullable=False, + default=naive_utc_now, + server_default=func.current_timestamp(), + onupdate=func.current_timestamp(), + ) + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}(id={self.id})>" diff --git a/api/models/dataset.py b/api/models/dataset.py index 4a9e2688b8..4bc802bb1c 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -11,23 +11,24 @@ import time from datetime import datetime from json import JSONDecodeError from typing import Any, cast +from uuid import uuid4 import sqlalchemy as sa from sqlalchemy import DateTime, String, func, select -from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import Mapped, Session, mapped_column from configs import dify_config from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource from core.rag.retrieval.retrieval_methods import RetrievalMethod from extensions.ext_storage import storage +from libs.uuid_utils import uuidv7 from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule from .account import Account -from .base import Base +from .base import Base, TypeBase from .engine import db from .model import App, Tag, TagBinding, UploadFile -from .types import StringUUID +from .types import AdjustedJSON, BinaryData, LongText, StringUUID, adjusted_json_index logger = logging.getLogger(__name__) @@ -43,36 +44,38 @@ class Dataset(Base): __table_args__ = ( sa.PrimaryKeyConstraint("id", name="dataset_pkey"), sa.Index("dataset_tenant_idx", "tenant_id"), - sa.Index("retrieval_model_idx", "retrieval_model", postgresql_using="gin"), + adjusted_json_index("retrieval_model_idx", "retrieval_model"), ) INDEXING_TECHNIQUE_LIST = ["high_quality", "economy", None] PROVIDER_LIST = ["vendor", "external", None] - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id: Mapped[str] = mapped_column(StringUUID) name: Mapped[str] = mapped_column(String(255)) - description = mapped_column(sa.Text, nullable=True) - provider: Mapped[str] = mapped_column(String(255), server_default=sa.text("'vendor'::character varying")) - permission: Mapped[str] = mapped_column(String(255), server_default=sa.text("'only_me'::character varying")) + description = mapped_column(LongText, nullable=True) + provider: Mapped[str] = mapped_column(String(255), server_default=sa.text("'vendor'")) + permission: Mapped[str] = mapped_column(String(255), server_default=sa.text("'only_me'")) data_source_type = mapped_column(String(255)) indexing_technique: Mapped[str | None] = mapped_column(String(255)) - index_struct = mapped_column(sa.Text, nullable=True) + index_struct = mapped_column(LongText, nullable=True) created_by = mapped_column(StringUUID, nullable=False) created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) updated_by = mapped_column(StringUUID, nullable=True) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) - embedding_model = mapped_column(db.String(255), nullable=True) - embedding_model_provider = mapped_column(db.String(255), nullable=True) - keyword_number = mapped_column(sa.Integer, nullable=True, server_default=db.text("10")) + updated_at = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) + embedding_model = mapped_column(sa.String(255), nullable=True) + embedding_model_provider = mapped_column(sa.String(255), nullable=True) + keyword_number = mapped_column(sa.Integer, nullable=True, server_default=sa.text("10")) collection_binding_id = mapped_column(StringUUID, nullable=True) - retrieval_model = mapped_column(JSONB, nullable=True) - built_in_field_enabled = mapped_column(sa.Boolean, nullable=False, server_default=db.text("false")) - icon_info = mapped_column(JSONB, nullable=True) - runtime_mode = mapped_column(db.String(255), nullable=True, server_default=db.text("'general'::character varying")) + retrieval_model = mapped_column(AdjustedJSON, nullable=True) + built_in_field_enabled = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) + icon_info = mapped_column(AdjustedJSON, nullable=True) + runtime_mode = mapped_column(sa.String(255), nullable=True, server_default=sa.text("'general'")) pipeline_id = mapped_column(StringUUID, nullable=True) - chunk_structure = mapped_column(db.String(255), nullable=True) - enable_api = mapped_column(sa.Boolean, nullable=False, server_default=db.text("true")) + chunk_structure = mapped_column(sa.String(255), nullable=True) + enable_api = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true")) @property def total_documents(self): @@ -117,6 +120,13 @@ class Dataset(Base): def created_by_account(self): return db.session.get(Account, self.created_by) + @property + def author_name(self) -> str | None: + account = db.session.get(Account, self.created_by) + if account: + return account.name + return None + @property def latest_process_rule(self): return ( @@ -222,7 +232,7 @@ class Dataset(Base): ExternalKnowledgeApis.id == external_knowledge_binding.external_knowledge_api_id ) ) - if not external_knowledge_api: + if external_knowledge_api is None or external_knowledge_api.settings is None: return None return { "external_knowledge_id": external_knowledge_binding.external_knowledge_id, @@ -304,10 +314,10 @@ class DatasetProcessRule(Base): sa.Index("dataset_process_rule_dataset_id_idx", "dataset_id"), ) - id = mapped_column(StringUUID, nullable=False, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, nullable=False, default=lambda: str(uuid4())) dataset_id = mapped_column(StringUUID, nullable=False) - mode = mapped_column(String(255), nullable=False, server_default=sa.text("'automatic'::character varying")) - rules = mapped_column(sa.Text, nullable=True) + mode = mapped_column(String(255), nullable=False, server_default=sa.text("'automatic'")) + rules = mapped_column(LongText, nullable=True) created_by = mapped_column(StringUUID, nullable=False) created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) @@ -344,16 +354,16 @@ class Document(Base): sa.Index("document_dataset_id_idx", "dataset_id"), sa.Index("document_is_paused_idx", "is_paused"), sa.Index("document_tenant_idx", "tenant_id"), - sa.Index("document_metadata_idx", "doc_metadata", postgresql_using="gin"), + adjusted_json_index("document_metadata_idx", "doc_metadata"), ) # initial fields - id = mapped_column(StringUUID, nullable=False, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, nullable=False, default=lambda: str(uuid4())) tenant_id = mapped_column(StringUUID, nullable=False) dataset_id = mapped_column(StringUUID, nullable=False) position: Mapped[int] = mapped_column(sa.Integer, nullable=False) data_source_type: Mapped[str] = mapped_column(String(255), nullable=False) - data_source_info = mapped_column(sa.Text, nullable=True) + data_source_info = mapped_column(LongText, nullable=True) dataset_process_rule_id = mapped_column(StringUUID, nullable=True) batch: Mapped[str] = mapped_column(String(255), nullable=False) name: Mapped[str] = mapped_column(String(255), nullable=False) @@ -366,7 +376,7 @@ class Document(Base): processing_started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) # parsing - file_id = mapped_column(sa.Text, nullable=True) + file_id = mapped_column(LongText, nullable=True) word_count: Mapped[int | None] = mapped_column(sa.Integer, nullable=True) # TODO: make this not nullable parsing_completed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) @@ -387,11 +397,11 @@ class Document(Base): paused_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) # error - error = mapped_column(sa.Text, nullable=True) + error = mapped_column(LongText, nullable=True) stopped_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) # basic fields - indexing_status = mapped_column(String(255), nullable=False, server_default=sa.text("'waiting'::character varying")) + indexing_status = mapped_column(String(255), nullable=False, server_default=sa.text("'waiting'")) enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true")) disabled_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) disabled_by = mapped_column(StringUUID, nullable=True) @@ -399,10 +409,12 @@ class Document(Base): archived_reason = mapped_column(String(255), nullable=True) archived_by = mapped_column(StringUUID, nullable=True) archived_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) doc_type = mapped_column(String(40), nullable=True) - doc_metadata = mapped_column(JSONB, nullable=True) - doc_form = mapped_column(String(255), nullable=False, server_default=sa.text("'text_model'::character varying")) + doc_metadata = mapped_column(AdjustedJSON, nullable=True) + doc_form = mapped_column(String(255), nullable=False, server_default=sa.text("'text_model'")) doc_language = mapped_column(String(255), nullable=True) DATA_SOURCES = ["upload_file", "notion_import", "website_crawl"] @@ -692,13 +704,13 @@ class DocumentSegment(Base): ) # initial fields - id = mapped_column(StringUUID, nullable=False, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, nullable=False, default=lambda: str(uuid4())) tenant_id = mapped_column(StringUUID, nullable=False) dataset_id = mapped_column(StringUUID, nullable=False) document_id = mapped_column(StringUUID, nullable=False) position: Mapped[int] - content = mapped_column(sa.Text, nullable=False) - answer = mapped_column(sa.Text, nullable=True) + content = mapped_column(LongText, nullable=False) + answer = mapped_column(LongText, nullable=True) word_count: Mapped[int] tokens: Mapped[int] @@ -712,14 +724,16 @@ class DocumentSegment(Base): enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true")) disabled_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) disabled_by = mapped_column(StringUUID, nullable=True) - status: Mapped[str] = mapped_column(String(255), server_default=sa.text("'waiting'::character varying")) + status: Mapped[str] = mapped_column(String(255), server_default=sa.text("'waiting'")) created_by = mapped_column(StringUUID, nullable=False) created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) updated_by = mapped_column(StringUUID, nullable=True) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) indexing_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) completed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) - error = mapped_column(sa.Text, nullable=True) + error = mapped_column(LongText, nullable=True) stopped_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) @property @@ -863,29 +877,27 @@ class ChildChunk(Base): ) # initial fields - id = mapped_column(StringUUID, nullable=False, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, nullable=False, default=lambda: str(uuid4())) tenant_id = mapped_column(StringUUID, nullable=False) dataset_id = mapped_column(StringUUID, nullable=False) document_id = mapped_column(StringUUID, nullable=False) segment_id = mapped_column(StringUUID, nullable=False) position: Mapped[int] = mapped_column(sa.Integer, nullable=False) - content = mapped_column(sa.Text, nullable=False) + content = mapped_column(LongText, nullable=False) word_count: Mapped[int] = mapped_column(sa.Integer, nullable=False) # indexing fields index_node_id = mapped_column(String(255), nullable=True) index_node_hash = mapped_column(String(255), nullable=True) - type = mapped_column(String(255), nullable=False, server_default=sa.text("'automatic'::character varying")) + type = mapped_column(String(255), nullable=False, server_default=sa.text("'automatic'")) created_by = mapped_column(StringUUID, nullable=False) - created_at: Mapped[datetime] = mapped_column( - DateTime, nullable=False, server_default=sa.text("CURRENT_TIMESTAMP(0)") - ) + created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=sa.func.current_timestamp()) updated_by = mapped_column(StringUUID, nullable=True) updated_at: Mapped[datetime] = mapped_column( - DateTime, nullable=False, server_default=sa.text("CURRENT_TIMESTAMP(0)") + DateTime, nullable=False, server_default=sa.func.current_timestamp(), onupdate=func.current_timestamp() ) indexing_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) completed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) - error = mapped_column(sa.Text, nullable=True) + error = mapped_column(LongText, nullable=True) @property def dataset(self): @@ -900,52 +912,60 @@ class ChildChunk(Base): return db.session.query(DocumentSegment).where(DocumentSegment.id == self.segment_id).first() -class AppDatasetJoin(Base): +class AppDatasetJoin(TypeBase): __tablename__ = "app_dataset_joins" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="app_dataset_join_pkey"), sa.Index("app_dataset_join_app_dataset_idx", "dataset_id", "app_id"), ) - id = mapped_column(StringUUID, primary_key=True, nullable=False, server_default=sa.text("uuid_generate_v4()")) - app_id = mapped_column(StringUUID, nullable=False) - dataset_id = mapped_column(StringUUID, nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=sa.func.current_timestamp()) + id: Mapped[str] = mapped_column( + StringUUID, primary_key=True, nullable=False, default=lambda: str(uuid4()), init=False + ) + app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + dataset_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=sa.func.current_timestamp(), init=False + ) @property def app(self): return db.session.get(App, self.app_id) -class DatasetQuery(Base): +class DatasetQuery(TypeBase): __tablename__ = "dataset_queries" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="dataset_query_pkey"), sa.Index("dataset_query_dataset_id_idx", "dataset_id"), ) - id = mapped_column(StringUUID, primary_key=True, nullable=False, server_default=sa.text("uuid_generate_v4()")) - dataset_id = mapped_column(StringUUID, nullable=False) - content = mapped_column(sa.Text, nullable=False) + id: Mapped[str] = mapped_column( + StringUUID, primary_key=True, nullable=False, default=lambda: str(uuid4()), init=False + ) + dataset_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + content: Mapped[str] = mapped_column(LongText, nullable=False) source: Mapped[str] = mapped_column(String(255), nullable=False) - source_app_id = mapped_column(StringUUID, nullable=True) - created_by_role = mapped_column(String, nullable=False) - created_by = mapped_column(StringUUID, nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=sa.func.current_timestamp()) + source_app_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) + created_by_role: Mapped[str] = mapped_column(String(255), nullable=False) + created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=sa.func.current_timestamp(), init=False + ) -class DatasetKeywordTable(Base): +class DatasetKeywordTable(TypeBase): __tablename__ = "dataset_keyword_tables" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="dataset_keyword_table_pkey"), sa.Index("dataset_keyword_table_dataset_id_idx", "dataset_id"), ) - id = mapped_column(StringUUID, primary_key=True, server_default=sa.text("uuid_generate_v4()")) - dataset_id = mapped_column(StringUUID, nullable=False, unique=True) - keyword_table = mapped_column(sa.Text, nullable=False) - data_source_type = mapped_column( - String(255), nullable=False, server_default=sa.text("'database'::character varying") + id: Mapped[str] = mapped_column(StringUUID, primary_key=True, default=lambda: str(uuid4()), init=False) + dataset_id: Mapped[str] = mapped_column(StringUUID, nullable=False, unique=True) + keyword_table: Mapped[str] = mapped_column(LongText, nullable=False) + data_source_type: Mapped[str] = mapped_column( + String(255), nullable=False, server_default=sa.text("'database'"), default="database" ) @property @@ -992,14 +1012,12 @@ class Embedding(Base): sa.Index("created_at_idx", "created_at"), ) - id = mapped_column(StringUUID, primary_key=True, server_default=sa.text("uuid_generate_v4()")) - model_name = mapped_column( - String(255), nullable=False, server_default=sa.text("'text-embedding-ada-002'::character varying") - ) + id = mapped_column(StringUUID, primary_key=True, default=lambda: str(uuid4())) + model_name = mapped_column(String(255), nullable=False, server_default=sa.text("'text-embedding-ada-002'")) hash = mapped_column(String(64), nullable=False) - embedding = mapped_column(sa.LargeBinary, nullable=False) + embedding = mapped_column(BinaryData, nullable=False) created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - provider_name = mapped_column(String(255), nullable=False, server_default=sa.text("''::character varying")) + provider_name = mapped_column(String(255), nullable=False, server_default=sa.text("''")) def set_embedding(self, embedding_data: list[float]): self.embedding = pickle.dumps(embedding_data, protocol=pickle.HIGHEST_PROTOCOL) @@ -1015,10 +1033,10 @@ class DatasetCollectionBinding(Base): sa.Index("provider_model_name_idx", "provider_name", "model_name"), ) - id = mapped_column(StringUUID, primary_key=True, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, primary_key=True, default=lambda: str(uuid4())) provider_name: Mapped[str] = mapped_column(String(255), nullable=False) model_name: Mapped[str] = mapped_column(String(255), nullable=False) - type = mapped_column(String(40), server_default=sa.text("'dataset'::character varying"), nullable=False) + type = mapped_column(String(40), server_default=sa.text("'dataset'"), nullable=False) collection_name = mapped_column(String(64), nullable=False) created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) @@ -1032,30 +1050,32 @@ class TidbAuthBinding(Base): sa.Index("tidb_auth_bindings_created_at_idx", "created_at"), sa.Index("tidb_auth_bindings_status_idx", "status"), ) - id = mapped_column(StringUUID, primary_key=True, server_default=sa.text("uuid_generate_v4()")) - tenant_id = mapped_column(StringUUID, nullable=True) + id: Mapped[str] = mapped_column(StringUUID, primary_key=True, default=lambda: str(uuid4())) + tenant_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) cluster_id: Mapped[str] = mapped_column(String(255), nullable=False) cluster_name: Mapped[str] = mapped_column(String(255), nullable=False) - active: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=db.text("false")) - status = mapped_column(String(255), nullable=False, server_default=db.text("'CREATING'::character varying")) + active: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) + status: Mapped[str] = mapped_column(sa.String(255), nullable=False, server_default=sa.text("'CREATING'")) account: Mapped[str] = mapped_column(String(255), nullable=False) password: Mapped[str] = mapped_column(String(255), nullable=False) created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) -class Whitelist(Base): +class Whitelist(TypeBase): __tablename__ = "whitelists" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="whitelists_pkey"), sa.Index("whitelists_tenant_idx", "tenant_id"), ) - id = mapped_column(StringUUID, primary_key=True, server_default=sa.text("uuid_generate_v4()")) - tenant_id = mapped_column(StringUUID, nullable=True) + id: Mapped[str] = mapped_column(StringUUID, primary_key=True, default=lambda: str(uuid4()), init=False) + tenant_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) category: Mapped[str] = mapped_column(String(255), nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) -class DatasetPermission(Base): +class DatasetPermission(TypeBase): __tablename__ = "dataset_permissions" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="dataset_permission_pkey"), @@ -1064,15 +1084,19 @@ class DatasetPermission(Base): sa.Index("idx_dataset_permissions_tenant_id", "tenant_id"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), primary_key=True) - dataset_id = mapped_column(StringUUID, nullable=False) - account_id = mapped_column(StringUUID, nullable=False) - tenant_id = mapped_column(StringUUID, nullable=False) - has_permission: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true")) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), primary_key=True, init=False) + dataset_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + account_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + has_permission: Mapped[bool] = mapped_column( + sa.Boolean, nullable=False, server_default=sa.text("true"), default=True + ) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) -class ExternalKnowledgeApis(Base): +class ExternalKnowledgeApis(TypeBase): __tablename__ = "external_knowledge_apis" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="external_knowledge_apis_pkey"), @@ -1080,15 +1104,19 @@ class ExternalKnowledgeApis(Base): sa.Index("external_knowledge_apis_name_idx", "name"), ) - id = mapped_column(StringUUID, nullable=False, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, nullable=False, default=lambda: str(uuid4()), init=False) name: Mapped[str] = mapped_column(String(255), nullable=False) description: Mapped[str] = mapped_column(String(255), nullable=False) - tenant_id = mapped_column(StringUUID, nullable=False) - settings = mapped_column(sa.Text, nullable=True) - created_by = mapped_column(StringUUID, nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - updated_by = mapped_column(StringUUID, nullable=True) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + settings: Mapped[str | None] = mapped_column(LongText, nullable=True) + created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), init=False + ) def to_dict(self) -> dict[str, Any]: return { @@ -1123,7 +1151,7 @@ class ExternalKnowledgeApis(Base): return dataset_bindings -class ExternalKnowledgeBindings(Base): +class ExternalKnowledgeBindings(TypeBase): __tablename__ = "external_knowledge_bindings" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="external_knowledge_bindings_pkey"), @@ -1133,15 +1161,19 @@ class ExternalKnowledgeBindings(Base): sa.Index("external_knowledge_bindings_external_knowledge_api_idx", "external_knowledge_api_id"), ) - id = mapped_column(StringUUID, nullable=False, server_default=sa.text("uuid_generate_v4()")) - tenant_id = mapped_column(StringUUID, nullable=False) - external_knowledge_api_id = mapped_column(StringUUID, nullable=False) - dataset_id = mapped_column(StringUUID, nullable=False) - external_knowledge_id = mapped_column(sa.Text, nullable=False) - created_by = mapped_column(StringUUID, nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - updated_by = mapped_column(StringUUID, nullable=True) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + id: Mapped[str] = mapped_column(StringUUID, nullable=False, default=lambda: str(uuid4()), init=False) + tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + external_knowledge_api_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + dataset_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + external_knowledge_id: Mapped[str] = mapped_column(String(512), nullable=False) + created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None, init=False) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), init=False + ) class DatasetAutoDisableLog(Base): @@ -1153,17 +1185,15 @@ class DatasetAutoDisableLog(Base): sa.Index("dataset_auto_disable_log_created_atx", "created_at"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id = mapped_column(StringUUID, nullable=False) dataset_id = mapped_column(StringUUID, nullable=False) document_id = mapped_column(StringUUID, nullable=False) notified: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) - created_at: Mapped[datetime] = mapped_column( - DateTime, nullable=False, server_default=sa.text("CURRENT_TIMESTAMP(0)") - ) + created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=sa.func.current_timestamp()) -class RateLimitLog(Base): +class RateLimitLog(TypeBase): __tablename__ = "rate_limit_logs" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="rate_limit_log_pkey"), @@ -1171,12 +1201,12 @@ class RateLimitLog(Base): sa.Index("rate_limit_log_operation_idx", "operation"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) - tenant_id = mapped_column(StringUUID, nullable=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) + tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) subscription_plan: Mapped[str] = mapped_column(String(255), nullable=False) operation: Mapped[str] = mapped_column(String(255), nullable=False) created_at: Mapped[datetime] = mapped_column( - DateTime, nullable=False, server_default=sa.text("CURRENT_TIMESTAMP(0)") + DateTime, nullable=False, server_default=func.current_timestamp(), init=False ) @@ -1188,16 +1218,14 @@ class DatasetMetadata(Base): sa.Index("dataset_metadata_dataset_idx", "dataset_id"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id = mapped_column(StringUUID, nullable=False) dataset_id = mapped_column(StringUUID, nullable=False) type: Mapped[str] = mapped_column(String(255), nullable=False) name: Mapped[str] = mapped_column(String(255), nullable=False) - created_at: Mapped[datetime] = mapped_column( - DateTime, nullable=False, server_default=sa.text("CURRENT_TIMESTAMP(0)") - ) + created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=sa.func.current_timestamp()) updated_at: Mapped[datetime] = mapped_column( - DateTime, nullable=False, server_default=sa.text("CURRENT_TIMESTAMP(0)") + DateTime, nullable=False, server_default=sa.func.current_timestamp(), onupdate=func.current_timestamp() ) created_by = mapped_column(StringUUID, nullable=False) updated_by = mapped_column(StringUUID, nullable=True) @@ -1213,7 +1241,7 @@ class DatasetMetadataBinding(Base): sa.Index("dataset_metadata_binding_document_idx", "document_id"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id = mapped_column(StringUUID, nullable=False) dataset_id = mapped_column(StringUUID, nullable=False) metadata_id = mapped_column(StringUUID, nullable=False) @@ -1222,46 +1250,62 @@ class DatasetMetadataBinding(Base): created_by = mapped_column(StringUUID, nullable=False) -class PipelineBuiltInTemplate(Base): # type: ignore[name-defined] +class PipelineBuiltInTemplate(TypeBase): __tablename__ = "pipeline_built_in_templates" - __table_args__ = (db.PrimaryKeyConstraint("id", name="pipeline_built_in_template_pkey"),) + __table_args__ = (sa.PrimaryKeyConstraint("id", name="pipeline_built_in_template_pkey"),) - id = mapped_column(StringUUID, server_default=db.text("uuidv7()")) - name = mapped_column(db.String(255), nullable=False) - description = mapped_column(sa.Text, nullable=False) - chunk_structure = mapped_column(db.String(255), nullable=False) - icon = mapped_column(sa.JSON, nullable=False) - yaml_content = mapped_column(sa.Text, nullable=False) - copyright = mapped_column(db.String(255), nullable=False) - privacy_policy = mapped_column(db.String(255), nullable=False) - position = mapped_column(sa.Integer, nullable=False) - install_count = mapped_column(sa.Integer, nullable=False, default=0) - language = mapped_column(db.String(255), nullable=False) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) - - -class PipelineCustomizedTemplate(Base): # type: ignore[name-defined] - __tablename__ = "pipeline_customized_templates" - __table_args__ = ( - db.PrimaryKeyConstraint("id", name="pipeline_customized_template_pkey"), - db.Index("pipeline_customized_template_tenant_idx", "tenant_id"), + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()), init=False) + name: Mapped[str] = mapped_column(sa.String(255), nullable=False) + description: Mapped[str] = mapped_column(LongText, nullable=False) + chunk_structure: Mapped[str] = mapped_column(sa.String(255), nullable=False) + icon: Mapped[dict] = mapped_column(sa.JSON, nullable=False) + yaml_content: Mapped[str] = mapped_column(LongText, nullable=False) + copyright: Mapped[str] = mapped_column(sa.String(255), nullable=False) + privacy_policy: Mapped[str] = mapped_column(sa.String(255), nullable=False) + position: Mapped[int] = mapped_column(sa.Integer, nullable=False) + install_count: Mapped[int] = mapped_column(sa.Integer, nullable=False) + language: Mapped[str] = mapped_column(sa.String(255), nullable=False) + created_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_at: Mapped[datetime] = mapped_column( + sa.DateTime, + nullable=False, + server_default=func.current_timestamp(), + onupdate=func.current_timestamp(), + init=False, ) - id = mapped_column(StringUUID, server_default=db.text("uuidv7()")) - tenant_id = mapped_column(StringUUID, nullable=False) - name = mapped_column(db.String(255), nullable=False) - description = mapped_column(sa.Text, nullable=False) - chunk_structure = mapped_column(db.String(255), nullable=False) - icon = mapped_column(sa.JSON, nullable=False) - position = mapped_column(sa.Integer, nullable=False) - yaml_content = mapped_column(sa.Text, nullable=False) - install_count = mapped_column(sa.Integer, nullable=False, default=0) - language = mapped_column(db.String(255), nullable=False) - created_by = mapped_column(StringUUID, nullable=False) - updated_by = mapped_column(StringUUID, nullable=True) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + +class PipelineCustomizedTemplate(TypeBase): + __tablename__ = "pipeline_customized_templates" + __table_args__ = ( + sa.PrimaryKeyConstraint("id", name="pipeline_customized_template_pkey"), + sa.Index("pipeline_customized_template_tenant_idx", "tenant_id"), + ) + + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()), init=False) + tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + name: Mapped[str] = mapped_column(sa.String(255), nullable=False) + description: Mapped[str] = mapped_column(LongText, nullable=False) + chunk_structure: Mapped[str] = mapped_column(sa.String(255), nullable=False) + icon: Mapped[dict] = mapped_column(sa.JSON, nullable=False) + position: Mapped[int] = mapped_column(sa.Integer, nullable=False) + yaml_content: Mapped[str] = mapped_column(LongText, nullable=False) + install_count: Mapped[int] = mapped_column(sa.Integer, nullable=False) + language: Mapped[str] = mapped_column(sa.String(255), nullable=False) + created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) + updated_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None, init=False) + created_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_at: Mapped[datetime] = mapped_column( + sa.DateTime, + nullable=False, + server_default=func.current_timestamp(), + onupdate=func.current_timestamp(), + init=False, + ) @property def created_user_name(self): @@ -1273,50 +1317,62 @@ class PipelineCustomizedTemplate(Base): # type: ignore[name-defined] class Pipeline(Base): # type: ignore[name-defined] __tablename__ = "pipelines" - __table_args__ = (db.PrimaryKeyConstraint("id", name="pipeline_pkey"),) + __table_args__ = (sa.PrimaryKeyConstraint("id", name="pipeline_pkey"),) - id = mapped_column(StringUUID, server_default=db.text("uuidv7()")) + id = mapped_column(StringUUID, default=lambda: str(uuidv7())) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) - name = mapped_column(db.String(255), nullable=False) - description = mapped_column(sa.Text, nullable=False, server_default=db.text("''::character varying")) + name = mapped_column(sa.String(255), nullable=False) + description = mapped_column(LongText, nullable=False, default=sa.text("''")) workflow_id = mapped_column(StringUUID, nullable=True) - is_public = mapped_column(sa.Boolean, nullable=False, server_default=db.text("false")) - is_published = mapped_column(sa.Boolean, nullable=False, server_default=db.text("false")) + is_public = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) + is_published = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) created_by = mapped_column(StringUUID, nullable=True) created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) updated_by = mapped_column(StringUUID, nullable=True) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) def retrieve_dataset(self, session: Session): return session.query(Dataset).where(Dataset.pipeline_id == self.id).first() -class DocumentPipelineExecutionLog(Base): +class DocumentPipelineExecutionLog(TypeBase): __tablename__ = "document_pipeline_execution_logs" __table_args__ = ( - db.PrimaryKeyConstraint("id", name="document_pipeline_execution_log_pkey"), - db.Index("document_pipeline_execution_logs_document_id_idx", "document_id"), + sa.PrimaryKeyConstraint("id", name="document_pipeline_execution_log_pkey"), + sa.Index("document_pipeline_execution_logs_document_id_idx", "document_id"), ) - id = mapped_column(StringUUID, server_default=db.text("uuidv7()")) - pipeline_id = mapped_column(StringUUID, nullable=False) - document_id = mapped_column(StringUUID, nullable=False) - datasource_type = mapped_column(db.String(255), nullable=False) - datasource_info = mapped_column(sa.Text, nullable=False) - datasource_node_id = mapped_column(db.String(255), nullable=False) - input_data = mapped_column(sa.JSON, nullable=False) - created_by = mapped_column(StringUUID, nullable=True) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()), init=False) + pipeline_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + document_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + datasource_type: Mapped[str] = mapped_column(sa.String(255), nullable=False) + datasource_info: Mapped[str] = mapped_column(LongText, nullable=False) + datasource_node_id: Mapped[str] = mapped_column(sa.String(255), nullable=False) + input_data: Mapped[dict] = mapped_column(sa.JSON, nullable=False) + created_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True) + created_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) -class PipelineRecommendedPlugin(Base): +class PipelineRecommendedPlugin(TypeBase): __tablename__ = "pipeline_recommended_plugins" - __table_args__ = (db.PrimaryKeyConstraint("id", name="pipeline_recommended_plugin_pkey"),) + __table_args__ = (sa.PrimaryKeyConstraint("id", name="pipeline_recommended_plugin_pkey"),) - id = mapped_column(StringUUID, server_default=db.text("uuidv7()")) - plugin_id = mapped_column(sa.Text, nullable=False) - provider_name = mapped_column(sa.Text, nullable=False) - position = mapped_column(sa.Integer, nullable=False, default=0) - active = mapped_column(sa.Boolean, nullable=False, default=True) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()), init=False) + plugin_id: Mapped[str] = mapped_column(LongText, nullable=False) + provider_name: Mapped[str] = mapped_column(LongText, nullable=False) + position: Mapped[int] = mapped_column(sa.Integer, nullable=False, default=0) + active: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, default=True) + created_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_at: Mapped[datetime] = mapped_column( + sa.DateTime, + nullable=False, + server_default=func.current_timestamp(), + onupdate=func.current_timestamp(), + init=False, + ) diff --git a/api/models/enums.py b/api/models/enums.py index da35a34123..8cd3d4cf2a 100644 --- a/api/models/enums.py +++ b/api/models/enums.py @@ -64,6 +64,7 @@ class AppTriggerStatus(StrEnum): ENABLED = "enabled" DISABLED = "disabled" UNAUTHORIZED = "unauthorized" + RATE_LIMITED = "rate_limited" class AppTriggerType(StrEnum): @@ -72,3 +73,6 @@ class AppTriggerType(StrEnum): TRIGGER_WEBHOOK = NodeType.TRIGGER_WEBHOOK.value TRIGGER_SCHEDULE = NodeType.TRIGGER_SCHEDULE.value TRIGGER_PLUGIN = NodeType.TRIGGER_PLUGIN.value + + # for backward compatibility + UNKNOWN = "unknown" diff --git a/api/models/model.py b/api/models/model.py index 8a8574e2fe..b0bf46e7d7 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -3,8 +3,10 @@ import re import uuid from collections.abc import Mapping from datetime import datetime +from decimal import Decimal from enum import StrEnum, auto from typing import TYPE_CHECKING, Any, Literal, Optional, cast +from uuid import uuid4 import sqlalchemy as sa from flask import request @@ -19,24 +21,27 @@ from core.file import helpers as file_helpers from core.tools.signature import sign_tool_file from core.workflow.enums import WorkflowExecutionStatus from libs.helper import generate_string # type: ignore[import-not-found] +from libs.uuid_utils import uuidv7 from .account import Account, Tenant -from .base import Base +from .base import Base, TypeBase from .engine import db from .enums import CreatorUserRole from .provider_ids import GenericProviderID -from .types import StringUUID +from .types import LongText, StringUUID if TYPE_CHECKING: - from models.workflow import Workflow + from .workflow import Workflow -class DifySetup(Base): +class DifySetup(TypeBase): __tablename__ = "dify_setups" __table_args__ = (sa.PrimaryKeyConstraint("version", name="dify_setup_pkey"),) version: Mapped[str] = mapped_column(String(255), nullable=False) - setup_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + setup_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) class AppMode(StrEnum): @@ -71,17 +76,17 @@ class App(Base): __tablename__ = "apps" __table_args__ = (sa.PrimaryKeyConstraint("id", name="app_pkey"), sa.Index("app_tenant_id_idx", "tenant_id")) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id: Mapped[str] = mapped_column(StringUUID) name: Mapped[str] = mapped_column(String(255)) - description: Mapped[str] = mapped_column(sa.Text, server_default=sa.text("''::character varying")) + description: Mapped[str] = mapped_column(LongText, default=sa.text("''")) mode: Mapped[str] = mapped_column(String(255)) icon_type: Mapped[str | None] = mapped_column(String(255)) # image, emoji icon = mapped_column(String(255)) icon_background: Mapped[str | None] = mapped_column(String(255)) app_model_config_id = mapped_column(StringUUID, nullable=True) workflow_id = mapped_column(StringUUID, nullable=True) - status: Mapped[str] = mapped_column(String(255), server_default=sa.text("'normal'::character varying")) + status: Mapped[str] = mapped_column(String(255), server_default=sa.text("'normal'")) enable_site: Mapped[bool] = mapped_column(sa.Boolean) enable_api: Mapped[bool] = mapped_column(sa.Boolean) api_rpm: Mapped[int] = mapped_column(sa.Integer, server_default=sa.text("0")) @@ -89,12 +94,14 @@ class App(Base): is_demo: Mapped[bool] = mapped_column(sa.Boolean, server_default=sa.text("false")) is_public: Mapped[bool] = mapped_column(sa.Boolean, server_default=sa.text("false")) is_universal: Mapped[bool] = mapped_column(sa.Boolean, server_default=sa.text("false")) - tracing = mapped_column(sa.Text, nullable=True) + tracing = mapped_column(LongText, nullable=True) max_active_requests: Mapped[int | None] created_by = mapped_column(StringUUID, nullable=True) created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) updated_by = mapped_column(StringUUID, nullable=True) - updated_at: Mapped[datetime] = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) use_icon_as_answer_icon: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) @property @@ -305,7 +312,7 @@ class AppModelConfig(Base): __tablename__ = "app_model_configs" __table_args__ = (sa.PrimaryKeyConstraint("id", name="app_model_config_pkey"), sa.Index("app_app_id_idx", "app_id")) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, default=lambda: str(uuid4())) app_id = mapped_column(StringUUID, nullable=False) provider = mapped_column(String(255), nullable=True) model_id = mapped_column(String(255), nullable=True) @@ -313,26 +320,28 @@ class AppModelConfig(Base): created_by = mapped_column(StringUUID, nullable=True) created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) updated_by = mapped_column(StringUUID, nullable=True) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) - opening_statement = mapped_column(sa.Text) - suggested_questions = mapped_column(sa.Text) - suggested_questions_after_answer = mapped_column(sa.Text) - speech_to_text = mapped_column(sa.Text) - text_to_speech = mapped_column(sa.Text) - more_like_this = mapped_column(sa.Text) - model = mapped_column(sa.Text) - user_input_form = mapped_column(sa.Text) + updated_at = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) + opening_statement = mapped_column(LongText) + suggested_questions = mapped_column(LongText) + suggested_questions_after_answer = mapped_column(LongText) + speech_to_text = mapped_column(LongText) + text_to_speech = mapped_column(LongText) + more_like_this = mapped_column(LongText) + model = mapped_column(LongText) + user_input_form = mapped_column(LongText) dataset_query_variable = mapped_column(String(255)) - pre_prompt = mapped_column(sa.Text) - agent_mode = mapped_column(sa.Text) - sensitive_word_avoidance = mapped_column(sa.Text) - retriever_resource = mapped_column(sa.Text) - prompt_type = mapped_column(String(255), nullable=False, server_default=sa.text("'simple'::character varying")) - chat_prompt_config = mapped_column(sa.Text) - completion_prompt_config = mapped_column(sa.Text) - dataset_configs = mapped_column(sa.Text) - external_data_tools = mapped_column(sa.Text) - file_upload = mapped_column(sa.Text) + pre_prompt = mapped_column(LongText) + agent_mode = mapped_column(LongText) + sensitive_word_avoidance = mapped_column(LongText) + retriever_resource = mapped_column(LongText) + prompt_type = mapped_column(String(255), nullable=False, server_default=sa.text("'simple'")) + chat_prompt_config = mapped_column(LongText) + completion_prompt_config = mapped_column(LongText) + dataset_configs = mapped_column(LongText) + external_data_tools = mapped_column(LongText) + file_upload = mapped_column(LongText) @property def app(self) -> App | None: @@ -532,19 +541,21 @@ class RecommendedApp(Base): sa.Index("recommended_app_is_listed_idx", "is_listed", "language"), ) - id = mapped_column(StringUUID, primary_key=True, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, primary_key=True, default=lambda: str(uuid4())) app_id = mapped_column(StringUUID, nullable=False) description = mapped_column(sa.JSON, nullable=False) copyright: Mapped[str] = mapped_column(String(255), nullable=False) privacy_policy: Mapped[str] = mapped_column(String(255), nullable=False) - custom_disclaimer: Mapped[str] = mapped_column(sa.TEXT, default="") + custom_disclaimer: Mapped[str] = mapped_column(LongText, default="") category: Mapped[str] = mapped_column(String(255), nullable=False) position: Mapped[int] = mapped_column(sa.Integer, nullable=False, default=0) is_listed: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, default=True) install_count: Mapped[int] = mapped_column(sa.Integer, nullable=False, default=0) - language = mapped_column(String(255), nullable=False, server_default=sa.text("'en-US'::character varying")) + language = mapped_column(String(255), nullable=False, server_default=sa.text("'en-US'")) created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) @property def app(self) -> App | None: @@ -552,7 +563,7 @@ class RecommendedApp(Base): return app -class InstalledApp(Base): +class InstalledApp(TypeBase): __tablename__ = "installed_apps" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="installed_app_pkey"), @@ -561,14 +572,16 @@ class InstalledApp(Base): sa.UniqueConstraint("tenant_id", "app_id", name="unique_tenant_app"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) - tenant_id = mapped_column(StringUUID, nullable=False) - app_id = mapped_column(StringUUID, nullable=False) - app_owner_tenant_id = mapped_column(StringUUID, nullable=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) + tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + app_owner_tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) position: Mapped[int] = mapped_column(sa.Integer, nullable=False, default=0) - is_pinned: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) - last_used_at = mapped_column(sa.DateTime, nullable=True) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + is_pinned: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false"), default=False) + last_used_at: Mapped[datetime | None] = mapped_column(sa.DateTime, nullable=True, default=None) + created_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) @property def app(self) -> App | None: @@ -593,18 +606,18 @@ class OAuthProviderApp(Base): sa.Index("oauth_provider_app_client_id_idx", "client_id"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuidv7()")) + id = mapped_column(StringUUID, default=lambda: str(uuidv7())) app_icon = mapped_column(String(255), nullable=False) - app_label = mapped_column(sa.JSON, nullable=False, server_default="{}") + app_label = mapped_column(sa.JSON, nullable=False, default="{}") client_id = mapped_column(String(255), nullable=False) client_secret = mapped_column(String(255), nullable=False) - redirect_uris = mapped_column(sa.JSON, nullable=False, server_default="[]") + redirect_uris = mapped_column(sa.JSON, nullable=False, default="[]") scope = mapped_column( String(255), nullable=False, server_default=sa.text("'read:name read:email read:avatar read:interface_language read:timezone'"), ) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=sa.text("CURRENT_TIMESTAMP(0)")) + created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) class Conversation(Base): @@ -614,18 +627,18 @@ class Conversation(Base): sa.Index("conversation_app_from_user_idx", "app_id", "from_source", "from_end_user_id"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) app_id = mapped_column(StringUUID, nullable=False) app_model_config_id = mapped_column(StringUUID, nullable=True) model_provider = mapped_column(String(255), nullable=True) - override_model_configs = mapped_column(sa.Text) + override_model_configs = mapped_column(LongText) model_id = mapped_column(String(255), nullable=True) mode: Mapped[str] = mapped_column(String(255)) name: Mapped[str] = mapped_column(String(255), nullable=False) - summary = mapped_column(sa.Text) + summary = mapped_column(LongText) _inputs: Mapped[dict[str, Any]] = mapped_column("inputs", sa.JSON) - introduction = mapped_column(sa.Text) - system_instruction = mapped_column(sa.Text) + introduction = mapped_column(LongText) + system_instruction = mapped_column(LongText) system_instruction_tokens: Mapped[int] = mapped_column(sa.Integer, nullable=False, server_default=sa.text("0")) status: Mapped[str] = mapped_column(String(255), nullable=False) @@ -643,7 +656,9 @@ class Conversation(Base): read_account_id = mapped_column(StringUUID) dialogue_count: Mapped[int] = mapped_column(default=0) created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) messages = db.relationship("Message", backref="conversation", lazy="select", passive_deletes="all") message_annotations = db.relationship( @@ -913,35 +928,41 @@ class Message(Base): Index("message_app_mode_idx", "app_mode"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) - app_id = mapped_column(StringUUID, nullable=False) - model_provider = mapped_column(String(255), nullable=True) - model_id = mapped_column(String(255), nullable=True) - override_model_configs = mapped_column(sa.Text) - conversation_id = mapped_column(StringUUID, sa.ForeignKey("conversations.id"), nullable=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) + app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + model_provider: Mapped[str | None] = mapped_column(String(255), nullable=True) + model_id: Mapped[str | None] = mapped_column(String(255), nullable=True) + override_model_configs: Mapped[str | None] = mapped_column(LongText) + conversation_id: Mapped[str] = mapped_column(StringUUID, sa.ForeignKey("conversations.id"), nullable=False) _inputs: Mapped[dict[str, Any]] = mapped_column("inputs", sa.JSON) - query: Mapped[str] = mapped_column(sa.Text, nullable=False) - message = mapped_column(sa.JSON, nullable=False) + query: Mapped[str] = mapped_column(LongText, nullable=False) + message: Mapped[dict[str, Any]] = mapped_column(sa.JSON, nullable=False) message_tokens: Mapped[int] = mapped_column(sa.Integer, nullable=False, server_default=sa.text("0")) - message_unit_price = mapped_column(sa.Numeric(10, 4), nullable=False) - message_price_unit = mapped_column(sa.Numeric(10, 7), nullable=False, server_default=sa.text("0.001")) - answer: Mapped[str] = mapped_column(sa.Text, nullable=False) + message_unit_price: Mapped[Decimal] = mapped_column(sa.Numeric(10, 4), nullable=False) + message_price_unit: Mapped[Decimal] = mapped_column( + sa.Numeric(10, 7), nullable=False, server_default=sa.text("0.001") + ) + answer: Mapped[str] = mapped_column(LongText, nullable=False) answer_tokens: Mapped[int] = mapped_column(sa.Integer, nullable=False, server_default=sa.text("0")) - answer_unit_price = mapped_column(sa.Numeric(10, 4), nullable=False) - answer_price_unit = mapped_column(sa.Numeric(10, 7), nullable=False, server_default=sa.text("0.001")) - parent_message_id = mapped_column(StringUUID, nullable=True) - provider_response_latency = mapped_column(sa.Float, nullable=False, server_default=sa.text("0")) - total_price = mapped_column(sa.Numeric(10, 7)) + answer_unit_price: Mapped[Decimal] = mapped_column(sa.Numeric(10, 4), nullable=False) + answer_price_unit: Mapped[Decimal] = mapped_column( + sa.Numeric(10, 7), nullable=False, server_default=sa.text("0.001") + ) + parent_message_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) + provider_response_latency: Mapped[float] = mapped_column(sa.Float, nullable=False, server_default=sa.text("0")) + total_price: Mapped[Decimal | None] = mapped_column(sa.Numeric(10, 7)) currency: Mapped[str] = mapped_column(String(255), nullable=False) - status = mapped_column(String(255), nullable=False, server_default=sa.text("'normal'::character varying")) - error = mapped_column(sa.Text) - message_metadata = mapped_column(sa.Text) + status: Mapped[str] = mapped_column(String(255), nullable=False, server_default=sa.text("'normal'")) + error: Mapped[str | None] = mapped_column(LongText) + message_metadata: Mapped[str | None] = mapped_column(LongText) invoke_from: Mapped[str | None] = mapped_column(String(255), nullable=True) from_source: Mapped[str] = mapped_column(String(255), nullable=False) from_end_user_id: Mapped[str | None] = mapped_column(StringUUID) from_account_id: Mapped[str | None] = mapped_column(StringUUID) created_at: Mapped[datetime] = mapped_column(sa.DateTime, server_default=func.current_timestamp()) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) agent_based: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) workflow_run_id: Mapped[str | None] = mapped_column(StringUUID) app_mode: Mapped[str | None] = mapped_column(String(255), nullable=True) @@ -1212,9 +1233,13 @@ class Message(Base): @property def workflow_run(self): if self.workflow_run_id: - from .workflow import WorkflowRun + from sqlalchemy.orm import sessionmaker - return db.session.query(WorkflowRun).where(WorkflowRun.id == self.workflow_run_id).first() + from repositories.factory import DifyAPIRepositoryFactory + + session_maker = sessionmaker(bind=db.engine, expire_on_commit=False) + repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker) + return repo.get_workflow_run_by_id_without_tenant(run_id=self.workflow_run_id) return None @@ -1275,20 +1300,22 @@ class MessageFeedback(Base): sa.Index("message_feedback_conversation_idx", "conversation_id", "from_source", "rating"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) - app_id = mapped_column(StringUUID, nullable=False) - conversation_id = mapped_column(StringUUID, nullable=False) - message_id = mapped_column(StringUUID, nullable=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) + app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + conversation_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + message_id: Mapped[str] = mapped_column(StringUUID, nullable=False) rating: Mapped[str] = mapped_column(String(255), nullable=False) - content = mapped_column(sa.Text) + content: Mapped[str | None] = mapped_column(LongText) from_source: Mapped[str] = mapped_column(String(255), nullable=False) - from_end_user_id = mapped_column(StringUUID) - from_account_id = mapped_column(StringUUID) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + from_end_user_id: Mapped[str | None] = mapped_column(StringUUID) + from_account_id: Mapped[str | None] = mapped_column(StringUUID) + created_at: Mapped[datetime] = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) @property - def from_account(self): + def from_account(self) -> Account | None: account = db.session.query(Account).where(Account.id == self.from_account_id).first() return account @@ -1337,11 +1364,11 @@ class MessageFile(Base): self.created_by_role = created_by_role.value self.created_by = created_by - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) message_id: Mapped[str] = mapped_column(StringUUID, nullable=False) type: Mapped[str] = mapped_column(String(255), nullable=False) transfer_method: Mapped[str] = mapped_column(String(255), nullable=False) - url: Mapped[str | None] = mapped_column(sa.Text, nullable=True) + url: Mapped[str | None] = mapped_column(LongText, nullable=True) belongs_to: Mapped[str | None] = mapped_column(String(255), nullable=True) upload_file_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) created_by_role: Mapped[str] = mapped_column(String(255), nullable=False) @@ -1358,16 +1385,18 @@ class MessageAnnotation(Base): sa.Index("message_annotation_message_idx", "message_id"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) app_id: Mapped[str] = mapped_column(StringUUID) conversation_id: Mapped[str | None] = mapped_column(StringUUID, sa.ForeignKey("conversations.id")) message_id: Mapped[str | None] = mapped_column(StringUUID) - question = mapped_column(sa.Text, nullable=True) - content = mapped_column(sa.Text, nullable=False) + question = mapped_column(LongText, nullable=True) + content = mapped_column(LongText, nullable=False) hit_count: Mapped[int] = mapped_column(sa.Integer, nullable=False, server_default=sa.text("0")) account_id = mapped_column(StringUUID, nullable=False) created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) @property def account(self): @@ -1390,17 +1419,17 @@ class AppAnnotationHitHistory(Base): sa.Index("app_annotation_hit_histories_message_idx", "message_id"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, default=lambda: str(uuid4())) app_id = mapped_column(StringUUID, nullable=False) annotation_id: Mapped[str] = mapped_column(StringUUID, nullable=False) - source = mapped_column(sa.Text, nullable=False) - question = mapped_column(sa.Text, nullable=False) + source = mapped_column(LongText, nullable=False) + question = mapped_column(LongText, nullable=False) account_id = mapped_column(StringUUID, nullable=False) created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) score = mapped_column(Float, nullable=False, server_default=sa.text("0")) message_id = mapped_column(StringUUID, nullable=False) - annotation_question = mapped_column(sa.Text, nullable=False) - annotation_content = mapped_column(sa.Text, nullable=False) + annotation_question = mapped_column(LongText, nullable=False) + annotation_content = mapped_column(LongText, nullable=False) @property def account(self): @@ -1425,14 +1454,16 @@ class AppAnnotationSetting(Base): sa.Index("app_annotation_settings_app_idx", "app_id"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, default=lambda: str(uuid4())) app_id = mapped_column(StringUUID, nullable=False) score_threshold = mapped_column(Float, nullable=False, server_default=sa.text("0")) collection_binding_id = mapped_column(StringUUID, nullable=False) created_user_id = mapped_column(StringUUID, nullable=False) created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) updated_user_id = mapped_column(StringUUID, nullable=False) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) @property def collection_binding_detail(self): @@ -1453,14 +1484,16 @@ class OperationLog(Base): sa.Index("operation_log_account_action_idx", "tenant_id", "account_id", "action"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id = mapped_column(StringUUID, nullable=False) account_id = mapped_column(StringUUID, nullable=False) action: Mapped[str] = mapped_column(String(255), nullable=False) content = mapped_column(sa.JSON) created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) created_ip: Mapped[str] = mapped_column(String(255), nullable=False) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) class DefaultEndUserSessionID(StrEnum): @@ -1479,7 +1512,7 @@ class EndUser(Base, UserMixin): sa.Index("end_user_tenant_session_id_idx", "tenant_id", "session_id", "type"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) app_id = mapped_column(StringUUID, nullable=True) type: Mapped[str] = mapped_column(String(255), nullable=False) @@ -1497,29 +1530,39 @@ class EndUser(Base, UserMixin): def is_anonymous(self, value: bool) -> None: self._is_anonymous = value - session_id: Mapped[str] = mapped_column() + session_id: Mapped[str] = mapped_column(String(255), nullable=False) created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) -class AppMCPServer(Base): +class AppMCPServer(TypeBase): __tablename__ = "app_mcp_servers" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="app_mcp_server_pkey"), sa.UniqueConstraint("tenant_id", "app_id", name="unique_app_mcp_server_tenant_app_id"), sa.UniqueConstraint("server_code", name="unique_app_mcp_server_server_code"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) - tenant_id = mapped_column(StringUUID, nullable=False) - app_id = mapped_column(StringUUID, nullable=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) + tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) name: Mapped[str] = mapped_column(String(255), nullable=False) description: Mapped[str] = mapped_column(String(255), nullable=False) server_code: Mapped[str] = mapped_column(String(255), nullable=False) - status = mapped_column(String(255), nullable=False, server_default=sa.text("'normal'::character varying")) - parameters = mapped_column(sa.Text, nullable=False) + status: Mapped[str] = mapped_column(String(255), nullable=False, server_default=sa.text("'normal'")) + parameters: Mapped[str] = mapped_column(LongText, nullable=False) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + created_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_at: Mapped[datetime] = mapped_column( + sa.DateTime, + nullable=False, + server_default=func.current_timestamp(), + onupdate=func.current_timestamp(), + init=False, + ) @staticmethod def generate_server_code(n: int) -> str: @@ -1543,13 +1586,13 @@ class Site(Base): sa.Index("site_code_idx", "code", "status"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, default=lambda: str(uuid4())) app_id = mapped_column(StringUUID, nullable=False) title: Mapped[str] = mapped_column(String(255), nullable=False) icon_type = mapped_column(String(255), nullable=True) icon = mapped_column(String(255)) icon_background = mapped_column(String(255)) - description = mapped_column(sa.Text) + description = mapped_column(LongText) default_language: Mapped[str] = mapped_column(String(255), nullable=False) chat_color_theme = mapped_column(String(255)) chat_color_theme_inverted: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) @@ -1557,15 +1600,17 @@ class Site(Base): privacy_policy = mapped_column(String(255)) show_workflow_steps: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true")) use_icon_as_answer_icon: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) - _custom_disclaimer: Mapped[str] = mapped_column("custom_disclaimer", sa.TEXT, default="") + _custom_disclaimer: Mapped[str] = mapped_column("custom_disclaimer", LongText, default="") customize_domain = mapped_column(String(255)) customize_token_strategy: Mapped[str] = mapped_column(String(255), nullable=False) prompt_public: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) - status = mapped_column(String(255), nullable=False, server_default=sa.text("'normal'::character varying")) + status = mapped_column(String(255), nullable=False, server_default=sa.text("'normal'")) created_by = mapped_column(StringUUID, nullable=True) created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) updated_by = mapped_column(StringUUID, nullable=True) - updated_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) code = mapped_column(String(255)) @property @@ -1601,7 +1646,7 @@ class ApiToken(Base): sa.Index("api_token_tenant_idx", "tenant_id", "type"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, default=lambda: str(uuid4())) app_id = mapped_column(StringUUID, nullable=True) tenant_id = mapped_column(StringUUID, nullable=True) type = mapped_column(String(16), nullable=False) @@ -1628,7 +1673,7 @@ class UploadFile(Base): # NOTE: The `id` field is generated within the application to minimize extra roundtrips # (especially when generating `source_url`). # The `server_default` serves as a fallback mechanism. - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) storage_type: Mapped[str] = mapped_column(String(255), nullable=False) key: Mapped[str] = mapped_column(String(255), nullable=False) @@ -1639,9 +1684,7 @@ class UploadFile(Base): # The `created_by_role` field indicates whether the file was created by an `Account` or an `EndUser`. # Its value is derived from the `CreatorUserRole` enumeration. - created_by_role: Mapped[str] = mapped_column( - String(255), nullable=False, server_default=sa.text("'account'::character varying") - ) + created_by_role: Mapped[str] = mapped_column(String(255), nullable=False, server_default=sa.text("'account'")) # The `created_by` field stores the ID of the entity that created this upload file. # @@ -1665,7 +1708,7 @@ class UploadFile(Base): used_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True) used_at: Mapped[datetime | None] = mapped_column(sa.DateTime, nullable=True) hash: Mapped[str | None] = mapped_column(String(255), nullable=True) - source_url: Mapped[str] = mapped_column(sa.TEXT, default="") + source_url: Mapped[str] = mapped_column(LongText, default="") def __init__( self, @@ -1704,36 +1747,40 @@ class UploadFile(Base): self.source_url = source_url -class ApiRequest(Base): +class ApiRequest(TypeBase): __tablename__ = "api_requests" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="api_request_pkey"), sa.Index("api_request_token_idx", "tenant_id", "api_token_id"), ) - id = mapped_column(StringUUID, nullable=False, server_default=sa.text("uuid_generate_v4()")) - tenant_id = mapped_column(StringUUID, nullable=False) - api_token_id = mapped_column(StringUUID, nullable=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) + tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + api_token_id: Mapped[str] = mapped_column(StringUUID, nullable=False) path: Mapped[str] = mapped_column(String(255), nullable=False) - request = mapped_column(sa.Text, nullable=True) - response = mapped_column(sa.Text, nullable=True) + request: Mapped[str | None] = mapped_column(LongText, nullable=True) + response: Mapped[str | None] = mapped_column(LongText, nullable=True) ip: Mapped[str] = mapped_column(String(255), nullable=False) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + created_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) -class MessageChain(Base): +class MessageChain(TypeBase): __tablename__ = "message_chains" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="message_chain_pkey"), sa.Index("message_chain_message_id_idx", "message_id"), ) - id = mapped_column(StringUUID, nullable=False, server_default=sa.text("uuid_generate_v4()")) - message_id = mapped_column(StringUUID, nullable=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) + message_id: Mapped[str] = mapped_column(StringUUID, nullable=False) type: Mapped[str] = mapped_column(String(255), nullable=False) - input = mapped_column(sa.Text, nullable=True) - output = mapped_column(sa.Text, nullable=True) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=sa.func.current_timestamp()) + input: Mapped[str | None] = mapped_column(LongText, nullable=True) + output: Mapped[str | None] = mapped_column(LongText, nullable=True) + created_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=sa.func.current_timestamp(), init=False + ) class MessageAgentThought(Base): @@ -1744,32 +1791,32 @@ class MessageAgentThought(Base): sa.Index("message_agent_thought_message_chain_id_idx", "message_chain_id"), ) - id = mapped_column(StringUUID, nullable=False, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, default=lambda: str(uuid4())) message_id = mapped_column(StringUUID, nullable=False) message_chain_id = mapped_column(StringUUID, nullable=True) position: Mapped[int] = mapped_column(sa.Integer, nullable=False) - thought = mapped_column(sa.Text, nullable=True) - tool = mapped_column(sa.Text, nullable=True) - tool_labels_str = mapped_column(sa.Text, nullable=False, server_default=sa.text("'{}'::text")) - tool_meta_str = mapped_column(sa.Text, nullable=False, server_default=sa.text("'{}'::text")) - tool_input = mapped_column(sa.Text, nullable=True) - observation = mapped_column(sa.Text, nullable=True) + thought = mapped_column(LongText, nullable=True) + tool = mapped_column(LongText, nullable=True) + tool_labels_str = mapped_column(LongText, nullable=False, default=sa.text("'{}'")) + tool_meta_str = mapped_column(LongText, nullable=False, default=sa.text("'{}'")) + tool_input = mapped_column(LongText, nullable=True) + observation = mapped_column(LongText, nullable=True) # plugin_id = mapped_column(StringUUID, nullable=True) ## for future design - tool_process_data = mapped_column(sa.Text, nullable=True) - message = mapped_column(sa.Text, nullable=True) + tool_process_data = mapped_column(LongText, nullable=True) + message = mapped_column(LongText, nullable=True) message_token: Mapped[int | None] = mapped_column(sa.Integer, nullable=True) message_unit_price = mapped_column(sa.Numeric, nullable=True) message_price_unit = mapped_column(sa.Numeric(10, 7), nullable=False, server_default=sa.text("0.001")) - message_files = mapped_column(sa.Text, nullable=True) - answer = mapped_column(sa.Text, nullable=True) + message_files = mapped_column(LongText, nullable=True) + answer = mapped_column(LongText, nullable=True) answer_token: Mapped[int | None] = mapped_column(sa.Integer, nullable=True) answer_unit_price = mapped_column(sa.Numeric, nullable=True) answer_price_unit = mapped_column(sa.Numeric(10, 7), nullable=False, server_default=sa.text("0.001")) tokens: Mapped[int | None] = mapped_column(sa.Integer, nullable=True) total_price = mapped_column(sa.Numeric, nullable=True) - currency = mapped_column(String, nullable=True) + currency = mapped_column(String(255), nullable=True) latency: Mapped[float | None] = mapped_column(sa.Float, nullable=True) - created_by_role = mapped_column(String, nullable=False) + created_by_role = mapped_column(String(255), nullable=False) created_by = mapped_column(StringUUID, nullable=False) created_at = mapped_column(sa.DateTime, nullable=False, server_default=sa.func.current_timestamp()) @@ -1857,22 +1904,22 @@ class DatasetRetrieverResource(Base): sa.Index("dataset_retriever_resource_message_id_idx", "message_id"), ) - id = mapped_column(StringUUID, nullable=False, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, default=lambda: str(uuid4())) message_id = mapped_column(StringUUID, nullable=False) position: Mapped[int] = mapped_column(sa.Integer, nullable=False) dataset_id = mapped_column(StringUUID, nullable=False) - dataset_name = mapped_column(sa.Text, nullable=False) + dataset_name = mapped_column(LongText, nullable=False) document_id = mapped_column(StringUUID, nullable=True) - document_name = mapped_column(sa.Text, nullable=False) - data_source_type = mapped_column(sa.Text, nullable=True) + document_name = mapped_column(LongText, nullable=False) + data_source_type = mapped_column(LongText, nullable=True) segment_id = mapped_column(StringUUID, nullable=True) score: Mapped[float | None] = mapped_column(sa.Float, nullable=True) - content = mapped_column(sa.Text, nullable=False) + content = mapped_column(LongText, nullable=False) hit_count: Mapped[int | None] = mapped_column(sa.Integer, nullable=True) word_count: Mapped[int | None] = mapped_column(sa.Integer, nullable=True) segment_position: Mapped[int | None] = mapped_column(sa.Integer, nullable=True) - index_node_hash = mapped_column(sa.Text, nullable=True) - retriever_from = mapped_column(sa.Text, nullable=False) + index_node_hash = mapped_column(LongText, nullable=True) + retriever_from = mapped_column(LongText, nullable=False) created_by = mapped_column(StringUUID, nullable=False) created_at = mapped_column(sa.DateTime, nullable=False, server_default=sa.func.current_timestamp()) @@ -1887,7 +1934,7 @@ class Tag(Base): TAG_TYPE_LIST = ["knowledge", "app"] - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id = mapped_column(StringUUID, nullable=True) type = mapped_column(String(16), nullable=False) name: Mapped[str] = mapped_column(String(255), nullable=False) @@ -1895,7 +1942,7 @@ class Tag(Base): created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) -class TagBinding(Base): +class TagBinding(TypeBase): __tablename__ = "tag_bindings" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="tag_binding_pkey"), @@ -1903,30 +1950,38 @@ class TagBinding(Base): sa.Index("tag_bind_tag_id_idx", "tag_id"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) - tenant_id = mapped_column(StringUUID, nullable=True) - tag_id = mapped_column(StringUUID, nullable=True) - target_id = mapped_column(StringUUID, nullable=True) - created_by = mapped_column(StringUUID, nullable=False) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) + tenant_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) + tag_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) + target_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) + created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) + created_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) -class TraceAppConfig(Base): +class TraceAppConfig(TypeBase): __tablename__ = "trace_app_config" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="tracing_app_config_pkey"), sa.Index("trace_app_config_app_id_idx", "app_id"), ) - id = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) - app_id = mapped_column(StringUUID, nullable=False) - tracing_provider = mapped_column(String(255), nullable=True) - tracing_config = mapped_column(sa.JSON, nullable=True) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at = mapped_column( - sa.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) + app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + tracing_provider: Mapped[str | None] = mapped_column(String(255), nullable=True) + tracing_config: Mapped[dict | None] = mapped_column(sa.JSON, nullable=True) + created_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False ) - is_active: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true")) + updated_at: Mapped[datetime] = mapped_column( + sa.DateTime, + nullable=False, + server_default=func.current_timestamp(), + onupdate=func.current_timestamp(), + init=False, + ) + is_active: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"), default=True) @property def tracing_config_dict(self) -> dict[str, Any]: diff --git a/api/models/oauth.py b/api/models/oauth.py index ef23780dc8..2fce67c998 100644 --- a/api/models/oauth.py +++ b/api/models/oauth.py @@ -1,62 +1,79 @@ from datetime import datetime import sqlalchemy as sa -from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy import func from sqlalchemy.orm import Mapped, mapped_column -from .base import Base -from .engine import db -from .types import StringUUID +from libs.uuid_utils import uuidv7 + +from .base import TypeBase +from .types import AdjustedJSON, LongText, StringUUID -class DatasourceOauthParamConfig(Base): # type: ignore[name-defined] +class DatasourceOauthParamConfig(TypeBase): __tablename__ = "datasource_oauth_params" __table_args__ = ( - db.PrimaryKeyConstraint("id", name="datasource_oauth_config_pkey"), - db.UniqueConstraint("plugin_id", "provider", name="datasource_oauth_config_datasource_id_provider_idx"), + sa.PrimaryKeyConstraint("id", name="datasource_oauth_config_pkey"), + sa.UniqueConstraint("plugin_id", "provider", name="datasource_oauth_config_datasource_id_provider_idx"), ) - id = mapped_column(StringUUID, server_default=db.text("uuidv7()")) - plugin_id: Mapped[str] = mapped_column(db.String(255), nullable=False) - provider: Mapped[str] = mapped_column(db.String(255), nullable=False) - system_credentials: Mapped[dict] = mapped_column(JSONB, nullable=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()), init=False) + plugin_id: Mapped[str] = mapped_column(sa.String(255), nullable=False) + provider: Mapped[str] = mapped_column(sa.String(255), nullable=False) + system_credentials: Mapped[dict] = mapped_column(AdjustedJSON, nullable=False) -class DatasourceProvider(Base): +class DatasourceProvider(TypeBase): __tablename__ = "datasource_providers" __table_args__ = ( - db.PrimaryKeyConstraint("id", name="datasource_provider_pkey"), - db.UniqueConstraint("tenant_id", "plugin_id", "provider", "name", name="datasource_provider_unique_name"), - db.Index("datasource_provider_auth_type_provider_idx", "tenant_id", "plugin_id", "provider"), + sa.PrimaryKeyConstraint("id", name="datasource_provider_pkey"), + sa.UniqueConstraint("tenant_id", "plugin_id", "provider", "name", name="datasource_provider_unique_name"), + sa.Index("datasource_provider_auth_type_provider_idx", "tenant_id", "plugin_id", "provider"), ) - id = mapped_column(StringUUID, server_default=db.text("uuidv7()")) - tenant_id = mapped_column(StringUUID, nullable=False) - name: Mapped[str] = mapped_column(db.String(255), nullable=False) - provider: Mapped[str] = mapped_column(db.String(255), nullable=False) - plugin_id: Mapped[str] = mapped_column(db.String(255), nullable=False) - auth_type: Mapped[str] = mapped_column(db.String(255), nullable=False) - encrypted_credentials: Mapped[dict] = mapped_column(JSONB, nullable=False) - avatar_url: Mapped[str] = mapped_column(sa.Text, nullable=True, default="default") - is_default: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=db.text("false")) - expires_at: Mapped[int] = mapped_column(sa.Integer, nullable=False, server_default="-1") + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()), init=False) + tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + name: Mapped[str] = mapped_column(sa.String(255), nullable=False) + provider: Mapped[str] = mapped_column(sa.String(128), nullable=False) + plugin_id: Mapped[str] = mapped_column(sa.String(255), nullable=False) + auth_type: Mapped[str] = mapped_column(sa.String(255), nullable=False) + encrypted_credentials: Mapped[dict] = mapped_column(AdjustedJSON, nullable=False) + avatar_url: Mapped[str] = mapped_column(LongText, nullable=True, default="default") + is_default: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false"), default=False) + expires_at: Mapped[int] = mapped_column(sa.Integer, nullable=False, server_default="-1", default=-1) - created_at: Mapped[datetime] = mapped_column(sa.DateTime, nullable=False, default=datetime.now) - updated_at: Mapped[datetime] = mapped_column(sa.DateTime, nullable=False, default=datetime.now) + created_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_at: Mapped[datetime] = mapped_column( + sa.DateTime, + nullable=False, + server_default=func.current_timestamp(), + onupdate=func.current_timestamp(), + init=False, + ) -class DatasourceOauthTenantParamConfig(Base): +class DatasourceOauthTenantParamConfig(TypeBase): __tablename__ = "datasource_oauth_tenant_params" __table_args__ = ( - db.PrimaryKeyConstraint("id", name="datasource_oauth_tenant_config_pkey"), - db.UniqueConstraint("tenant_id", "plugin_id", "provider", name="datasource_oauth_tenant_config_unique"), + sa.PrimaryKeyConstraint("id", name="datasource_oauth_tenant_config_pkey"), + sa.UniqueConstraint("tenant_id", "plugin_id", "provider", name="datasource_oauth_tenant_config_unique"), ) - id = mapped_column(StringUUID, server_default=db.text("uuidv7()")) - tenant_id = mapped_column(StringUUID, nullable=False) - provider: Mapped[str] = mapped_column(db.String(255), nullable=False) - plugin_id: Mapped[str] = mapped_column(db.String(255), nullable=False) - client_params: Mapped[dict] = mapped_column(JSONB, nullable=False, default={}) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()), init=False) + tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) + provider: Mapped[str] = mapped_column(sa.String(255), nullable=False) + plugin_id: Mapped[str] = mapped_column(sa.String(255), nullable=False) + client_params: Mapped[dict] = mapped_column(AdjustedJSON, nullable=False, default_factory=dict) enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, default=False) - created_at: Mapped[datetime] = mapped_column(sa.DateTime, nullable=False, default=datetime.now) - updated_at: Mapped[datetime] = mapped_column(sa.DateTime, nullable=False, default=datetime.now) + created_at: Mapped[datetime] = mapped_column( + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_at: Mapped[datetime] = mapped_column( + sa.DateTime, + nullable=False, + server_default=func.current_timestamp(), + onupdate=func.current_timestamp(), + init=False, + ) diff --git a/api/models/provider.py b/api/models/provider.py index f6852d49f4..a840a483ab 100644 --- a/api/models/provider.py +++ b/api/models/provider.py @@ -1,14 +1,17 @@ from datetime import datetime from enum import StrEnum, auto from functools import cached_property +from uuid import uuid4 import sqlalchemy as sa from sqlalchemy import DateTime, String, func, text from sqlalchemy.orm import Mapped, mapped_column -from .base import Base +from libs.uuid_utils import uuidv7 + +from .base import Base, TypeBase from .engine import db -from .types import StringUUID +from .types import LongText, StringUUID class ProviderType(StrEnum): @@ -41,7 +44,7 @@ class ProviderQuotaType(StrEnum): raise ValueError(f"No matching enum found for value '{value}'") -class Provider(Base): +class Provider(TypeBase): """ Provider model representing the API providers and their configurations. """ @@ -55,24 +58,26 @@ class Provider(Base): ), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, primary_key=True, default=lambda: str(uuidv7()), init=False) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) provider_name: Mapped[str] = mapped_column(String(255), nullable=False) provider_type: Mapped[str] = mapped_column( - String(40), nullable=False, server_default=text("'custom'::character varying") + String(40), nullable=False, server_default=text("'custom'"), default="custom" ) - is_valid: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=text("false")) - last_used: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) - credential_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) + is_valid: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=text("false"), default=False) + last_used: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, init=False) + credential_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None) - quota_type: Mapped[str | None] = mapped_column( - String(40), nullable=True, server_default=text("''::character varying") + quota_type: Mapped[str | None] = mapped_column(String(40), nullable=True, server_default=text("''"), default="") + quota_limit: Mapped[int | None] = mapped_column(sa.BigInteger, nullable=True, default=None) + quota_used: Mapped[int] = mapped_column(sa.BigInteger, nullable=False, default=0) + + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), init=False ) - quota_limit: Mapped[int | None] = mapped_column(sa.BigInteger, nullable=True) - quota_used: Mapped[int | None] = mapped_column(sa.BigInteger, default=0) - - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) def __repr__(self): return ( @@ -113,7 +118,7 @@ class Provider(Base): return self.is_valid and self.token_is_set -class ProviderModel(Base): +class ProviderModel(TypeBase): """ Provider model representing the API provider_models and their configurations. """ @@ -127,15 +132,19 @@ class ProviderModel(Base): ), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) provider_name: Mapped[str] = mapped_column(String(255), nullable=False) model_name: Mapped[str] = mapped_column(String(255), nullable=False) model_type: Mapped[str] = mapped_column(String(40), nullable=False) - credential_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) - is_valid: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=text("false")) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + credential_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None) + is_valid: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=text("false"), default=False) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), init=False + ) @cached_property def credential(self): @@ -157,45 +166,53 @@ class ProviderModel(Base): return credential.encrypted_config if credential else None -class TenantDefaultModel(Base): +class TenantDefaultModel(TypeBase): __tablename__ = "tenant_default_models" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="tenant_default_model_pkey"), sa.Index("tenant_default_model_tenant_id_provider_type_idx", "tenant_id", "provider_name", "model_type"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) provider_name: Mapped[str] = mapped_column(String(255), nullable=False) model_name: Mapped[str] = mapped_column(String(255), nullable=False) model_type: Mapped[str] = mapped_column(String(40), nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), init=False + ) -class TenantPreferredModelProvider(Base): +class TenantPreferredModelProvider(TypeBase): __tablename__ = "tenant_preferred_model_providers" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="tenant_preferred_model_provider_pkey"), sa.Index("tenant_preferred_model_provider_tenant_provider_idx", "tenant_id", "provider_name"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) provider_name: Mapped[str] = mapped_column(String(255), nullable=False) preferred_provider_type: Mapped[str] = mapped_column(String(40), nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), init=False + ) -class ProviderOrder(Base): +class ProviderOrder(TypeBase): __tablename__ = "provider_orders" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="provider_order_pkey"), sa.Index("provider_order_tenant_provider_idx", "tenant_id", "provider_name"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) provider_name: Mapped[str] = mapped_column(String(255), nullable=False) account_id: Mapped[str] = mapped_column(StringUUID, nullable=False) @@ -205,17 +222,19 @@ class ProviderOrder(Base): quantity: Mapped[int] = mapped_column(sa.Integer, nullable=False, server_default=text("1")) currency: Mapped[str | None] = mapped_column(String(40)) total_amount: Mapped[int | None] = mapped_column(sa.Integer) - payment_status: Mapped[str] = mapped_column( - String(40), nullable=False, server_default=text("'wait_pay'::character varying") - ) + payment_status: Mapped[str] = mapped_column(String(40), nullable=False, server_default=text("'wait_pay'")) paid_at: Mapped[datetime | None] = mapped_column(DateTime) pay_failed_at: Mapped[datetime | None] = mapped_column(DateTime) refunded_at: Mapped[datetime | None] = mapped_column(DateTime) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), init=False + ) -class ProviderModelSetting(Base): +class ProviderModelSetting(TypeBase): """ Provider model settings for record the model enabled status and load balancing status. """ @@ -226,15 +245,21 @@ class ProviderModelSetting(Base): sa.Index("provider_model_setting_tenant_provider_model_idx", "tenant_id", "provider_name", "model_type"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) provider_name: Mapped[str] = mapped_column(String(255), nullable=False) model_name: Mapped[str] = mapped_column(String(255), nullable=False) model_type: Mapped[str] = mapped_column(String(40), nullable=False) - enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=text("true")) - load_balancing_enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=text("false")) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=text("true"), default=True) + load_balancing_enabled: Mapped[bool] = mapped_column( + sa.Boolean, nullable=False, server_default=text("false"), default=False + ) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), init=False + ) class LoadBalancingModelConfig(Base): @@ -248,18 +273,20 @@ class LoadBalancingModelConfig(Base): sa.Index("load_balancing_model_config_tenant_provider_model_idx", "tenant_id", "provider_name", "model_type"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) provider_name: Mapped[str] = mapped_column(String(255), nullable=False) model_name: Mapped[str] = mapped_column(String(255), nullable=False) model_type: Mapped[str] = mapped_column(String(40), nullable=False) name: Mapped[str] = mapped_column(String(255), nullable=False) - encrypted_config: Mapped[str | None] = mapped_column(sa.Text, nullable=True) + encrypted_config: Mapped[str | None] = mapped_column(LongText, nullable=True) credential_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) credential_source_type: Mapped[str | None] = mapped_column(String(40), nullable=True) enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=text("true")) created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) class ProviderCredential(Base): @@ -273,13 +300,15 @@ class ProviderCredential(Base): sa.Index("provider_credential_tenant_provider_idx", "tenant_id", "provider_name"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuidv7()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7())) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) provider_name: Mapped[str] = mapped_column(String(255), nullable=False) credential_name: Mapped[str] = mapped_column(String(255), nullable=False) - encrypted_config: Mapped[str] = mapped_column(sa.Text, nullable=False) + encrypted_config: Mapped[str] = mapped_column(LongText, nullable=False) created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) class ProviderModelCredential(Base): @@ -299,12 +328,14 @@ class ProviderModelCredential(Base): ), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=text("uuidv7()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7())) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) provider_name: Mapped[str] = mapped_column(String(255), nullable=False) model_name: Mapped[str] = mapped_column(String(255), nullable=False) model_type: Mapped[str] = mapped_column(String(40), nullable=False) credential_name: Mapped[str] = mapped_column(String(255), nullable=False) - encrypted_config: Mapped[str] = mapped_column(sa.Text, nullable=False) + encrypted_config: Mapped[str] = mapped_column(LongText, nullable=False) created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) - updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + updated_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + ) diff --git a/api/models/source.py b/api/models/source.py index 0ed7c4c70e..f093048c00 100644 --- a/api/models/source.py +++ b/api/models/source.py @@ -1,14 +1,13 @@ import json from datetime import datetime +from uuid import uuid4 import sqlalchemy as sa from sqlalchemy import DateTime, String, func -from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import Mapped, mapped_column -from models.base import TypeBase - -from .types import StringUUID +from .base import TypeBase +from .types import AdjustedJSON, LongText, StringUUID, adjusted_json_index class DataSourceOauthBinding(TypeBase): @@ -16,14 +15,14 @@ class DataSourceOauthBinding(TypeBase): __table_args__ = ( sa.PrimaryKeyConstraint("id", name="source_binding_pkey"), sa.Index("source_binding_tenant_id_idx", "tenant_id"), - sa.Index("source_info_idx", "source_info", postgresql_using="gin"), + adjusted_json_index("source_info_idx", "source_info"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) access_token: Mapped[str] = mapped_column(String(255), nullable=False) provider: Mapped[str] = mapped_column(String(255), nullable=False) - source_info: Mapped[dict] = mapped_column(JSONB, nullable=False) + source_info: Mapped[dict] = mapped_column(AdjustedJSON, nullable=False) created_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, server_default=func.current_timestamp(), init=False ) @@ -45,11 +44,11 @@ class DataSourceApiKeyAuthBinding(TypeBase): sa.Index("data_source_api_key_auth_binding_provider_idx", "provider"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) category: Mapped[str] = mapped_column(String(255), nullable=False) provider: Mapped[str] = mapped_column(String(255), nullable=False) - credentials: Mapped[str | None] = mapped_column(sa.Text, nullable=True, default=None) # JSON + credentials: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None) # JSON created_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, server_default=func.current_timestamp(), init=False ) diff --git a/api/models/task.py b/api/models/task.py index 513f167cce..539945b251 100644 --- a/api/models/task.py +++ b/api/models/task.py @@ -6,7 +6,9 @@ from sqlalchemy import DateTime, String from sqlalchemy.orm import Mapped, mapped_column from libs.datetime_utils import naive_utc_now -from models.base import TypeBase + +from .base import TypeBase +from .types import BinaryData, LongText class CeleryTask(TypeBase): @@ -19,17 +21,17 @@ class CeleryTask(TypeBase): ) task_id: Mapped[str] = mapped_column(String(155), unique=True) status: Mapped[str] = mapped_column(String(50), default=states.PENDING) - result: Mapped[bytes | None] = mapped_column(sa.PickleType, nullable=True, default=None) + result: Mapped[bytes | None] = mapped_column(BinaryData, nullable=True, default=None) date_done: Mapped[datetime | None] = mapped_column( DateTime, default=naive_utc_now, onupdate=naive_utc_now, nullable=True, ) - traceback: Mapped[str | None] = mapped_column(sa.Text, nullable=True, default=None) + traceback: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None) name: Mapped[str | None] = mapped_column(String(155), nullable=True, default=None) - args: Mapped[bytes | None] = mapped_column(sa.LargeBinary, nullable=True, default=None) - kwargs: Mapped[bytes | None] = mapped_column(sa.LargeBinary, nullable=True, default=None) + args: Mapped[bytes | None] = mapped_column(BinaryData, nullable=True, default=None) + kwargs: Mapped[bytes | None] = mapped_column(BinaryData, nullable=True, default=None) worker: Mapped[str | None] = mapped_column(String(155), nullable=True, default=None) retries: Mapped[int | None] = mapped_column(sa.Integer, nullable=True, default=None) queue: Mapped[str | None] = mapped_column(String(155), nullable=True, default=None) @@ -44,5 +46,5 @@ class CeleryTaskSet(TypeBase): sa.Integer, sa.Sequence("taskset_id_sequence"), autoincrement=True, primary_key=True, init=False ) taskset_id: Mapped[str] = mapped_column(String(155), unique=True) - result: Mapped[bytes | None] = mapped_column(sa.PickleType, nullable=True, default=None) + result: Mapped[bytes | None] = mapped_column(BinaryData, nullable=True, default=None) date_done: Mapped[datetime | None] = mapped_column(DateTime, default=naive_utc_now, nullable=True) diff --git a/api/models/tools.py b/api/models/tools.py index f365136708..249eb9fd6a 100644 --- a/api/models/tools.py +++ b/api/models/tools.py @@ -1,31 +1,27 @@ import json -from collections.abc import Mapping from datetime import datetime from decimal import Decimal from typing import TYPE_CHECKING, Any, cast -from urllib.parse import urlparse +from uuid import uuid4 import sqlalchemy as sa from deprecated import deprecated from sqlalchemy import ForeignKey, String, func from sqlalchemy.orm import Mapped, mapped_column -from core.helper import encrypter +from core.plugin.entities.plugin_daemon import CredentialType from core.tools.entities.common_entities import I18nObject from core.tools.entities.tool_bundle import ApiToolBundle from core.tools.entities.tool_entities import ApiProviderSchemaType, WorkflowToolParameterConfiguration from libs.uuid_utils import uuidv7 -from models.base import TypeBase +from .base import TypeBase from .engine import db from .model import Account, App, Tenant -from .types import StringUUID +from .types import LongText, StringUUID if TYPE_CHECKING: - from core.mcp.types import Tool as MCPTool - from core.tools.entities.common_entities import I18nObject - from core.tools.entities.tool_bundle import ApiToolBundle - from core.tools.entities.tool_entities import ApiProviderSchemaType, WorkflowToolParameterConfiguration + from core.entities.mcp_provider import MCPProviderEntity # system level tool oauth client params (client_id, client_secret, etc.) @@ -36,11 +32,11 @@ class ToolOAuthSystemClient(TypeBase): sa.UniqueConstraint("plugin_id", "provider", name="tool_oauth_system_client_plugin_id_provider_idx"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) plugin_id: Mapped[str] = mapped_column(String(512), nullable=False) provider: Mapped[str] = mapped_column(String(255), nullable=False) # oauth params of the tool provider - encrypted_oauth_params: Mapped[str] = mapped_column(sa.Text, nullable=False) + encrypted_oauth_params: Mapped[str] = mapped_column(LongText, nullable=False) # tenant level tool oauth client params (client_id, client_secret, etc.) @@ -51,14 +47,14 @@ class ToolOAuthTenantClient(TypeBase): sa.UniqueConstraint("tenant_id", "plugin_id", "provider", name="unique_tool_oauth_tenant_client"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) # tenant id tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) - plugin_id: Mapped[str] = mapped_column(String(512), nullable=False) + plugin_id: Mapped[str] = mapped_column(String(255), nullable=False) provider: Mapped[str] = mapped_column(String(255), nullable=False) enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"), init=False) # oauth params of the tool provider - encrypted_oauth_params: Mapped[str] = mapped_column(sa.Text, nullable=False, init=False) + encrypted_oauth_params: Mapped[str] = mapped_column(LongText, nullable=False, init=False) @property def oauth_params(self) -> dict[str, Any]: @@ -77,11 +73,11 @@ class BuiltinToolProvider(TypeBase): ) # id of the tool provider - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) name: Mapped[str] = mapped_column( String(256), nullable=False, - server_default=sa.text("'API KEY 1'::character varying"), + server_default=sa.text("'API KEY 1'"), ) # id of the tenant tenant_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) @@ -90,21 +86,21 @@ class BuiltinToolProvider(TypeBase): # name of the tool provider provider: Mapped[str] = mapped_column(String(256), nullable=False) # credential of the tool provider - encrypted_credentials: Mapped[str | None] = mapped_column(sa.Text, nullable=True, default=None) + encrypted_credentials: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None) created_at: Mapped[datetime] = mapped_column( - sa.DateTime, nullable=False, server_default=sa.text("CURRENT_TIMESTAMP(0)"), init=False + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False ) updated_at: Mapped[datetime] = mapped_column( sa.DateTime, nullable=False, - server_default=sa.text("CURRENT_TIMESTAMP(0)"), + server_default=func.current_timestamp(), onupdate=func.current_timestamp(), init=False, ) is_default: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false"), default=False) # credential type, e.g., "api-key", "oauth2" credential_type: Mapped[str] = mapped_column( - String(32), nullable=False, server_default=sa.text("'api-key'::character varying"), default="api-key" + String(32), nullable=False, server_default=sa.text("'api-key'"), default="api-key" ) expires_at: Mapped[int] = mapped_column(sa.BigInteger, nullable=False, server_default=sa.text("-1"), default=-1) @@ -123,7 +119,7 @@ class EndUserAuthenticationProvider(TypeBase): __tablename__ = "tool_enduser_authentication_providers" __table_args__ = ( - sa.UniqueConstraint("tenant_id", "provider", "end_user_id", "name", name="unique_enduser_authentication_provider"), + sa.UniqueConstraint("end_user_id", "provider", "name"), ) # id of the authentication provider @@ -132,15 +128,16 @@ class EndUserAuthenticationProvider(TypeBase): String(256), nullable=False, default="API KEY 1", + index=True ) # id of the tenant - tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False, index=True) + tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) # id of the end user end_user_id: Mapped[str] = mapped_column(StringUUID, nullable=False, index=True) # name of the tool provider - provider: Mapped[str] = mapped_column(sa.Text, nullable=False) + provider: Mapped[str] = mapped_column(LongText, nullable=False, index=True) # encrypted credentials for the end user - encrypted_credentials: Mapped[str] = mapped_column(sa.Text, nullable=False, default="") + encrypted_credentials: Mapped[str] = mapped_column(LongText, nullable=False, default="") created_at: Mapped[datetime] = mapped_column( sa.DateTime, nullable=False, default=datetime.now, init=False ) @@ -152,8 +149,8 @@ class EndUserAuthenticationProvider(TypeBase): init=False, ) # credential type, e.g., "api-key", "oauth2" - credential_type: Mapped[str] = mapped_column( - String(32), nullable=False, default="api-key" + credential_type: Mapped[CredentialType] = mapped_column( + String(32), nullable=False, default=CredentialType.API_KEY ) expires_at: Mapped[int] = mapped_column(sa.BigInteger, nullable=False, default=-1) @@ -175,32 +172,32 @@ class ApiToolProvider(TypeBase): sa.UniqueConstraint("name", "tenant_id", name="unique_api_tool_provider"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) # name of the api provider name: Mapped[str] = mapped_column( String(255), nullable=False, - server_default=sa.text("'API KEY 1'::character varying"), + server_default=sa.text("'API KEY 1'"), ) # icon icon: Mapped[str] = mapped_column(String(255), nullable=False) # original schema - schema: Mapped[str] = mapped_column(sa.Text, nullable=False) + schema: Mapped[str] = mapped_column(LongText, nullable=False) schema_type_str: Mapped[str] = mapped_column(String(40), nullable=False) # who created this tool user_id: Mapped[str] = mapped_column(StringUUID, nullable=False) # tenant id tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) # description of the provider - description: Mapped[str] = mapped_column(sa.Text, nullable=False) + description: Mapped[str] = mapped_column(LongText, nullable=False) # json format tools - tools_str: Mapped[str] = mapped_column(sa.Text, nullable=False) + tools_str: Mapped[str] = mapped_column(LongText, nullable=False) # json format credentials - credentials_str: Mapped[str] = mapped_column(sa.Text, nullable=False) + credentials_str: Mapped[str] = mapped_column(LongText, nullable=False) # privacy policy privacy_policy: Mapped[str | None] = mapped_column(String(255), nullable=True, default=None) # custom_disclaimer - custom_disclaimer: Mapped[str] = mapped_column(sa.TEXT, default="") + custom_disclaimer: Mapped[str] = mapped_column(LongText, default="") created_at: Mapped[datetime] = mapped_column( sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False @@ -215,14 +212,10 @@ class ApiToolProvider(TypeBase): @property def schema_type(self) -> "ApiProviderSchemaType": - from core.tools.entities.tool_entities import ApiProviderSchemaType - return ApiProviderSchemaType.value_of(self.schema_type_str) @property def tools(self) -> list["ApiToolBundle"]: - from core.tools.entities.tool_bundle import ApiToolBundle - return [ApiToolBundle.model_validate(tool) for tool in json.loads(self.tools_str)] @property @@ -251,7 +244,7 @@ class ToolLabelBinding(TypeBase): sa.UniqueConstraint("tool_id", "label_name", name="unique_tool_label_bind"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) # tool id tool_id: Mapped[str] = mapped_column(String(64), nullable=False) # tool type @@ -272,7 +265,7 @@ class WorkflowToolProvider(TypeBase): sa.UniqueConstraint("tenant_id", "app_id", name="unique_workflow_tool_provider_app_id"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) # name of the workflow provider name: Mapped[str] = mapped_column(String(255), nullable=False) # label of the workflow provider @@ -288,19 +281,19 @@ class WorkflowToolProvider(TypeBase): # tenant id tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) # description of the provider - description: Mapped[str] = mapped_column(sa.Text, nullable=False) + description: Mapped[str] = mapped_column(LongText, nullable=False) # parameter configuration - parameter_configuration: Mapped[str] = mapped_column(sa.Text, nullable=False, server_default="[]", default="[]") + parameter_configuration: Mapped[str] = mapped_column(LongText, nullable=False, default="[]") # privacy policy privacy_policy: Mapped[str | None] = mapped_column(String(255), nullable=True, server_default="", default=None) created_at: Mapped[datetime] = mapped_column( - sa.DateTime, nullable=False, server_default=sa.text("CURRENT_TIMESTAMP(0)"), init=False + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False ) updated_at: Mapped[datetime] = mapped_column( sa.DateTime, nullable=False, - server_default=sa.text("CURRENT_TIMESTAMP(0)"), + server_default=func.current_timestamp(), onupdate=func.current_timestamp(), init=False, ) @@ -315,8 +308,6 @@ class WorkflowToolProvider(TypeBase): @property def parameter_configurations(self) -> list["WorkflowToolParameterConfiguration"]: - from core.tools.entities.tool_entities import WorkflowToolParameterConfiguration - return [ WorkflowToolParameterConfiguration.model_validate(config) for config in json.loads(self.parameter_configuration) @@ -340,13 +331,13 @@ class MCPToolProvider(TypeBase): sa.UniqueConstraint("tenant_id", "server_identifier", name="unique_mcp_provider_server_identifier"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) # name of the mcp provider name: Mapped[str] = mapped_column(String(40), nullable=False) # server identifier of the mcp provider server_identifier: Mapped[str] = mapped_column(String(64), nullable=False) # encrypted url of the mcp provider - server_url: Mapped[str] = mapped_column(sa.Text, nullable=False) + server_url: Mapped[str] = mapped_column(LongText, nullable=False) # hash of server_url for uniqueness check server_url_hash: Mapped[str] = mapped_column(String(64), nullable=False) # icon of the mcp provider @@ -356,18 +347,18 @@ class MCPToolProvider(TypeBase): # who created this tool user_id: Mapped[str] = mapped_column(StringUUID, nullable=False) # encrypted credentials - encrypted_credentials: Mapped[str | None] = mapped_column(sa.Text, nullable=True, default=None) + encrypted_credentials: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None) # authed authed: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, default=False) # tools - tools: Mapped[str] = mapped_column(sa.Text, nullable=False, default="[]") + tools: Mapped[str] = mapped_column(LongText, nullable=False, default="[]") created_at: Mapped[datetime] = mapped_column( - sa.DateTime, nullable=False, server_default=sa.text("CURRENT_TIMESTAMP(0)"), init=False + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False ) updated_at: Mapped[datetime] = mapped_column( sa.DateTime, nullable=False, - server_default=sa.text("CURRENT_TIMESTAMP(0)"), + server_default=func.current_timestamp(), onupdate=func.current_timestamp(), init=False, ) @@ -376,131 +367,41 @@ class MCPToolProvider(TypeBase): sa.Float, nullable=False, server_default=sa.text("300"), default=300.0 ) # encrypted headers for MCP server requests - encrypted_headers: Mapped[str | None] = mapped_column(sa.Text, nullable=True, default=None) + encrypted_headers: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None) def load_user(self) -> Account | None: return db.session.query(Account).where(Account.id == self.user_id).first() - @property - def tenant(self) -> Tenant | None: - return db.session.query(Tenant).where(Tenant.id == self.tenant_id).first() - @property def credentials(self) -> dict[str, Any]: if not self.encrypted_credentials: return {} try: - return cast(dict[str, Any], json.loads(self.encrypted_credentials)) or {} - except json.JSONDecodeError: - return {} - - @property - def mcp_tools(self) -> list["MCPTool"]: - from core.mcp.types import Tool as MCPTool - - return [MCPTool.model_validate(tool) for tool in json.loads(self.tools)] - - @property - def provider_icon(self) -> Mapping[str, str] | str: - from core.file import helpers as file_helpers - - assert self.icon - try: - return json.loads(self.icon) - except json.JSONDecodeError: - return file_helpers.get_signed_file_url(self.icon) - - @property - def decrypted_server_url(self) -> str: - return encrypter.decrypt_token(self.tenant_id, self.server_url) - - @property - def decrypted_headers(self) -> dict[str, Any]: - """Get decrypted headers for MCP server requests.""" - from core.entities.provider_entities import BasicProviderConfig - from core.helper.provider_cache import NoOpProviderCredentialCache - from core.tools.utils.encryption import create_provider_encrypter - - try: - if not self.encrypted_headers: - return {} - - headers_data = json.loads(self.encrypted_headers) - - # Create dynamic config for all headers as SECRET_INPUT - config = [BasicProviderConfig(type=BasicProviderConfig.Type.SECRET_INPUT, name=key) for key in headers_data] - - encrypter_instance, _ = create_provider_encrypter( - tenant_id=self.tenant_id, - config=config, - cache=NoOpProviderCredentialCache(), - ) - - result = encrypter_instance.decrypt(headers_data) - return result + return json.loads(self.encrypted_credentials) except Exception: return {} @property - def masked_headers(self) -> dict[str, Any]: - """Get masked headers for frontend display.""" - from core.entities.provider_entities import BasicProviderConfig - from core.helper.provider_cache import NoOpProviderCredentialCache - from core.tools.utils.encryption import create_provider_encrypter - + def headers(self) -> dict[str, Any]: + if self.encrypted_headers is None: + return {} try: - if not self.encrypted_headers: - return {} - - headers_data = json.loads(self.encrypted_headers) - - # Create dynamic config for all headers as SECRET_INPUT - config = [BasicProviderConfig(type=BasicProviderConfig.Type.SECRET_INPUT, name=key) for key in headers_data] - - encrypter_instance, _ = create_provider_encrypter( - tenant_id=self.tenant_id, - config=config, - cache=NoOpProviderCredentialCache(), - ) - - # First decrypt, then mask - decrypted_headers = encrypter_instance.decrypt(headers_data) - result = encrypter_instance.mask_plugin_credentials(decrypted_headers) - return result + return json.loads(self.encrypted_headers) except Exception: return {} @property - def masked_server_url(self) -> str: - def mask_url(url: str, mask_char: str = "*") -> str: - """ - mask the url to a simple string - """ - parsed = urlparse(url) - base_url = f"{parsed.scheme}://{parsed.netloc}" + def tool_dict(self) -> list[dict[str, Any]]: + try: + return json.loads(self.tools) if self.tools else [] + except (json.JSONDecodeError, TypeError): + return [] - if parsed.path and parsed.path != "/": - return f"{base_url}/{mask_char * 6}" - else: - return base_url + def to_entity(self) -> "MCPProviderEntity": + """Convert to domain entity""" + from core.entities.mcp_provider import MCPProviderEntity - return mask_url(self.decrypted_server_url) - - @property - def decrypted_credentials(self) -> dict[str, Any]: - from core.helper.provider_cache import NoOpProviderCredentialCache - from core.tools.mcp_tool.provider import MCPToolProviderController - from core.tools.utils.encryption import create_provider_encrypter - - provider_controller = MCPToolProviderController.from_db(self) - - encrypter, _ = create_provider_encrypter( - tenant_id=self.tenant_id, - config=[x.to_basic_provider_config() for x in provider_controller.get_credentials_schema()], - cache=NoOpProviderCredentialCache(), - ) - - return encrypter.decrypt(self.credentials) + return MCPProviderEntity.from_db_model(self) class ToolModelInvoke(TypeBase): @@ -511,7 +412,7 @@ class ToolModelInvoke(TypeBase): __tablename__ = "tool_model_invokes" __table_args__ = (sa.PrimaryKeyConstraint("id", name="tool_model_invoke_pkey"),) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) # who invoke this tool user_id: Mapped[str] = mapped_column(StringUUID, nullable=False) # tenant id @@ -523,11 +424,11 @@ class ToolModelInvoke(TypeBase): # tool name tool_name: Mapped[str] = mapped_column(String(128), nullable=False) # invoke parameters - model_parameters: Mapped[str] = mapped_column(sa.Text, nullable=False) + model_parameters: Mapped[str] = mapped_column(LongText, nullable=False) # prompt messages - prompt_messages: Mapped[str] = mapped_column(sa.Text, nullable=False) + prompt_messages: Mapped[str] = mapped_column(LongText, nullable=False) # invoke response - model_response: Mapped[str] = mapped_column(sa.Text, nullable=False) + model_response: Mapped[str] = mapped_column(LongText, nullable=False) prompt_tokens: Mapped[int] = mapped_column(sa.Integer, nullable=False, server_default=sa.text("0")) answer_tokens: Mapped[int] = mapped_column(sa.Integer, nullable=False, server_default=sa.text("0")) @@ -564,7 +465,7 @@ class ToolConversationVariables(TypeBase): sa.Index("conversation_id_idx", "conversation_id"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) # conversation user id user_id: Mapped[str] = mapped_column(StringUUID, nullable=False) # tenant id @@ -572,7 +473,7 @@ class ToolConversationVariables(TypeBase): # conversation id conversation_id: Mapped[str] = mapped_column(StringUUID, nullable=False) # variables pool - variables_str: Mapped[str] = mapped_column(sa.Text, nullable=False) + variables_str: Mapped[str] = mapped_column(LongText, nullable=False) created_at: Mapped[datetime] = mapped_column( sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False @@ -601,7 +502,7 @@ class ToolFile(TypeBase): sa.Index("tool_file_conversation_id_idx", "conversation_id"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) # conversation user id user_id: Mapped[str] = mapped_column(StringUUID) # tenant id @@ -615,9 +516,9 @@ class ToolFile(TypeBase): # original url original_url: Mapped[str | None] = mapped_column(String(2048), nullable=True, default=None) # name - name: Mapped[str] = mapped_column(default="") + name: Mapped[str] = mapped_column(String(255), default="") # size - size: Mapped[int] = mapped_column(default=-1) + size: Mapped[int] = mapped_column(sa.Integer, default=-1) @deprecated @@ -632,18 +533,18 @@ class DeprecatedPublishedAppTool(TypeBase): sa.UniqueConstraint("app_id", "user_id", name="unique_published_app_tool"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) # id of the app app_id: Mapped[str] = mapped_column(StringUUID, ForeignKey("apps.id"), nullable=False) user_id: Mapped[str] = mapped_column(StringUUID, nullable=False) # who published this tool - description: Mapped[str] = mapped_column(sa.Text, nullable=False) + description: Mapped[str] = mapped_column(LongText, nullable=False) # llm_description of the tool, for LLM - llm_description: Mapped[str] = mapped_column(sa.Text, nullable=False) + llm_description: Mapped[str] = mapped_column(LongText, nullable=False) # query description, query will be seem as a parameter of the tool, # to describe this parameter to llm, we need this field - query_description: Mapped[str] = mapped_column(sa.Text, nullable=False) + query_description: Mapped[str] = mapped_column(LongText, nullable=False) # query name, the name of the query parameter query_name: Mapped[str] = mapped_column(String(40), nullable=False) # name of the tool provider @@ -651,18 +552,16 @@ class DeprecatedPublishedAppTool(TypeBase): # author author: Mapped[str] = mapped_column(String(40), nullable=False) created_at: Mapped[datetime] = mapped_column( - sa.DateTime, nullable=False, server_default=sa.text("CURRENT_TIMESTAMP(0)"), init=False + sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False ) updated_at: Mapped[datetime] = mapped_column( sa.DateTime, nullable=False, - server_default=sa.text("CURRENT_TIMESTAMP(0)"), + server_default=func.current_timestamp(), onupdate=func.current_timestamp(), init=False, ) @property def description_i18n(self) -> "I18nObject": - from core.tools.entities.common_entities import I18nObject - return I18nObject.model_validate(json.loads(self.description)) diff --git a/api/models/trigger.py b/api/models/trigger.py index d34006589a..753fdb227b 100644 --- a/api/models/trigger.py +++ b/api/models/trigger.py @@ -3,7 +3,8 @@ import time from collections.abc import Mapping from datetime import datetime from functools import cached_property -from typing import Any, Optional, cast +from typing import Any, cast +from uuid import uuid4 import sqlalchemy as sa from sqlalchemy import DateTime, Index, Integer, String, UniqueConstraint, func @@ -14,14 +15,16 @@ from core.trigger.entities.api_entities import TriggerProviderSubscriptionApiEnt from core.trigger.entities.entities import Subscription from core.trigger.utils.endpoint import generate_plugin_trigger_endpoint_url, generate_webhook_trigger_endpoint from libs.datetime_utils import naive_utc_now -from models.base import Base -from models.engine import db -from models.enums import AppTriggerStatus, AppTriggerType, CreatorUserRole, WorkflowTriggerStatus -from models.model import Account -from models.types import EnumText, StringUUID +from libs.uuid_utils import uuidv7 + +from .base import Base, TypeBase +from .engine import db +from .enums import AppTriggerStatus, AppTriggerType, CreatorUserRole, WorkflowTriggerStatus +from .model import Account +from .types import EnumText, LongText, StringUUID -class TriggerSubscription(Base): +class TriggerSubscription(TypeBase): """ Trigger provider model for managing credentials Supports multiple credential instances per provider @@ -38,7 +41,7 @@ class TriggerSubscription(Base): UniqueConstraint("tenant_id", "provider_id", "name", name="unique_trigger_provider"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) name: Mapped[str] = mapped_column(String(255), nullable=False, comment="Subscription instance name") tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) user_id: Mapped[str] = mapped_column(StringUUID, nullable=False) @@ -60,12 +63,15 @@ class TriggerSubscription(Base): Integer, default=-1, comment="Subscription instance expiration timestamp, -1 for never" ) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) updated_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, server_default=func.current_timestamp(), server_onupdate=func.current_timestamp(), + init=False, ) def is_credential_expired(self) -> bool: @@ -98,24 +104,27 @@ class TriggerSubscription(Base): # system level trigger oauth client params -class TriggerOAuthSystemClient(Base): +class TriggerOAuthSystemClient(TypeBase): __tablename__ = "trigger_oauth_system_clients" __table_args__ = ( sa.PrimaryKeyConstraint("id", name="trigger_oauth_system_client_pkey"), sa.UniqueConstraint("plugin_id", "provider", name="trigger_oauth_system_client_plugin_id_provider_idx"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) - plugin_id: Mapped[str] = mapped_column(String(512), nullable=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) + plugin_id: Mapped[str] = mapped_column(String(255), nullable=False) provider: Mapped[str] = mapped_column(String(255), nullable=False) # oauth params of the trigger provider - encrypted_oauth_params: Mapped[str] = mapped_column(sa.Text, nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + encrypted_oauth_params: Mapped[str] = mapped_column(LongText, nullable=False) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) updated_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, server_default=func.current_timestamp(), server_onupdate=func.current_timestamp(), + init=False, ) @@ -127,14 +136,14 @@ class TriggerOAuthTenantClient(Base): sa.UniqueConstraint("tenant_id", "plugin_id", "provider", name="unique_trigger_oauth_tenant_client"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) # tenant id tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) - plugin_id: Mapped[str] = mapped_column(String(512), nullable=False) + plugin_id: Mapped[str] = mapped_column(String(255), nullable=False) provider: Mapped[str] = mapped_column(String(255), nullable=False) - enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true")) + enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"), default=True) # oauth params of the trigger provider - encrypted_oauth_params: Mapped[str] = mapped_column(sa.Text, nullable=False) + encrypted_oauth_params: Mapped[str] = mapped_column(LongText, nullable=False) created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) updated_at: Mapped[datetime] = mapped_column( DateTime, @@ -161,6 +170,7 @@ class WorkflowTriggerLog(Base): - workflow_id (uuid) Workflow ID - workflow_run_id (uuid) Optional - Associated workflow run ID when execution starts - root_node_id (string) Optional - Custom starting node ID for workflow execution + - trigger_metadata (text) Optional - Trigger metadata (JSON) - trigger_type (string) Type of trigger: webhook, schedule, plugin - trigger_data (text) Full trigger data including inputs (JSON) - inputs (text) Input parameters (JSON) @@ -189,36 +199,36 @@ class WorkflowTriggerLog(Base): sa.Index("workflow_trigger_log_workflow_id_idx", "workflow_id"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuidv7()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7())) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) workflow_id: Mapped[str] = mapped_column(StringUUID, nullable=False) - workflow_run_id: Mapped[Optional[str]] = mapped_column(StringUUID, nullable=True) - root_node_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) - + workflow_run_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) + root_node_id: Mapped[str | None] = mapped_column(String(255), nullable=True) + trigger_metadata: Mapped[str] = mapped_column(LongText, nullable=False) trigger_type: Mapped[str] = mapped_column(EnumText(AppTriggerType, length=50), nullable=False) - trigger_data: Mapped[str] = mapped_column(sa.Text, nullable=False) # Full TriggerData as JSON - inputs: Mapped[str] = mapped_column(sa.Text, nullable=False) # Just inputs for easy viewing - outputs: Mapped[Optional[str]] = mapped_column(sa.Text, nullable=True) + trigger_data: Mapped[str] = mapped_column(LongText, nullable=False) # Full TriggerData as JSON + inputs: Mapped[str] = mapped_column(LongText, nullable=False) # Just inputs for easy viewing + outputs: Mapped[str | None] = mapped_column(LongText, nullable=True) status: Mapped[str] = mapped_column( EnumText(WorkflowTriggerStatus, length=50), nullable=False, default=WorkflowTriggerStatus.PENDING ) - error: Mapped[Optional[str]] = mapped_column(sa.Text, nullable=True) + error: Mapped[str | None] = mapped_column(LongText, nullable=True) queue_name: Mapped[str] = mapped_column(String(100), nullable=False) - celery_task_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) + celery_task_id: Mapped[str | None] = mapped_column(String(255), nullable=True) retry_count: Mapped[int] = mapped_column(sa.Integer, nullable=False, default=0) - elapsed_time: Mapped[Optional[float]] = mapped_column(sa.Float, nullable=True) - total_tokens: Mapped[Optional[int]] = mapped_column(sa.Integer, nullable=True) + elapsed_time: Mapped[float | None] = mapped_column(sa.Float, nullable=True) + total_tokens: Mapped[int | None] = mapped_column(sa.Integer, nullable=True) created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) created_by_role: Mapped[str] = mapped_column(String(255), nullable=False) created_by: Mapped[str] = mapped_column(String(255), nullable=False) - triggered_at: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True) - finished_at: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True) + triggered_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) + finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) @property def created_by_account(self): @@ -227,7 +237,7 @@ class WorkflowTriggerLog(Base): @property def created_by_end_user(self): - from models.model import EndUser + from .model import EndUser created_by_role = CreatorUserRole(self.created_by_role) return db.session.get(EndUser, self.created_by) if created_by_role == CreatorUserRole.END_USER else None @@ -240,6 +250,8 @@ class WorkflowTriggerLog(Base): "app_id": self.app_id, "workflow_id": self.workflow_id, "workflow_run_id": self.workflow_run_id, + "root_node_id": self.root_node_id, + "trigger_metadata": json.loads(self.trigger_metadata) if self.trigger_metadata else None, "trigger_type": self.trigger_type, "trigger_data": json.loads(self.trigger_data), "inputs": json.loads(self.inputs), @@ -259,7 +271,7 @@ class WorkflowTriggerLog(Base): } -class WorkflowWebhookTrigger(Base): +class WorkflowWebhookTrigger(TypeBase): """ Workflow Webhook Trigger @@ -282,18 +294,21 @@ class WorkflowWebhookTrigger(Base): sa.UniqueConstraint("webhook_id", name="uniq_webhook_id"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuidv7()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()), init=False) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) node_id: Mapped[str] = mapped_column(String(64), nullable=False) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) webhook_id: Mapped[str] = mapped_column(String(24), nullable=False) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) updated_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, server_default=func.current_timestamp(), server_onupdate=func.current_timestamp(), + init=False, ) @cached_property @@ -311,7 +326,7 @@ class WorkflowWebhookTrigger(Base): return generate_webhook_trigger_endpoint(self.webhook_id, True) -class WorkflowPluginTrigger(Base): +class WorkflowPluginTrigger(TypeBase): """ Workflow Plugin Trigger @@ -336,23 +351,26 @@ class WorkflowPluginTrigger(Base): sa.UniqueConstraint("app_id", "node_id", name="uniq_app_node_subscription"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) node_id: Mapped[str] = mapped_column(String(64), nullable=False) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) provider_id: Mapped[str] = mapped_column(String(512), nullable=False) event_name: Mapped[str] = mapped_column(String(255), nullable=False) subscription_id: Mapped[str] = mapped_column(String(255), nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) updated_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, server_default=func.current_timestamp(), server_onupdate=func.current_timestamp(), + init=False, ) -class AppTrigger(Base): +class AppTrigger(TypeBase): """ App Trigger @@ -377,26 +395,29 @@ class AppTrigger(Base): sa.Index("app_trigger_tenant_app_idx", "tenant_id", "app_id"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuidv7()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()), init=False) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) - node_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=False) + node_id: Mapped[str | None] = mapped_column(String(64), nullable=False) trigger_type: Mapped[str] = mapped_column(EnumText(AppTriggerType, length=50), nullable=False) title: Mapped[str] = mapped_column(String(255), nullable=False) - provider_name: Mapped[str] = mapped_column(String(255), server_default="", nullable=True) + provider_name: Mapped[str] = mapped_column(String(255), server_default="", default="") # why it is nullable? status: Mapped[str] = mapped_column( EnumText(AppTriggerStatus, length=50), nullable=False, default=AppTriggerStatus.ENABLED ) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) updated_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, default=naive_utc_now(), server_onupdate=func.current_timestamp(), + init=False, ) -class WorkflowSchedulePlan(Base): +class WorkflowSchedulePlan(TypeBase): """ Workflow Schedule Configuration @@ -422,7 +443,7 @@ class WorkflowSchedulePlan(Base): sa.Index("workflow_schedule_plan_next_idx", "next_run_at"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuidv7()")) + id: Mapped[str] = mapped_column(StringUUID, primary_key=True, default=lambda: str(uuidv7()), init=False) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) node_id: Mapped[str] = mapped_column(String(64), nullable=False) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) @@ -432,10 +453,12 @@ class WorkflowSchedulePlan(Base): timezone: Mapped[str] = mapped_column(String(64), nullable=False) # Schedule control - next_run_at: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + next_run_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) updated_at: Mapped[datetime] = mapped_column( - DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), init=False ) def to_dict(self) -> dict[str, Any]: diff --git a/api/models/types.py b/api/models/types.py index cc69ae4f57..75dc495fed 100644 --- a/api/models/types.py +++ b/api/models/types.py @@ -2,11 +2,15 @@ import enum import uuid from typing import Any, Generic, TypeVar -from sqlalchemy import CHAR, VARCHAR, TypeDecorator -from sqlalchemy.dialects.postgresql import UUID +import sqlalchemy as sa +from sqlalchemy import CHAR, TEXT, VARCHAR, LargeBinary, TypeDecorator +from sqlalchemy.dialects.mysql import LONGBLOB, LONGTEXT +from sqlalchemy.dialects.postgresql import BYTEA, JSONB, UUID from sqlalchemy.engine.interfaces import Dialect from sqlalchemy.sql.type_api import TypeEngine +from configs import dify_config + class StringUUID(TypeDecorator[uuid.UUID | str | None]): impl = CHAR @@ -34,6 +38,78 @@ class StringUUID(TypeDecorator[uuid.UUID | str | None]): return str(value) +class LongText(TypeDecorator[str | None]): + impl = TEXT + cache_ok = True + + def process_bind_param(self, value: str | None, dialect: Dialect) -> str | None: + if value is None: + return value + return value + + def load_dialect_impl(self, dialect: Dialect) -> TypeEngine[Any]: + if dialect.name == "postgresql": + return dialect.type_descriptor(TEXT()) + elif dialect.name == "mysql": + return dialect.type_descriptor(LONGTEXT()) + else: + return dialect.type_descriptor(TEXT()) + + def process_result_value(self, value: str | None, dialect: Dialect) -> str | None: + if value is None: + return value + return value + + +class BinaryData(TypeDecorator[bytes | None]): + impl = LargeBinary + cache_ok = True + + def process_bind_param(self, value: bytes | None, dialect: Dialect) -> bytes | None: + if value is None: + return value + return value + + def load_dialect_impl(self, dialect: Dialect) -> TypeEngine[Any]: + if dialect.name == "postgresql": + return dialect.type_descriptor(BYTEA()) + elif dialect.name == "mysql": + return dialect.type_descriptor(LONGBLOB()) + else: + return dialect.type_descriptor(LargeBinary()) + + def process_result_value(self, value: bytes | None, dialect: Dialect) -> bytes | None: + if value is None: + return value + return value + + +class AdjustedJSON(TypeDecorator[dict | list | None]): + impl = sa.JSON + cache_ok = True + + def __init__(self, astext_type=None): + self.astext_type = astext_type + super().__init__() + + def load_dialect_impl(self, dialect: Dialect) -> TypeEngine[Any]: + if dialect.name == "postgresql": + if self.astext_type: + return dialect.type_descriptor(JSONB(astext_type=self.astext_type)) + else: + return dialect.type_descriptor(JSONB()) + elif dialect.name == "mysql": + return dialect.type_descriptor(sa.JSON()) + else: + return dialect.type_descriptor(sa.JSON()) + + def process_bind_param(self, value: dict | list | None, dialect: Dialect) -> dict | list | None: + return value + + def process_result_value(self, value: dict | list | None, dialect: Dialect) -> dict | list | None: + return value + + _E = TypeVar("_E", bound=enum.StrEnum) @@ -77,3 +153,11 @@ class EnumText(TypeDecorator[_E | None], Generic[_E]): if x is None or y is None: return x is y return x == y + + +def adjusted_json_index(index_name, column_name): + index_name = index_name or f"{column_name}_idx" + if dify_config.DB_TYPE == "postgresql": + return sa.Index(index_name, column_name, postgresql_using="gin") + else: + return None diff --git a/api/models/web.py b/api/models/web.py index 7df5bd6e87..4f0bf7c7da 100644 --- a/api/models/web.py +++ b/api/models/web.py @@ -1,11 +1,11 @@ from datetime import datetime +from uuid import uuid4 import sqlalchemy as sa from sqlalchemy import DateTime, String, func from sqlalchemy.orm import Mapped, mapped_column -from models.base import TypeBase - +from .base import TypeBase from .engine import db from .model import Message from .types import StringUUID @@ -18,12 +18,10 @@ class SavedMessage(TypeBase): sa.Index("saved_message_message_idx", "app_id", "message_id", "created_by_role", "created_by"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) message_id: Mapped[str] = mapped_column(StringUUID, nullable=False) - created_by_role: Mapped[str] = mapped_column( - String(255), nullable=False, server_default=sa.text("'end_user'::character varying") - ) + created_by_role: Mapped[str] = mapped_column(String(255), nullable=False, server_default=sa.text("'end_user'")) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) created_at: Mapped[datetime] = mapped_column( DateTime, @@ -44,13 +42,13 @@ class PinnedConversation(TypeBase): sa.Index("pinned_conversation_conversation_idx", "app_id", "conversation_id", "created_by_role", "created_by"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()"), init=False) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) conversation_id: Mapped[str] = mapped_column(StringUUID) created_by_role: Mapped[str] = mapped_column( String(255), nullable=False, - server_default=sa.text("'end_user'::character varying"), + server_default=sa.text("'end_user'"), ) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) created_at: Mapped[datetime] = mapped_column( diff --git a/api/models/workflow.py b/api/models/workflow.py index 1ef5330504..3ebc36bee3 100644 --- a/api/models/workflow.py +++ b/api/models/workflow.py @@ -7,13 +7,28 @@ from typing import TYPE_CHECKING, Any, Optional, Union, cast from uuid import uuid4 import sqlalchemy as sa -from sqlalchemy import DateTime, Select, exists, orm, select +from sqlalchemy import ( + DateTime, + Index, + PrimaryKeyConstraint, + Select, + String, + UniqueConstraint, + exists, + func, + orm, + select, +) +from sqlalchemy.orm import Mapped, declared_attr, mapped_column from core.file.constants import maybe_file_object from core.file.models import File from core.variables import utils as variable_utils from core.variables.variables import FloatVariable, IntegerVariable, StringVariable -from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID +from core.workflow.constants import ( + CONVERSATION_VARIABLE_NODE_ID, + SYSTEM_VARIABLE_NODE_ID, +) from core.workflow.enums import NodeType from extensions.ext_storage import Storage from factories.variable_factory import TypeMismatchError, build_segment_with_type @@ -23,10 +38,8 @@ from libs.uuid_utils import uuidv7 from ._workflow_exc import NodeNotFoundError, WorkflowDataError if TYPE_CHECKING: - from models.model import AppMode, UploadFile + from .model import AppMode, UploadFile -from sqlalchemy import Index, PrimaryKeyConstraint, String, UniqueConstraint, func -from sqlalchemy.orm import Mapped, declared_attr, mapped_column from constants import DEFAULT_FILE_NUMBER_LIMITS, HIDDEN_VALUE from core.helper import encrypter @@ -35,10 +48,10 @@ from factories import variable_factory from libs import helper from .account import Account -from .base import Base +from .base import Base, DefaultFieldsMixin, TypeBase from .engine import db from .enums import CreatorUserRole, DraftVariableType, ExecutionOffLoadType -from .types import EnumText, StringUUID +from .types import EnumText, LongText, StringUUID logger = logging.getLogger(__name__) @@ -73,7 +86,7 @@ class WorkflowType(StrEnum): :param app_mode: app mode :return: workflow type """ - from models.model import AppMode + from .model import AppMode app_mode = app_mode if isinstance(app_mode, AppMode) else AppMode.value_of(app_mode) return cls.WORKFLOW if app_mode == AppMode.WORKFLOW else cls.CHAT @@ -122,32 +135,31 @@ class Workflow(Base): sa.Index("workflow_version_idx", "tenant_id", "app_id", "version"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) type: Mapped[str] = mapped_column(String(255), nullable=False) version: Mapped[str] = mapped_column(String(255), nullable=False) - marked_name: Mapped[str] = mapped_column(default="", server_default="") - marked_comment: Mapped[str] = mapped_column(default="", server_default="") - graph: Mapped[str] = mapped_column(sa.Text) - _features: Mapped[str] = mapped_column("features", sa.TEXT) + marked_name: Mapped[str] = mapped_column(String(255), default="", server_default="") + marked_comment: Mapped[str] = mapped_column(String(255), default="", server_default="") + graph: Mapped[str] = mapped_column(LongText) + _features: Mapped[str] = mapped_column("features", LongText) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) updated_by: Mapped[str | None] = mapped_column(StringUUID) updated_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, - default=naive_utc_now(), - server_onupdate=func.current_timestamp(), - ) - _environment_variables: Mapped[str] = mapped_column( - "environment_variables", sa.Text, nullable=False, server_default="{}" + default=func.current_timestamp(), + server_default=func.current_timestamp(), + onupdate=func.current_timestamp(), ) + _environment_variables: Mapped[str] = mapped_column("environment_variables", LongText, nullable=False, default="{}") _conversation_variables: Mapped[str] = mapped_column( - "conversation_variables", sa.Text, nullable=False, server_default="{}" + "conversation_variables", LongText, nullable=False, default="{}" ) _rag_pipeline_variables: Mapped[str] = mapped_column( - "rag_pipeline_variables", db.Text, nullable=False, server_default="{}" + "rag_pipeline_variables", LongText, nullable=False, default="{}" ) VERSION_DRAFT = "draft" @@ -247,7 +259,9 @@ class Workflow(Base): return node_type @staticmethod - def get_enclosing_node_type_and_id(node_config: Mapping[str, Any]) -> tuple[NodeType, str] | None: + def get_enclosing_node_type_and_id( + node_config: Mapping[str, Any], + ) -> tuple[NodeType, str] | None: in_loop = node_config.get("isInLoop", False) in_iteration = node_config.get("isInIteration", False) if in_loop: @@ -354,7 +368,10 @@ class Workflow(Base): if "nodes" not in graph_dict: return [] - start_node = next((node for node in graph_dict["nodes"] if node["data"]["type"] == "start"), None) + start_node = next( + (node for node in graph_dict["nodes"] if node["data"]["type"] == "start"), + None, + ) if not start_node: return [] @@ -396,7 +413,7 @@ class Workflow(Base): For accurate checking, use a direct query with tenant_id, app_id, and version. """ - from models.tools import WorkflowToolProvider + from .tools import WorkflowToolProvider stmt = select( exists().where( @@ -407,7 +424,9 @@ class Workflow(Base): return db.session.execute(stmt).scalar_one() @property - def environment_variables(self) -> Sequence[StringVariable | IntegerVariable | FloatVariable | SecretVariable]: + def environment_variables( + self, + ) -> Sequence[StringVariable | IntegerVariable | FloatVariable | SecretVariable]: # TODO: find some way to init `self._environment_variables` when instance created. if self._environment_variables is None: self._environment_variables = "{}" @@ -424,7 +443,9 @@ class Workflow(Base): ] # decrypt secret variables value - def decrypt_func(var: Variable) -> StringVariable | IntegerVariable | FloatVariable | SecretVariable: + def decrypt_func( + var: Variable, + ) -> StringVariable | IntegerVariable | FloatVariable | SecretVariable: if isinstance(var, SecretVariable): return var.model_copy(update={"value": encrypter.decrypt_token(tenant_id=tenant_id, token=var.value)}) elif isinstance(var, (StringVariable, IntegerVariable, FloatVariable)): @@ -575,7 +596,7 @@ class WorkflowRun(Base): sa.Index("workflow_run_triggerd_from_idx", "tenant_id", "app_id", "triggered_from"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id: Mapped[str] = mapped_column(StringUUID) app_id: Mapped[str] = mapped_column(StringUUID) @@ -583,11 +604,11 @@ class WorkflowRun(Base): type: Mapped[str] = mapped_column(String(255)) triggered_from: Mapped[str] = mapped_column(String(255)) version: Mapped[str] = mapped_column(String(255)) - graph: Mapped[str | None] = mapped_column(sa.Text) - inputs: Mapped[str | None] = mapped_column(sa.Text) + graph: Mapped[str | None] = mapped_column(LongText) + inputs: Mapped[str | None] = mapped_column(LongText) status: Mapped[str] = mapped_column(String(255)) # running, succeeded, failed, stopped, partial-succeeded - outputs: Mapped[str | None] = mapped_column(sa.Text, default="{}") - error: Mapped[str | None] = mapped_column(sa.Text) + outputs: Mapped[str | None] = mapped_column(LongText, default="{}") + error: Mapped[str | None] = mapped_column(LongText) elapsed_time: Mapped[float] = mapped_column(sa.Float, nullable=False, server_default=sa.text("0")) total_tokens: Mapped[int] = mapped_column(sa.BigInteger, server_default=sa.text("0")) total_steps: Mapped[int] = mapped_column(sa.Integer, server_default=sa.text("0"), nullable=True) @@ -597,6 +618,15 @@ class WorkflowRun(Base): finished_at: Mapped[datetime | None] = mapped_column(DateTime) exceptions_count: Mapped[int] = mapped_column(sa.Integer, server_default=sa.text("0"), nullable=True) + pause: Mapped[Optional["WorkflowPause"]] = orm.relationship( + "WorkflowPause", + primaryjoin="WorkflowRun.id == foreign(WorkflowPause.workflow_run_id)", + uselist=False, + # require explicit preloading. + lazy="raise", + back_populates="workflow_run", + ) + @property def created_by_account(self): created_by_role = CreatorUserRole(self.created_by_role) @@ -604,7 +634,7 @@ class WorkflowRun(Base): @property def created_by_end_user(self): - from models.model import EndUser + from .model import EndUser created_by_role = CreatorUserRole(self.created_by_role) return db.session.get(EndUser, self.created_by) if created_by_role == CreatorUserRole.END_USER else None @@ -623,7 +653,7 @@ class WorkflowRun(Base): @property def message(self): - from models.model import Message + from .model import Message return ( db.session.query(Message).where(Message.app_id == self.app_id, Message.workflow_run_id == self.id).first() @@ -786,7 +816,7 @@ class WorkflowNodeExecutionModel(Base): # This model is expected to have `offlo ), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id: Mapped[str] = mapped_column(StringUUID) app_id: Mapped[str] = mapped_column(StringUUID) workflow_id: Mapped[str] = mapped_column(StringUUID) @@ -798,13 +828,13 @@ class WorkflowNodeExecutionModel(Base): # This model is expected to have `offlo node_id: Mapped[str] = mapped_column(String(255)) node_type: Mapped[str] = mapped_column(String(255)) title: Mapped[str] = mapped_column(String(255)) - inputs: Mapped[str | None] = mapped_column(sa.Text) - process_data: Mapped[str | None] = mapped_column(sa.Text) - outputs: Mapped[str | None] = mapped_column(sa.Text) + inputs: Mapped[str | None] = mapped_column(LongText) + process_data: Mapped[str | None] = mapped_column(LongText) + outputs: Mapped[str | None] = mapped_column(LongText) status: Mapped[str] = mapped_column(String(255)) - error: Mapped[str | None] = mapped_column(sa.Text) + error: Mapped[str | None] = mapped_column(LongText) elapsed_time: Mapped[float] = mapped_column(sa.Float, server_default=sa.text("0")) - execution_metadata: Mapped[str | None] = mapped_column(sa.Text) + execution_metadata: Mapped[str | None] = mapped_column(LongText) created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.current_timestamp()) created_by_role: Mapped[str] = mapped_column(String(255)) created_by: Mapped[str] = mapped_column(StringUUID) @@ -844,7 +874,7 @@ class WorkflowNodeExecutionModel(Base): # This model is expected to have `offlo @property def created_by_end_user(self): - from models.model import EndUser + from .model import EndUser created_by_role = CreatorUserRole(self.created_by_role) # TODO(-LAN-): Avoid using db.session.get() here. @@ -875,8 +905,6 @@ class WorkflowNodeExecutionModel(Base): # This model is expected to have `offlo extras: dict[str, Any] = {} if self.execution_metadata_dict: - from core.workflow.nodes import NodeType - if self.node_type == NodeType.TOOL and "tool_info" in self.execution_metadata_dict: tool_info: dict[str, Any] = self.execution_metadata_dict["tool_info"] extras["icon"] = ToolManager.get_tool_icon( @@ -961,7 +989,7 @@ class WorkflowNodeExecutionOffload(Base): id: Mapped[str] = mapped_column( StringUUID, primary_key=True, - server_default=sa.text("uuidv7()"), + default=lambda: str(uuid4()), ) created_at: Mapped[datetime] = mapped_column( @@ -1034,7 +1062,7 @@ class WorkflowAppLogCreatedFrom(StrEnum): raise ValueError(f"invalid workflow app log created from value {value}") -class WorkflowAppLog(Base): +class WorkflowAppLog(TypeBase): """ Workflow App execution log, excluding workflow debugging records. @@ -1070,7 +1098,7 @@ class WorkflowAppLog(Base): sa.Index("workflow_app_log_workflow_run_id_idx", "workflow_run_id"), ) - id: Mapped[str] = mapped_column(StringUUID, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False) tenant_id: Mapped[str] = mapped_column(StringUUID) app_id: Mapped[str] = mapped_column(StringUUID) workflow_id: Mapped[str] = mapped_column(StringUUID, nullable=False) @@ -1078,11 +1106,22 @@ class WorkflowAppLog(Base): created_from: Mapped[str] = mapped_column(String(255), nullable=False) created_by_role: Mapped[str] = mapped_column(String(255), nullable=False) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp()) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.current_timestamp(), init=False + ) @property def workflow_run(self): - return db.session.get(WorkflowRun, self.workflow_run_id) + if self.workflow_run_id: + from sqlalchemy.orm import sessionmaker + + from repositories.factory import DifyAPIRepositoryFactory + + session_maker = sessionmaker(bind=db.engine, expire_on_commit=False) + repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker) + return repo.get_workflow_run_by_id_without_tenant(run_id=self.workflow_run_id) + + return None @property def created_by_account(self): @@ -1091,7 +1130,7 @@ class WorkflowAppLog(Base): @property def created_by_end_user(self): - from models.model import EndUser + from .model import EndUser created_by_role = CreatorUserRole(self.created_by_role) return db.session.get(EndUser, self.created_by) if created_by_role == CreatorUserRole.END_USER else None @@ -1110,26 +1149,20 @@ class WorkflowAppLog(Base): } -class ConversationVariable(Base): +class ConversationVariable(TypeBase): __tablename__ = "workflow_conversation_variables" id: Mapped[str] = mapped_column(StringUUID, primary_key=True) conversation_id: Mapped[str] = mapped_column(StringUUID, nullable=False, primary_key=True, index=True) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False, index=True) - data: Mapped[str] = mapped_column(sa.Text, nullable=False) + data: Mapped[str] = mapped_column(LongText, nullable=False) created_at: Mapped[datetime] = mapped_column( - DateTime, nullable=False, server_default=func.current_timestamp(), index=True + DateTime, nullable=False, server_default=func.current_timestamp(), index=True, init=False ) updated_at: Mapped[datetime] = mapped_column( - DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp() + DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), init=False ) - def __init__(self, *, id: str, app_id: str, conversation_id: str, data: str): - self.id = id - self.app_id = app_id - self.conversation_id = conversation_id - self.data = data - @classmethod def from_variable(cls, *, app_id: str, conversation_id: str, variable: Variable) -> "ConversationVariable": obj = cls( @@ -1149,10 +1182,6 @@ class ConversationVariable(Base): _EDITABLE_SYSTEM_VARIABLE = frozenset(["query", "files"]) -def _naive_utc_datetime(): - return naive_utc_now() - - class WorkflowDraftVariable(Base): """`WorkflowDraftVariable` record variables and outputs generated during debugging workflow or chatflow. @@ -1181,19 +1210,19 @@ class WorkflowDraftVariable(Base): __allow_unmapped__ = True # id is the unique identifier of a draft variable. - id: Mapped[str] = mapped_column(StringUUID, primary_key=True, server_default=sa.text("uuid_generate_v4()")) + id: Mapped[str] = mapped_column(StringUUID, primary_key=True, default=lambda: str(uuid4())) created_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, - default=_naive_utc_datetime, + default=naive_utc_now, server_default=func.current_timestamp(), ) updated_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, - default=_naive_utc_datetime, + default=naive_utc_now, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), ) @@ -1247,7 +1276,7 @@ class WorkflowDraftVariable(Base): # The variable's value serialized as a JSON string # # If the variable is offloaded, `value` contains a truncated version, not the full original value. - value: Mapped[str] = mapped_column(sa.Text, nullable=False, name="value") + value: Mapped[str] = mapped_column(LongText, nullable=False, name="value") # Controls whether the variable should be displayed in the variable inspection panel visible: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, default=True) @@ -1460,8 +1489,8 @@ class WorkflowDraftVariable(Base): file_id: str | None = None, ) -> "WorkflowDraftVariable": variable = WorkflowDraftVariable() - variable.created_at = _naive_utc_datetime() - variable.updated_at = _naive_utc_datetime() + variable.created_at = naive_utc_now() + variable.updated_at = naive_utc_now() variable.description = description variable.app_id = app_id variable.node_id = node_id @@ -1559,14 +1588,13 @@ class WorkflowDraftVariableFile(Base): id: Mapped[str] = mapped_column( StringUUID, primary_key=True, - default=uuidv7, - server_default=sa.text("uuidv7()"), + default=lambda: str(uuidv7()), ) created_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, - default=_naive_utc_datetime, + default=naive_utc_now, server_default=func.current_timestamp(), ) @@ -1631,3 +1659,68 @@ class WorkflowDraftVariableFile(Base): def is_system_variable_editable(name: str) -> bool: return name in _EDITABLE_SYSTEM_VARIABLE + + +class WorkflowPause(DefaultFieldsMixin, Base): + """ + WorkflowPause records the paused state and related metadata for a specific workflow run. + + Each `WorkflowRun` can have zero or one associated `WorkflowPause`, depending on its execution status. + If a `WorkflowRun` is in the `PAUSED` state, there must be a corresponding `WorkflowPause` + that has not yet been resumed. + Otherwise, there should be no active (non-resumed) `WorkflowPause` linked to that run. + + This model captures the execution context required to resume workflow processing at a later time. + """ + + __tablename__ = "workflow_pauses" + __table_args__ = ( + # Design Note: + # Instead of adding a `pause_id` field to the `WorkflowRun` model—which would require a migration + # on a potentially large table—we reference `WorkflowRun` from `WorkflowPause` and enforce a unique + # constraint on `workflow_run_id` to guarantee a one-to-one relationship. + UniqueConstraint("workflow_run_id"), + ) + + # `workflow_id` represents the unique identifier of the workflow associated with this pause. + # It corresponds to the `id` field in the `Workflow` model. + # + # Since an application can have multiple versions of a workflow, each with its own unique ID, + # the `app_id` alone is insufficient to determine which workflow version should be loaded + # when resuming a suspended workflow. + workflow_id: Mapped[str] = mapped_column( + StringUUID, + nullable=False, + ) + + # `workflow_run_id` represents the identifier of the execution of workflow, + # correspond to the `id` field of `WorkflowRun`. + workflow_run_id: Mapped[str] = mapped_column( + StringUUID, + nullable=False, + ) + + # `resumed_at` records the timestamp when the suspended workflow was resumed. + # It is set to `NULL` if the workflow has not been resumed. + # + # NOTE: Resuming a suspended WorkflowPause does not delete the record immediately. + # It only set `resumed_at` to a non-null value. + resumed_at: Mapped[datetime | None] = mapped_column( + sa.DateTime, + nullable=True, + ) + + # state_object_key stores the object key referencing the serialized runtime state + # of the `GraphEngine`. This object captures the complete execution context of the + # workflow at the moment it was paused, enabling accurate resumption. + state_object_key: Mapped[str] = mapped_column(String(length=255), nullable=False) + + # Relationship to WorkflowRun + workflow_run: Mapped["WorkflowRun"] = orm.relationship( + foreign_keys=[workflow_run_id], + # require explicit preloading. + lazy="raise", + uselist=False, + primaryjoin="WorkflowPause.workflow_run_id == WorkflowRun.id", + back_populates="pause", + ) diff --git a/api/pyproject.toml b/api/pyproject.toml index d87ac625bf..1cf7d719ea 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dify-api" -version = "1.9.2" +version = "1.10.0" requires-python = ">=3.11,<3.13" dependencies = [ @@ -37,6 +37,7 @@ dependencies = [ "numpy~=1.26.4", "openpyxl~=3.1.5", "opik~=1.8.72", + "litellm==1.77.1", # Pinned to avoid madoka dependency issue "opentelemetry-api==1.27.0", "opentelemetry-distro==0.48b0", "opentelemetry-exporter-otlp==1.27.0", @@ -74,11 +75,10 @@ dependencies = [ "resend~=2.9.0", "sentry-sdk[flask]~=2.28.0", "sqlalchemy~=2.0.29", - "starlette==0.47.2", + "starlette==0.49.1", "tiktoken~=0.9.0", "transformers~=4.56.1", "unstructured[docx,epub,md,ppt,pptx]~=0.16.1", - "weave~=0.51.0", "yarl~=1.18.3", "webvtt-py~=0.5.1", "sseclient-py~=1.8.0", @@ -89,6 +89,7 @@ dependencies = [ "croniter>=6.0.0", "weaviate-client==4.17.0", "apscheduler>=3.11.0", + "weave>=0.52.16", ] # Before adding new dependency, consider place it in # alphabet order (a-z) and suitable group. @@ -211,9 +212,9 @@ vdb = [ "pgvector==0.2.5", "pymilvus~=2.5.0", "pymochow==2.2.9", - "pyobvector~=0.2.15", + "pyobvector~=0.2.17", "qdrant-client==1.9.0", - "tablestore==6.2.0", + "tablestore==6.3.7", "tcvectordb~=1.6.4", "tidb-vector==0.0.9", "upstash-vector==0.6.0", diff --git a/api/repositories/api_workflow_run_repository.py b/api/repositories/api_workflow_run_repository.py index 72de9fed31..21fd57cd22 100644 --- a/api/repositories/api_workflow_run_repository.py +++ b/api/repositories/api_workflow_run_repository.py @@ -28,7 +28,7 @@ Example: runs = repo.get_paginated_workflow_runs( tenant_id="tenant-123", app_id="app-456", - triggered_from="debugging", + triggered_from=WorkflowRunTriggeredFrom.DEBUGGING, limit=20 ) ``` @@ -38,9 +38,17 @@ from collections.abc import Sequence from datetime import datetime from typing import Protocol +from core.workflow.entities.workflow_pause import WorkflowPauseEntity from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository from libs.infinite_scroll_pagination import InfiniteScrollPagination +from models.enums import WorkflowRunTriggeredFrom from models.workflow import WorkflowRun +from repositories.types import ( + AverageInteractionStats, + DailyRunsStats, + DailyTerminalsStats, + DailyTokenCostStats, +) class APIWorkflowRunRepository(WorkflowExecutionRepository, Protocol): @@ -56,7 +64,7 @@ class APIWorkflowRunRepository(WorkflowExecutionRepository, Protocol): self, tenant_id: str, app_id: str, - triggered_from: str, + triggered_from: WorkflowRunTriggeredFrom | Sequence[WorkflowRunTriggeredFrom], limit: int = 20, last_id: str | None = None, status: str | None = None, @@ -71,7 +79,7 @@ class APIWorkflowRunRepository(WorkflowExecutionRepository, Protocol): Args: tenant_id: Tenant identifier for multi-tenant isolation app_id: Application identifier - triggered_from: Filter by trigger source (e.g., "debugging", "app-run") + triggered_from: Filter by trigger source(s) (e.g., "debugging", "app-run", or list of values) limit: Maximum number of records to return (default: 20) last_id: Cursor for pagination - ID of the last record from previous page status: Optional filter by status (e.g., "running", "succeeded", "failed") @@ -109,6 +117,31 @@ class APIWorkflowRunRepository(WorkflowExecutionRepository, Protocol): """ ... + def get_workflow_run_by_id_without_tenant( + self, + run_id: str, + ) -> WorkflowRun | None: + """ + Get a specific workflow run by ID without tenant/app context. + + Retrieves a single workflow run using only the run ID, without + requiring tenant_id or app_id. This method is intended for internal + system operations like tracing and monitoring where the tenant context + is not available upfront. + + Args: + run_id: Workflow run identifier + + Returns: + WorkflowRun object if found, None otherwise + + Note: + This method bypasses tenant isolation checks and should only be used + in trusted system contexts like ops trace collection. For user-facing + operations, use get_workflow_run_by_id() with proper tenant isolation. + """ + ... + def get_workflow_runs_count( self, tenant_id: str, @@ -218,3 +251,229 @@ class APIWorkflowRunRepository(WorkflowExecutionRepository, Protocol): and ensure proper data retention policies are followed. """ ... + + def create_workflow_pause( + self, + workflow_run_id: str, + state_owner_user_id: str, + state: str, + ) -> WorkflowPauseEntity: + """ + Create a new workflow pause state. + + Creates a pause state for a workflow run, storing the current execution + state and marking the workflow as paused. This is used when a workflow + needs to be suspended and later resumed. + + Args: + workflow_run_id: Identifier of the workflow run to pause + state_owner_user_id: User ID who owns the pause state for file storage + state: Serialized workflow execution state (JSON string) + + Returns: + WorkflowPauseEntity representing the created pause state + + Raises: + ValueError: If workflow_run_id is invalid or workflow run doesn't exist + RuntimeError: If workflow is already paused or in invalid state + """ + # NOTE: we may get rid of the `state_owner_user_id` in parameter list. + # However, removing it would require an extra for `Workflow` model + # while creating pause. + ... + + def resume_workflow_pause( + self, + workflow_run_id: str, + pause_entity: WorkflowPauseEntity, + ) -> WorkflowPauseEntity: + """ + Resume a paused workflow. + + Marks a paused workflow as resumed, set the `resumed_at` field of WorkflowPauseEntity + and returning the workflow to running status. Returns the pause entity + that was resumed. + + The returned `WorkflowPauseEntity` model has `resumed_at` set. + + NOTE: this method does not delete the correspond `WorkflowPauseEntity` record and associated states. + It's the callers responsibility to clear the correspond state with `delete_workflow_pause`. + + Args: + workflow_run_id: Identifier of the workflow run to resume + pause_entity: The pause entity to resume + + Returns: + WorkflowPauseEntity representing the resumed pause state + + Raises: + ValueError: If workflow_run_id is invalid + RuntimeError: If workflow is not paused or already resumed + """ + ... + + def delete_workflow_pause( + self, + pause_entity: WorkflowPauseEntity, + ) -> None: + """ + Delete a workflow pause state. + + Permanently removes the pause state for a workflow run, including + the stored state file. Used for cleanup operations when a paused + workflow is no longer needed. + + Args: + pause_entity: The pause entity to delete + + Raises: + ValueError: If pause_entity is invalid + RuntimeError: If workflow is not paused + + Note: + This operation is irreversible. The stored workflow state will be + permanently deleted along with the pause record. + """ + ... + + def prune_pauses( + self, + expiration: datetime, + resumption_expiration: datetime, + limit: int | None = None, + ) -> Sequence[str]: + """ + Clean up expired and old pause states. + + Removes pause states that have expired (created before expiration time) + and pause states that were resumed more than resumption_duration ago. + This is used for maintenance and cleanup operations. + + Args: + expiration: Remove pause states created before this time + resumption_expiration: Remove pause states resumed before this time + limit: maximum number of records deleted in one call + + Returns: + a list of ids for pause records that were pruned + + Raises: + ValueError: If parameters are invalid + """ + ... + + def get_daily_runs_statistics( + self, + tenant_id: str, + app_id: str, + triggered_from: str, + start_date: datetime | None = None, + end_date: datetime | None = None, + timezone: str = "UTC", + ) -> list[DailyRunsStats]: + """ + Get daily runs statistics. + + Retrieves daily workflow runs count grouped by date for a specific app + and trigger source. Used for workflow statistics dashboard. + + Args: + tenant_id: Tenant identifier for multi-tenant isolation + app_id: Application identifier + triggered_from: Filter by trigger source (e.g., "app-run") + start_date: Optional start date filter + end_date: Optional end date filter + timezone: Timezone for date grouping (default: "UTC") + + Returns: + List of dictionaries containing date and runs count: + [{"date": "2024-01-01", "runs": 10}, ...] + """ + ... + + def get_daily_terminals_statistics( + self, + tenant_id: str, + app_id: str, + triggered_from: str, + start_date: datetime | None = None, + end_date: datetime | None = None, + timezone: str = "UTC", + ) -> list[DailyTerminalsStats]: + """ + Get daily terminals statistics. + + Retrieves daily unique terminal count grouped by date for a specific app + and trigger source. Used for workflow statistics dashboard. + + Args: + tenant_id: Tenant identifier for multi-tenant isolation + app_id: Application identifier + triggered_from: Filter by trigger source (e.g., "app-run") + start_date: Optional start date filter + end_date: Optional end date filter + timezone: Timezone for date grouping (default: "UTC") + + Returns: + List of dictionaries containing date and terminal count: + [{"date": "2024-01-01", "terminal_count": 5}, ...] + """ + ... + + def get_daily_token_cost_statistics( + self, + tenant_id: str, + app_id: str, + triggered_from: str, + start_date: datetime | None = None, + end_date: datetime | None = None, + timezone: str = "UTC", + ) -> list[DailyTokenCostStats]: + """ + Get daily token cost statistics. + + Retrieves daily total token count grouped by date for a specific app + and trigger source. Used for workflow statistics dashboard. + + Args: + tenant_id: Tenant identifier for multi-tenant isolation + app_id: Application identifier + triggered_from: Filter by trigger source (e.g., "app-run") + start_date: Optional start date filter + end_date: Optional end date filter + timezone: Timezone for date grouping (default: "UTC") + + Returns: + List of dictionaries containing date and token count: + [{"date": "2024-01-01", "token_count": 1000}, ...] + """ + ... + + def get_average_app_interaction_statistics( + self, + tenant_id: str, + app_id: str, + triggered_from: str, + start_date: datetime | None = None, + end_date: datetime | None = None, + timezone: str = "UTC", + ) -> list[AverageInteractionStats]: + """ + Get average app interaction statistics. + + Retrieves daily average interactions per user grouped by date for a specific app + and trigger source. Used for workflow statistics dashboard. + + Args: + tenant_id: Tenant identifier for multi-tenant isolation + app_id: Application identifier + triggered_from: Filter by trigger source (e.g., "app-run") + start_date: Optional start date filter + end_date: Optional end date filter + timezone: Timezone for date grouping (default: "UTC") + + Returns: + List of dictionaries containing date and average interactions: + [{"date": "2024-01-01", "interactions": 2.5}, ...] + """ + ... diff --git a/api/repositories/factory.py b/api/repositories/factory.py index 96f9f886a4..8e098a7059 100644 --- a/api/repositories/factory.py +++ b/api/repositories/factory.py @@ -5,7 +5,7 @@ This factory is specifically designed for DifyAPI repositories that handle service-layer operations with dependency injection patterns. """ -from sqlalchemy.orm import sessionmaker +from sqlalchemy.orm import Session, sessionmaker from configs import dify_config from core.repositories import DifyCoreRepositoryFactory, RepositoryImportError @@ -25,7 +25,7 @@ class DifyAPIRepositoryFactory(DifyCoreRepositoryFactory): @classmethod def create_api_workflow_node_execution_repository( - cls, session_maker: sessionmaker + cls, session_maker: sessionmaker[Session] ) -> DifyAPIWorkflowNodeExecutionRepository: """ Create a DifyAPIWorkflowNodeExecutionRepository instance based on configuration. @@ -55,7 +55,7 @@ class DifyAPIRepositoryFactory(DifyCoreRepositoryFactory): ) from e @classmethod - def create_api_workflow_run_repository(cls, session_maker: sessionmaker) -> APIWorkflowRunRepository: + def create_api_workflow_run_repository(cls, session_maker: sessionmaker[Session]) -> APIWorkflowRunRepository: """ Create an APIWorkflowRunRepository instance based on configuration. diff --git a/api/repositories/sqlalchemy_api_workflow_run_repository.py b/api/repositories/sqlalchemy_api_workflow_run_repository.py index 68affb59f3..eb2a32d764 100644 --- a/api/repositories/sqlalchemy_api_workflow_run_repository.py +++ b/api/repositories/sqlalchemy_api_workflow_run_repository.py @@ -20,22 +20,43 @@ Implementation Notes: """ import logging +import uuid from collections.abc import Sequence from datetime import datetime -from typing import cast +from decimal import Decimal +from typing import Any, cast -from sqlalchemy import delete, func, select +import sqlalchemy as sa +from sqlalchemy import and_, delete, func, null, or_, select from sqlalchemy.engine import CursorResult -from sqlalchemy.orm import Session, sessionmaker +from sqlalchemy.orm import Session, selectinload, sessionmaker +from core.workflow.entities.workflow_pause import WorkflowPauseEntity +from core.workflow.enums import WorkflowExecutionStatus +from extensions.ext_storage import storage +from libs.datetime_utils import naive_utc_now +from libs.helper import convert_datetime_to_date from libs.infinite_scroll_pagination import InfiniteScrollPagination from libs.time_parser import get_time_threshold +from libs.uuid_utils import uuidv7 +from models.enums import WorkflowRunTriggeredFrom +from models.workflow import WorkflowPause as WorkflowPauseModel from models.workflow import WorkflowRun from repositories.api_workflow_run_repository import APIWorkflowRunRepository +from repositories.types import ( + AverageInteractionStats, + DailyRunsStats, + DailyTerminalsStats, + DailyTokenCostStats, +) logger = logging.getLogger(__name__) +class _WorkflowRunError(Exception): + pass + + class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository): """ SQLAlchemy implementation of APIWorkflowRunRepository. @@ -61,7 +82,7 @@ class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository): self, tenant_id: str, app_id: str, - triggered_from: str, + triggered_from: WorkflowRunTriggeredFrom | Sequence[WorkflowRunTriggeredFrom], limit: int = 20, last_id: str | None = None, status: str | None = None, @@ -78,9 +99,14 @@ class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository): base_stmt = select(WorkflowRun).where( WorkflowRun.tenant_id == tenant_id, WorkflowRun.app_id == app_id, - WorkflowRun.triggered_from == triggered_from, ) + # Handle triggered_from values + if isinstance(triggered_from, WorkflowRunTriggeredFrom): + triggered_from = [triggered_from] + if triggered_from: + base_stmt = base_stmt.where(WorkflowRun.triggered_from.in_(triggered_from)) + # Add optional status filter if status: base_stmt = base_stmt.where(WorkflowRun.status == status) @@ -126,6 +152,17 @@ class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository): ) return session.scalar(stmt) + def get_workflow_run_by_id_without_tenant( + self, + run_id: str, + ) -> WorkflowRun | None: + """ + Get a specific workflow run by ID without tenant/app context. + """ + with self._session_maker() as session: + stmt = select(WorkflowRun).where(WorkflowRun.id == run_id) + return session.scalar(stmt) + def get_workflow_runs_count( self, tenant_id: str, @@ -275,3 +312,558 @@ class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository): logger.info("Total deleted %s workflow runs for app %s", total_deleted, app_id) return total_deleted + + def create_workflow_pause( + self, + workflow_run_id: str, + state_owner_user_id: str, + state: str, + ) -> WorkflowPauseEntity: + """ + Create a new workflow pause state. + + Creates a pause state for a workflow run, storing the current execution + state and marking the workflow as paused. This is used when a workflow + needs to be suspended and later resumed. + + Args: + workflow_run_id: Identifier of the workflow run to pause + state_owner_user_id: User ID who owns the pause state for file storage + state: Serialized workflow execution state (JSON string) + + Returns: + RepositoryWorkflowPauseEntity representing the created pause state + + Raises: + ValueError: If workflow_run_id is invalid or workflow run doesn't exist + RuntimeError: If workflow is already paused or in invalid state + """ + previous_pause_model_query = select(WorkflowPauseModel).where( + WorkflowPauseModel.workflow_run_id == workflow_run_id + ) + with self._session_maker() as session, session.begin(): + # Get the workflow run + workflow_run = session.get(WorkflowRun, workflow_run_id) + if workflow_run is None: + raise ValueError(f"WorkflowRun not found: {workflow_run_id}") + + # Check if workflow is in RUNNING status + if workflow_run.status != WorkflowExecutionStatus.RUNNING: + raise _WorkflowRunError( + f"Only WorkflowRun with RUNNING status can be paused, " + f"workflow_run_id={workflow_run_id}, current_status={workflow_run.status}" + ) + # + previous_pause = session.scalars(previous_pause_model_query).first() + if previous_pause: + self._delete_pause_model(session, previous_pause) + # we need to flush here to ensure that the old one is actually deleted. + session.flush() + + state_obj_key = f"workflow-state-{uuid.uuid4()}.json" + storage.save(state_obj_key, state.encode()) + # Upload the state file + + # Create the pause record + pause_model = WorkflowPauseModel() + pause_model.id = str(uuidv7()) + pause_model.workflow_id = workflow_run.workflow_id + pause_model.workflow_run_id = workflow_run.id + pause_model.state_object_key = state_obj_key + pause_model.created_at = naive_utc_now() + + # Update workflow run status + workflow_run.status = WorkflowExecutionStatus.PAUSED + + # Save everything in a transaction + session.add(pause_model) + session.add(workflow_run) + + logger.info("Created workflow pause %s for workflow run %s", pause_model.id, workflow_run_id) + + return _PrivateWorkflowPauseEntity.from_models(pause_model) + + def get_workflow_pause( + self, + workflow_run_id: str, + ) -> WorkflowPauseEntity | None: + """ + Get an existing workflow pause state. + + Retrieves the pause state for a specific workflow run if it exists. + Used to check if a workflow is paused and to retrieve its saved state. + + Args: + workflow_run_id: Identifier of the workflow run to get pause state for + + Returns: + RepositoryWorkflowPauseEntity if pause state exists, None otherwise + + Raises: + ValueError: If workflow_run_id is invalid + """ + with self._session_maker() as session: + # Query workflow run with pause and state file + stmt = select(WorkflowRun).options(selectinload(WorkflowRun.pause)).where(WorkflowRun.id == workflow_run_id) + workflow_run = session.scalar(stmt) + + if workflow_run is None: + raise ValueError(f"WorkflowRun not found: {workflow_run_id}") + + pause_model = workflow_run.pause + if pause_model is None: + return None + + return _PrivateWorkflowPauseEntity.from_models(pause_model) + + def resume_workflow_pause( + self, + workflow_run_id: str, + pause_entity: WorkflowPauseEntity, + ) -> WorkflowPauseEntity: + """ + Resume a paused workflow. + + Marks a paused workflow as resumed, clearing the pause state and + returning the workflow to running status. Returns the pause entity + that was resumed. + + Args: + workflow_run_id: Identifier of the workflow run to resume + pause_entity: The pause entity to resume + + Returns: + RepositoryWorkflowPauseEntity representing the resumed pause state + + Raises: + ValueError: If workflow_run_id is invalid + RuntimeError: If workflow is not paused or already resumed + """ + with self._session_maker() as session, session.begin(): + # Get the workflow run with pause + stmt = select(WorkflowRun).options(selectinload(WorkflowRun.pause)).where(WorkflowRun.id == workflow_run_id) + workflow_run = session.scalar(stmt) + + if workflow_run is None: + raise ValueError(f"WorkflowRun not found: {workflow_run_id}") + + if workflow_run.status != WorkflowExecutionStatus.PAUSED: + raise _WorkflowRunError( + f"WorkflowRun is not in PAUSED status, workflow_run_id={workflow_run_id}, " + f"current_status={workflow_run.status}" + ) + pause_model = workflow_run.pause + if pause_model is None: + raise _WorkflowRunError(f"No pause state found for workflow run: {workflow_run_id}") + + if pause_model.id != pause_entity.id: + raise _WorkflowRunError( + "different id in WorkflowPause and WorkflowPauseEntity, " + f"WorkflowPause.id={pause_model.id}, " + f"WorkflowPauseEntity.id={pause_entity.id}" + ) + + if pause_model.resumed_at is not None: + raise _WorkflowRunError(f"Cannot resume an already resumed pause, pause_id={pause_model.id}") + + # Mark as resumed + pause_model.resumed_at = naive_utc_now() + workflow_run.pause_id = None # type: ignore + workflow_run.status = WorkflowExecutionStatus.RUNNING + + session.add(pause_model) + session.add(workflow_run) + + logger.info("Resumed workflow pause %s for workflow run %s", pause_model.id, workflow_run_id) + + return _PrivateWorkflowPauseEntity.from_models(pause_model) + + def delete_workflow_pause( + self, + pause_entity: WorkflowPauseEntity, + ) -> None: + """ + Delete a workflow pause state. + + Permanently removes the pause state for a workflow run, including + the stored state file. Used for cleanup operations when a paused + workflow is no longer needed. + + Args: + pause_entity: The pause entity to delete + + Raises: + ValueError: If pause_entity is invalid + _WorkflowRunError: If workflow is not paused + + Note: + This operation is irreversible. The stored workflow state will be + permanently deleted along with the pause record. + """ + with self._session_maker() as session, session.begin(): + # Get the pause model by ID + pause_model = session.get(WorkflowPauseModel, pause_entity.id) + if pause_model is None: + raise _WorkflowRunError(f"WorkflowPause not found: {pause_entity.id}") + self._delete_pause_model(session, pause_model) + + @staticmethod + def _delete_pause_model(session: Session, pause_model: WorkflowPauseModel): + storage.delete(pause_model.state_object_key) + + # Delete the pause record + session.delete(pause_model) + + logger.info("Deleted workflow pause %s for workflow run %s", pause_model.id, pause_model.workflow_run_id) + + def prune_pauses( + self, + expiration: datetime, + resumption_expiration: datetime, + limit: int | None = None, + ) -> Sequence[str]: + """ + Clean up expired and old pause states. + + Removes pause states that have expired (created before expiration time) + and pause states that were resumed more than resumption_duration ago. + This is used for maintenance and cleanup operations. + + Args: + expiration: Remove pause states created before this time + resumption_expiration: Remove pause states resumed before this time + limit: maximum number of records deleted in one call + + Returns: + a list of ids for pause records that were pruned + + Raises: + ValueError: If parameters are invalid + """ + _limit: int = limit or 1000 + pruned_record_ids: list[str] = [] + cond = or_( + WorkflowPauseModel.created_at < expiration, + and_( + WorkflowPauseModel.resumed_at.is_not(null()), + WorkflowPauseModel.resumed_at < resumption_expiration, + ), + ) + # First, collect pause records to delete with their state files + # Expired pauses (created before expiration time) + stmt = select(WorkflowPauseModel).where(cond).limit(_limit) + + with self._session_maker(expire_on_commit=False) as session: + # Old resumed pauses (resumed more than resumption_duration ago) + + # Get all records to delete + pauses_to_delete = session.scalars(stmt).all() + + # Delete state files from storage + for pause in pauses_to_delete: + with self._session_maker(expire_on_commit=False) as session, session.begin(): + # todo: this issues a separate query for each WorkflowPauseModel record. + # consider batching this lookup. + try: + storage.delete(pause.state_object_key) + logger.info( + "Deleted state object for pause, pause_id=%s, object_key=%s", + pause.id, + pause.state_object_key, + ) + except Exception: + logger.exception( + "Failed to delete state file for pause, pause_id=%s, object_key=%s", + pause.id, + pause.state_object_key, + ) + continue + session.delete(pause) + pruned_record_ids.append(pause.id) + logger.info( + "workflow pause records deleted, id=%s, resumed_at=%s", + pause.id, + pause.resumed_at, + ) + + return pruned_record_ids + + def get_daily_runs_statistics( + self, + tenant_id: str, + app_id: str, + triggered_from: str, + start_date: datetime | None = None, + end_date: datetime | None = None, + timezone: str = "UTC", + ) -> list[DailyRunsStats]: + """ + Get daily runs statistics using raw SQL for optimal performance. + """ + converted_created_at = convert_datetime_to_date("created_at") + sql_query = f"""SELECT + {converted_created_at} AS date, + COUNT(id) AS runs +FROM + workflow_runs +WHERE + tenant_id = :tenant_id + AND app_id = :app_id + AND triggered_from = :triggered_from""" + + arg_dict: dict[str, Any] = { + "tz": timezone, + "tenant_id": tenant_id, + "app_id": app_id, + "triggered_from": triggered_from, + } + + if start_date: + sql_query += " AND created_at >= :start_date" + arg_dict["start_date"] = start_date + + if end_date: + sql_query += " AND created_at < :end_date" + arg_dict["end_date"] = end_date + + sql_query += " GROUP BY date ORDER BY date" + + response_data = [] + with self._session_maker() as session: + rs = session.execute(sa.text(sql_query), arg_dict) + for row in rs: + response_data.append({"date": str(row.date), "runs": row.runs}) + + return cast(list[DailyRunsStats], response_data) + + def get_daily_terminals_statistics( + self, + tenant_id: str, + app_id: str, + triggered_from: str, + start_date: datetime | None = None, + end_date: datetime | None = None, + timezone: str = "UTC", + ) -> list[DailyTerminalsStats]: + """ + Get daily terminals statistics using raw SQL for optimal performance. + """ + converted_created_at = convert_datetime_to_date("created_at") + sql_query = f"""SELECT + {converted_created_at} AS date, + COUNT(DISTINCT created_by) AS terminal_count +FROM + workflow_runs +WHERE + tenant_id = :tenant_id + AND app_id = :app_id + AND triggered_from = :triggered_from""" + + arg_dict: dict[str, Any] = { + "tz": timezone, + "tenant_id": tenant_id, + "app_id": app_id, + "triggered_from": triggered_from, + } + + if start_date: + sql_query += " AND created_at >= :start_date" + arg_dict["start_date"] = start_date + + if end_date: + sql_query += " AND created_at < :end_date" + arg_dict["end_date"] = end_date + + sql_query += " GROUP BY date ORDER BY date" + + response_data = [] + with self._session_maker() as session: + rs = session.execute(sa.text(sql_query), arg_dict) + for row in rs: + response_data.append({"date": str(row.date), "terminal_count": row.terminal_count}) + + return cast(list[DailyTerminalsStats], response_data) + + def get_daily_token_cost_statistics( + self, + tenant_id: str, + app_id: str, + triggered_from: str, + start_date: datetime | None = None, + end_date: datetime | None = None, + timezone: str = "UTC", + ) -> list[DailyTokenCostStats]: + """ + Get daily token cost statistics using raw SQL for optimal performance. + """ + converted_created_at = convert_datetime_to_date("created_at") + sql_query = f"""SELECT + {converted_created_at} AS date, + SUM(total_tokens) AS token_count +FROM + workflow_runs +WHERE + tenant_id = :tenant_id + AND app_id = :app_id + AND triggered_from = :triggered_from""" + + arg_dict: dict[str, Any] = { + "tz": timezone, + "tenant_id": tenant_id, + "app_id": app_id, + "triggered_from": triggered_from, + } + + if start_date: + sql_query += " AND created_at >= :start_date" + arg_dict["start_date"] = start_date + + if end_date: + sql_query += " AND created_at < :end_date" + arg_dict["end_date"] = end_date + + sql_query += " GROUP BY date ORDER BY date" + + response_data = [] + with self._session_maker() as session: + rs = session.execute(sa.text(sql_query), arg_dict) + for row in rs: + response_data.append( + { + "date": str(row.date), + "token_count": row.token_count, + } + ) + + return cast(list[DailyTokenCostStats], response_data) + + def get_average_app_interaction_statistics( + self, + tenant_id: str, + app_id: str, + triggered_from: str, + start_date: datetime | None = None, + end_date: datetime | None = None, + timezone: str = "UTC", + ) -> list[AverageInteractionStats]: + """ + Get average app interaction statistics using raw SQL for optimal performance. + """ + converted_created_at = convert_datetime_to_date("c.created_at") + sql_query = f"""SELECT + AVG(sub.interactions) AS interactions, + sub.date +FROM + ( + SELECT + {converted_created_at} AS date, + c.created_by, + COUNT(c.id) AS interactions + FROM + workflow_runs c + WHERE + c.tenant_id = :tenant_id + AND c.app_id = :app_id + AND c.triggered_from = :triggered_from + {{{{start}}}} + {{{{end}}}} + GROUP BY + date, c.created_by + ) sub +GROUP BY + sub.date""" + + arg_dict: dict[str, Any] = { + "tz": timezone, + "tenant_id": tenant_id, + "app_id": app_id, + "triggered_from": triggered_from, + } + + if start_date: + sql_query = sql_query.replace("{{start}}", " AND c.created_at >= :start_date") + arg_dict["start_date"] = start_date + else: + sql_query = sql_query.replace("{{start}}", "") + + if end_date: + sql_query = sql_query.replace("{{end}}", " AND c.created_at < :end_date") + arg_dict["end_date"] = end_date + else: + sql_query = sql_query.replace("{{end}}", "") + + response_data = [] + with self._session_maker() as session: + rs = session.execute(sa.text(sql_query), arg_dict) + for row in rs: + response_data.append( + {"date": str(row.date), "interactions": float(row.interactions.quantize(Decimal("0.01")))} + ) + + return cast(list[AverageInteractionStats], response_data) + + +class _PrivateWorkflowPauseEntity(WorkflowPauseEntity): + """ + Private implementation of WorkflowPauseEntity for SQLAlchemy repository. + + This implementation is internal to the repository layer and provides + the concrete implementation of the WorkflowPauseEntity interface. + """ + + def __init__( + self, + *, + pause_model: WorkflowPauseModel, + ) -> None: + self._pause_model = pause_model + self._cached_state: bytes | None = None + + @classmethod + def from_models(cls, workflow_pause_model) -> "_PrivateWorkflowPauseEntity": + """ + Create a _PrivateWorkflowPauseEntity from database models. + + Args: + workflow_pause_model: The WorkflowPause database model + upload_file_model: The UploadFile database model + + Returns: + _PrivateWorkflowPauseEntity: The constructed entity + + Raises: + ValueError: If required model attributes are missing + """ + return cls(pause_model=workflow_pause_model) + + @property + def id(self) -> str: + return self._pause_model.id + + @property + def workflow_execution_id(self) -> str: + return self._pause_model.workflow_run_id + + def get_state(self) -> bytes: + """ + Retrieve the serialized workflow state from storage. + + Returns: + Mapping[str, Any]: The workflow state as a dictionary + + Raises: + FileNotFoundError: If the state file cannot be found + IOError: If there are issues reading the state file + _Workflow: If the state cannot be deserialized properly + """ + if self._cached_state is not None: + return self._cached_state + + # Load the state from storage + state_data = storage.load(self._pause_model.state_object_key) + self._cached_state = state_data + return state_data + + @property + def resumed_at(self) -> datetime | None: + return self._pause_model.resumed_at diff --git a/api/repositories/sqlalchemy_workflow_trigger_log_repository.py b/api/repositories/sqlalchemy_workflow_trigger_log_repository.py index f0ccc0a2ba..0d67e286b0 100644 --- a/api/repositories/sqlalchemy_workflow_trigger_log_repository.py +++ b/api/repositories/sqlalchemy_workflow_trigger_log_repository.py @@ -4,14 +4,13 @@ SQLAlchemy implementation of WorkflowTriggerLogRepository. from collections.abc import Sequence from datetime import UTC, datetime, timedelta -from typing import Any, Optional -from sqlalchemy import and_, delete, func, select, update +from sqlalchemy import and_, select from sqlalchemy.orm import Session from models.enums import WorkflowTriggerStatus from models.trigger import WorkflowTriggerLog -from repositories.workflow_trigger_log_repository import TriggerLogOrderBy, WorkflowTriggerLogRepository +from repositories.workflow_trigger_log_repository import WorkflowTriggerLogRepository class SQLAlchemyWorkflowTriggerLogRepository(WorkflowTriggerLogRepository): @@ -36,7 +35,7 @@ class SQLAlchemyWorkflowTriggerLogRepository(WorkflowTriggerLogRepository): self.session.flush() return trigger_log - def get_by_id(self, trigger_log_id: str, tenant_id: Optional[str] = None) -> Optional[WorkflowTriggerLog]: + def get_by_id(self, trigger_log_id: str, tenant_id: str | None = None) -> WorkflowTriggerLog | None: """Get a trigger log by its ID.""" query = select(WorkflowTriggerLog).where(WorkflowTriggerLog.id == trigger_log_id) @@ -45,37 +44,6 @@ class SQLAlchemyWorkflowTriggerLogRepository(WorkflowTriggerLogRepository): return self.session.scalar(query) - def get_by_status( - self, - tenant_id: str, - app_id: str, - status: WorkflowTriggerStatus, - limit: int = 100, - offset: int = 0, - order_by: TriggerLogOrderBy = TriggerLogOrderBy.CREATED_AT, - order_desc: bool = True, - ) -> Sequence[WorkflowTriggerLog]: - """Get trigger logs by status with pagination.""" - query = select(WorkflowTriggerLog).where( - and_( - WorkflowTriggerLog.tenant_id == tenant_id, - WorkflowTriggerLog.app_id == app_id, - WorkflowTriggerLog.status == status, - ) - ) - - # Apply ordering - order_column = getattr(WorkflowTriggerLog, order_by.value) - if order_desc: - query = query.order_by(order_column.desc()) - else: - query = query.order_by(order_column.asc()) - - # Apply pagination - query = query.limit(limit).offset(offset) - - return list(self.session.scalars(query).all()) - def get_failed_for_retry( self, tenant_id: str, max_retry_count: int = 3, limit: int = 100 ) -> Sequence[WorkflowTriggerLog]: @@ -116,84 +84,3 @@ class SQLAlchemyWorkflowTriggerLogRepository(WorkflowTriggerLogRepository): ) return list(self.session.scalars(query).all()) - - def count_by_status( - self, - tenant_id: str, - app_id: str, - status: Optional[WorkflowTriggerStatus] = None, - since: Optional[datetime] = None, - ) -> int: - """Count trigger logs by status.""" - query = select(func.count(WorkflowTriggerLog.id)).where( - and_(WorkflowTriggerLog.tenant_id == tenant_id, WorkflowTriggerLog.app_id == app_id) - ) - - if status: - query = query.where(WorkflowTriggerLog.status == status) - - if since: - query = query.where(WorkflowTriggerLog.created_at >= since) - - return self.session.scalar(query) or 0 - - def delete_expired_logs(self, tenant_id: str, before_date: datetime, batch_size: int = 1000) -> int: - """Delete expired trigger logs in batches.""" - total_deleted = 0 - - while True: - # Get batch of IDs to delete - subquery = ( - select(WorkflowTriggerLog.id) - .where(and_(WorkflowTriggerLog.tenant_id == tenant_id, WorkflowTriggerLog.created_at < before_date)) - .limit(batch_size) - ) - - # Delete the batch - result = self.session.execute(delete(WorkflowTriggerLog).where(WorkflowTriggerLog.id.in_(subquery))) - - deleted = result.rowcount - total_deleted += deleted - - if deleted < batch_size: - break - - self.session.commit() - - return total_deleted - - def archive_completed_logs( - self, tenant_id: str, before_date: datetime, batch_size: int = 1000 - ) -> Sequence[WorkflowTriggerLog]: - """Get completed logs for archival.""" - query = ( - select(WorkflowTriggerLog) - .where( - and_( - WorkflowTriggerLog.tenant_id == tenant_id, - WorkflowTriggerLog.status == WorkflowTriggerStatus.SUCCEEDED, - WorkflowTriggerLog.finished_at < before_date, - ) - ) - .limit(batch_size) - ) - - return list(self.session.scalars(query).all()) - - def update_status_batch( - self, trigger_log_ids: Sequence[str], new_status: WorkflowTriggerStatus, error_message: Optional[str] = None - ) -> int: - """Update status for multiple trigger logs.""" - update_data: dict[str, Any] = {"status": new_status} - - if error_message is not None: - update_data["error"] = error_message - - if new_status in [WorkflowTriggerStatus.SUCCEEDED, WorkflowTriggerStatus.FAILED]: - update_data["finished_at"] = datetime.now(UTC) - - result = self.session.execute( - update(WorkflowTriggerLog).where(WorkflowTriggerLog.id.in_(trigger_log_ids)).values(**update_data) - ) - - return result.rowcount diff --git a/api/repositories/types.py b/api/repositories/types.py new file mode 100644 index 0000000000..3b3ef7f635 --- /dev/null +++ b/api/repositories/types.py @@ -0,0 +1,21 @@ +from typing import TypedDict + + +class DailyRunsStats(TypedDict): + date: str + runs: int + + +class DailyTerminalsStats(TypedDict): + date: str + terminal_count: int + + +class DailyTokenCostStats(TypedDict): + date: str + token_count: int + + +class AverageInteractionStats(TypedDict): + date: str + interactions: float diff --git a/api/repositories/workflow_trigger_log_repository.py b/api/repositories/workflow_trigger_log_repository.py index 1c026c2f5d..138b8779ac 100644 --- a/api/repositories/workflow_trigger_log_repository.py +++ b/api/repositories/workflow_trigger_log_repository.py @@ -7,11 +7,9 @@ proper indexing and batch operations. """ from collections.abc import Sequence -from datetime import datetime from enum import StrEnum -from typing import Optional, Protocol +from typing import Protocol -from models.enums import WorkflowTriggerStatus from models.trigger import WorkflowTriggerLog @@ -65,7 +63,7 @@ class WorkflowTriggerLogRepository(Protocol): """ ... - def get_by_id(self, trigger_log_id: str, tenant_id: Optional[str] = None) -> Optional[WorkflowTriggerLog]: + def get_by_id(self, trigger_log_id: str, tenant_id: str | None = None) -> WorkflowTriggerLog | None: """ Get a trigger log by its ID. @@ -78,33 +76,6 @@ class WorkflowTriggerLogRepository(Protocol): """ ... - def get_by_status( - self, - tenant_id: str, - app_id: str, - status: WorkflowTriggerStatus, - limit: int = 100, - offset: int = 0, - order_by: TriggerLogOrderBy = TriggerLogOrderBy.CREATED_AT, - order_desc: bool = True, - ) -> Sequence[WorkflowTriggerLog]: - """ - Get trigger logs by status with pagination. - - Args: - tenant_id: The tenant identifier - app_id: The application identifier - status: The workflow trigger status to filter by - limit: Maximum number of results - offset: Number of results to skip - order_by: Field to order results by - order_desc: Whether to order descending (True) or ascending (False) - - Returns: - A sequence of WorkflowTriggerLog instances - """ - ... - def get_failed_for_retry( self, tenant_id: str, max_retry_count: int = 3, limit: int = 100 ) -> Sequence[WorkflowTriggerLog]: @@ -138,70 +109,3 @@ class WorkflowTriggerLogRepository(Protocol): A sequence of recent WorkflowTriggerLog instances """ ... - - def count_by_status( - self, - tenant_id: str, - app_id: str, - status: Optional[WorkflowTriggerStatus] = None, - since: Optional[datetime] = None, - ) -> int: - """ - Count trigger logs by status. - - Args: - tenant_id: The tenant identifier - app_id: The application identifier - status: Optional status filter - since: Optional datetime to count from - - Returns: - Count of matching trigger logs - """ - ... - - def delete_expired_logs(self, tenant_id: str, before_date: datetime, batch_size: int = 1000) -> int: - """ - Delete expired trigger logs in batches. - - Args: - tenant_id: The tenant identifier - before_date: Delete logs created before this date - batch_size: Number of logs to delete per batch - - Returns: - Total number of logs deleted - """ - ... - - def archive_completed_logs( - self, tenant_id: str, before_date: datetime, batch_size: int = 1000 - ) -> Sequence[WorkflowTriggerLog]: - """ - Get completed logs for archival before deletion. - - Args: - tenant_id: The tenant identifier - before_date: Get logs completed before this date - batch_size: Number of logs to retrieve - - Returns: - A sequence of WorkflowTriggerLog instances for archival - """ - ... - - def update_status_batch( - self, trigger_log_ids: Sequence[str], new_status: WorkflowTriggerStatus, error_message: Optional[str] = None - ) -> int: - """ - Update status for multiple trigger logs at once. - - Args: - trigger_log_ids: List of trigger log IDs to update - new_status: The new status to set - error_message: Optional error message to set - - Returns: - Number of logs updated - """ - ... diff --git a/api/schedule/clean_messages.py b/api/schedule/clean_messages.py index 65038dce4d..352a84b592 100644 --- a/api/schedule/clean_messages.py +++ b/api/schedule/clean_messages.py @@ -7,6 +7,7 @@ from sqlalchemy.exc import SQLAlchemyError import app from configs import dify_config +from enums.cloud_plan import CloudPlan from extensions.ext_database import db from extensions.ext_redis import redis_client from models.model import ( @@ -63,7 +64,7 @@ def clean_messages(): plan = features.billing.subscription.plan else: plan = plan_cache.decode() - if plan == "sandbox": + if plan == CloudPlan.SANDBOX: # clean related message db.session.query(MessageFeedback).where(MessageFeedback.message_id == message.id).delete( synchronize_session=False diff --git a/api/schedule/clean_unused_datasets_task.py b/api/schedule/clean_unused_datasets_task.py index 9efd46ba5d..d9fb6a24f1 100644 --- a/api/schedule/clean_unused_datasets_task.py +++ b/api/schedule/clean_unused_datasets_task.py @@ -9,6 +9,7 @@ from sqlalchemy.exc import SQLAlchemyError import app from configs import dify_config from core.rag.index_processor.index_processor_factory import IndexProcessorFactory +from enums.cloud_plan import CloudPlan from extensions.ext_database import db from extensions.ext_redis import redis_client from models.dataset import Dataset, DatasetAutoDisableLog, DatasetQuery, Document @@ -35,7 +36,7 @@ def clean_unused_datasets_task(): }, { "clean_day": datetime.datetime.now() - datetime.timedelta(days=dify_config.PLAN_PRO_CLEAN_DAY_SETTING), - "plan_filter": "sandbox", + "plan_filter": CloudPlan.SANDBOX, "add_logs": False, }, ] diff --git a/api/schedule/mail_clean_document_notify_task.py b/api/schedule/mail_clean_document_notify_task.py index b70707b17e..d738bf46fa 100644 --- a/api/schedule/mail_clean_document_notify_task.py +++ b/api/schedule/mail_clean_document_notify_task.py @@ -7,6 +7,7 @@ from sqlalchemy import select import app from configs import dify_config +from enums.cloud_plan import CloudPlan from extensions.ext_database import db from extensions.ext_mail import mail from libs.email_i18n import EmailType, get_email_i18n_service @@ -45,7 +46,7 @@ def mail_clean_document_notify_task(): for tenant_id, tenant_dataset_auto_disable_logs in dataset_auto_disable_logs_map.items(): features = FeatureService.get_features(tenant_id) plan = features.billing.subscription.plan - if plan != "sandbox": + if plan != CloudPlan.SANDBOX: knowledge_details = [] # check tenant tenant = db.session.query(Tenant).where(Tenant.id == tenant_id).first() diff --git a/api/schedule/workflow_schedule_task.py b/api/schedule/workflow_schedule_task.py index 41e2232353..d68b9565ec 100644 --- a/api/schedule/workflow_schedule_task.py +++ b/api/schedule/workflow_schedule_task.py @@ -9,7 +9,6 @@ from extensions.ext_database import db from libs.datetime_utils import naive_utc_now from libs.schedule_utils import calculate_next_run_at from models.trigger import AppTrigger, AppTriggerStatus, AppTriggerType, WorkflowSchedulePlan -from services.workflow.queue_dispatcher import QueueDispatcherManager from tasks.workflow_schedule_tasks import run_schedule_trigger logger = logging.getLogger(__name__) @@ -29,7 +28,6 @@ def poll_workflow_schedules() -> None: with session_factory() as session: total_dispatched = 0 - total_rate_limited = 0 # Process in batches until we've handled all due schedules or hit the limit while True: @@ -38,11 +36,10 @@ def poll_workflow_schedules() -> None: if not due_schedules: break - dispatched_count, rate_limited_count = _process_schedules(session, due_schedules) + dispatched_count = _process_schedules(session, due_schedules) total_dispatched += dispatched_count - total_rate_limited += rate_limited_count - logger.debug("Batch processed: %d dispatched, %d rate limited", dispatched_count, rate_limited_count) + logger.debug("Batch processed: %d dispatched", dispatched_count) # Circuit breaker: check if we've hit the per-tick limit (if enabled) if ( @@ -55,8 +52,8 @@ def poll_workflow_schedules() -> None: ) break - if total_dispatched > 0 or total_rate_limited > 0: - logger.info("Total processed: %d dispatched, %d rate limited", total_dispatched, total_rate_limited) + if total_dispatched > 0: + logger.info("Total processed: %d dispatched", total_dispatched) def _fetch_due_schedules(session: Session) -> list[WorkflowSchedulePlan]: @@ -93,15 +90,12 @@ def _fetch_due_schedules(session: Session) -> list[WorkflowSchedulePlan]: return list(due_schedules) -def _process_schedules(session: Session, schedules: list[WorkflowSchedulePlan]) -> tuple[int, int]: +def _process_schedules(session: Session, schedules: list[WorkflowSchedulePlan]) -> int: """Process schedules: check quota, update next run time and dispatch to Celery in parallel.""" if not schedules: - return 0, 0 + return 0 - dispatcher_manager = QueueDispatcherManager() tasks_to_dispatch: list[str] = [] - rate_limited_count = 0 - for schedule in schedules: next_run_at = calculate_next_run_at( schedule.cron_expression, @@ -109,12 +103,7 @@ def _process_schedules(session: Session, schedules: list[WorkflowSchedulePlan]) ) schedule.next_run_at = next_run_at - dispatcher = dispatcher_manager.get_dispatcher(schedule.tenant_id) - if not dispatcher.check_daily_quota(schedule.tenant_id): - logger.info("Tenant %s rate limited, skipping schedule_plan %s", schedule.tenant_id, schedule.id) - rate_limited_count += 1 - else: - tasks_to_dispatch.append(schedule.id) + tasks_to_dispatch.append(schedule.id) if tasks_to_dispatch: job = group(run_schedule_trigger.s(schedule_id) for schedule_id in tasks_to_dispatch) @@ -124,4 +113,4 @@ def _process_schedules(session: Session, schedules: list[WorkflowSchedulePlan]) session.commit() - return len(tasks_to_dispatch), rate_limited_count + return len(tasks_to_dispatch) diff --git a/api/services/annotation_service.py b/api/services/annotation_service.py index c0d26cdd27..9258def907 100644 --- a/api/services/annotation_service.py +++ b/api/services/annotation_service.py @@ -32,41 +32,48 @@ class AppAnnotationService: if not app: raise NotFound("App not found") + + answer = args.get("answer") or args.get("content") + if answer is None: + raise ValueError("Either 'answer' or 'content' must be provided") + if args.get("message_id"): message_id = str(args["message_id"]) - # get message info message = db.session.query(Message).where(Message.id == message_id, Message.app_id == app.id).first() if not message: raise NotFound("Message Not Exists.") + question = args.get("question") or message.query or "" + annotation: MessageAnnotation | None = message.annotation - # save the message annotation if annotation: - annotation.content = args["answer"] - annotation.question = args["question"] + annotation.content = answer + annotation.question = question else: annotation = MessageAnnotation( app_id=app.id, conversation_id=message.conversation_id, message_id=message.id, - content=args["answer"], - question=args["question"], + content=answer, + question=question, account_id=current_user.id, ) else: - annotation = MessageAnnotation( - app_id=app.id, content=args["answer"], question=args["question"], account_id=current_user.id - ) + question = args.get("question") + if not question: + raise ValueError("'question' is required when 'message_id' is not provided") + + annotation = MessageAnnotation(app_id=app.id, content=answer, question=question, account_id=current_user.id) db.session.add(annotation) db.session.commit() - # if annotation reply is enabled , add annotation to index + annotation_setting = db.session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app_id).first() assert current_tenant_id is not None if annotation_setting: add_annotation_to_index_task.delay( annotation.id, - args["question"], + annotation.question, current_tenant_id, app_id, annotation_setting.collection_binding_id, diff --git a/api/services/app_generate_service.py b/api/services/app_generate_service.py index 137bb01f27..bb1ea742d0 100644 --- a/api/services/app_generate_service.py +++ b/api/services/app_generate_service.py @@ -1,6 +1,6 @@ import uuid from collections.abc import Generator, Mapping -from typing import Any, Optional, Union +from typing import Any, Union from configs import dify_config from core.app.apps.advanced_chat.app_generator import AdvancedChatAppGenerator @@ -10,18 +10,14 @@ from core.app.apps.completion.app_generator import CompletionAppGenerator from core.app.apps.workflow.app_generator import WorkflowAppGenerator from core.app.entities.app_invoke_entities import InvokeFrom from core.app.features.rate_limiting import RateLimit -from libs.helper import RateLimiter +from enums.quota_type import QuotaType, unlimited from models.model import Account, App, AppMode, EndUser from models.workflow import Workflow -from services.billing_service import BillingService -from services.errors.app import WorkflowIdFormatError, WorkflowNotFoundError -from services.errors.llm import InvokeRateLimitError +from services.errors.app import InvokeRateLimitError, QuotaExceededError, WorkflowIdFormatError, WorkflowNotFoundError from services.workflow_service import WorkflowService class AppGenerateService: - system_rate_limiter = RateLimiter("app_daily_rate_limiter", dify_config.APP_DAILY_RATE_LIMIT, 86400) - @classmethod def generate( cls, @@ -30,7 +26,7 @@ class AppGenerateService: args: Mapping[str, Any], invoke_from: InvokeFrom, streaming: bool = True, - root_node_id: Optional[str] = None, + root_node_id: str | None = None, ): """ App Content Generate @@ -41,17 +37,12 @@ class AppGenerateService: :param streaming: streaming :return: """ - # system level rate limiter + quota_charge = unlimited() if dify_config.BILLING_ENABLED: - # check if it's free plan - limit_info = BillingService.get_info(app_model.tenant_id) - if limit_info["subscription"]["plan"] == "sandbox": - if cls.system_rate_limiter.is_rate_limited(app_model.tenant_id): - raise InvokeRateLimitError( - "Rate limit exceeded, please upgrade your plan " - f"or your RPD was {dify_config.APP_DAILY_RATE_LIMIT} requests/day" - ) - cls.system_rate_limiter.increment_rate_limit(app_model.tenant_id) + try: + quota_charge = QuotaType.WORKFLOW.consume(app_model.tenant_id) + except QuotaExceededError: + raise InvokeRateLimitError(f"Workflow execution quota limit reached for tenant {app_model.tenant_id}") # app level rate limiter max_active_request = cls._get_max_active_requests(app_model) @@ -123,6 +114,7 @@ class AppGenerateService: else: raise ValueError(f"Invalid app mode {app_model.mode}") except Exception: + quota_charge.refund() rate_limit.exit(request_id) raise finally: diff --git a/api/services/async_workflow_service.py b/api/services/async_workflow_service.py index ab5ef7b6d4..8d62f121e2 100644 --- a/api/services/async_workflow_service.py +++ b/api/services/async_workflow_service.py @@ -7,24 +7,23 @@ with support for different subscription tiers, rate limiting, and execution trac import json from datetime import UTC, datetime -from typing import Any, Optional, Union +from typing import Any, Union from celery.result import AsyncResult from sqlalchemy import select from sqlalchemy.orm import Session +from enums.quota_type import QuotaType from extensions.ext_database import db -from extensions.ext_redis import redis_client from models.account import Account from models.enums import CreatorUserRole, WorkflowTriggerStatus from models.model import App, EndUser from models.trigger import WorkflowTriggerLog from models.workflow import Workflow from repositories.sqlalchemy_workflow_trigger_log_repository import SQLAlchemyWorkflowTriggerLogRepository -from services.errors.app import InvokeDailyRateLimitError, WorkflowNotFoundError +from services.errors.app import InvokeRateLimitError, QuotaExceededError, WorkflowNotFoundError from services.workflow.entities import AsyncTriggerResponse, TriggerData, WorkflowTaskData from services.workflow.queue_dispatcher import QueueDispatcherManager, QueuePriority -from services.workflow.rate_limiter import TenantDailyRateLimiter from services.workflow_service import WorkflowService from tasks.async_workflow_tasks import ( execute_workflow_professional, @@ -82,7 +81,6 @@ class AsyncWorkflowService: trigger_log_repo = SQLAlchemyWorkflowTriggerLogRepository(session) dispatcher_manager = QueueDispatcherManager() workflow_service = WorkflowService() - rate_limiter = TenantDailyRateLimiter(redis_client) # 1. Validate app exists app_model = session.scalar(select(App).where(App.id == trigger_data.app_id)) @@ -111,6 +109,9 @@ class AsyncWorkflowService: app_id=trigger_data.app_id, workflow_id=workflow.id, root_node_id=trigger_data.root_node_id, + trigger_metadata=( + trigger_data.trigger_metadata.model_dump_json() if trigger_data.trigger_metadata else "{}" + ), trigger_type=trigger_data.trigger_type, trigger_data=trigger_data.model_dump_json(), inputs=json.dumps(dict(trigger_data.inputs)), @@ -124,25 +125,19 @@ class AsyncWorkflowService: trigger_log = trigger_log_repo.create(trigger_log) session.commit() - # 7. Check and consume daily quota - if not dispatcher.consume_quota(trigger_data.tenant_id): + # 7. Check and consume quota + try: + QuotaType.WORKFLOW.consume(trigger_data.tenant_id) + except QuotaExceededError as e: # Update trigger log status trigger_log.status = WorkflowTriggerStatus.RATE_LIMITED - trigger_log.error = f"Daily limit reached for {dispatcher.get_queue_name()}" + trigger_log.error = f"Quota limit reached: {e}" trigger_log_repo.update(trigger_log) session.commit() - tenant_owner_tz = rate_limiter.get_tenant_owner_timezone(trigger_data.tenant_id) - - remaining = rate_limiter.get_remaining_quota(trigger_data.tenant_id, dispatcher.get_daily_limit()) - - reset_time = rate_limiter.get_quota_reset_time(trigger_data.tenant_id, tenant_owner_tz) - - raise InvokeDailyRateLimitError( - f"Daily workflow execution limit reached. " - f"Limit resets at {reset_time.strftime('%Y-%m-%d %H:%M:%S %Z')}. " - f"Remaining quota: {remaining}" - ) + raise InvokeRateLimitError( + f"Workflow execution quota limit reached for tenant {trigger_data.tenant_id}" + ) from e # 8. Create task data queue_name = dispatcher.get_queue_name() @@ -223,7 +218,7 @@ class AsyncWorkflowService: return cls.trigger_workflow_async(session, user, trigger_data) @classmethod - def get_trigger_log(cls, workflow_trigger_log_id: str, tenant_id: Optional[str] = None) -> Optional[dict[str, Any]]: + def get_trigger_log(cls, workflow_trigger_log_id: str, tenant_id: str | None = None) -> dict[str, Any] | None: """ Get trigger log by ID @@ -292,7 +287,7 @@ class AsyncWorkflowService: return [log.to_dict() for log in logs] @staticmethod - def _get_workflow(workflow_service: WorkflowService, app_model: App, workflow_id: Optional[str] = None) -> Workflow: + def _get_workflow(workflow_service: WorkflowService, app_model: App, workflow_id: str | None = None) -> Workflow: """ Get workflow for the app diff --git a/api/services/billing_service.py b/api/services/billing_service.py index a6851d2638..54e1c9d285 100644 --- a/api/services/billing_service.py +++ b/api/services/billing_service.py @@ -3,7 +3,9 @@ from typing import Literal import httpx from tenacity import retry, retry_if_exception_type, stop_before_delay, wait_fixed +from werkzeug.exceptions import InternalServerError +from enums.cloud_plan import CloudPlan from extensions.ext_database import db from extensions.ext_redis import redis_client from libs.helper import RateLimiter @@ -23,6 +25,13 @@ class BillingService: billing_info = cls._send_request("GET", "/subscription/info", params=params) return billing_info + @classmethod + def get_tenant_feature_plan_usage_info(cls, tenant_id: str): + params = {"tenant_id": tenant_id} + + usage_info = cls._send_request("GET", "/tenant-feature-usage/info", params=params) + return usage_info + @classmethod def get_knowledge_rate_limit(cls, tenant_id: str): params = {"tenant_id": tenant_id} @@ -31,7 +40,7 @@ class BillingService: return { "limit": knowledge_rate_limit.get("limit", 10), - "subscription_plan": knowledge_rate_limit.get("subscription_plan", "sandbox"), + "subscription_plan": knowledge_rate_limit.get("subscription_plan", CloudPlan.SANDBOX), } @classmethod @@ -54,6 +63,44 @@ class BillingService: params = {"prefilled_email": prefilled_email, "tenant_id": tenant_id} return cls._send_request("GET", "/invoices", params=params) + @classmethod + def update_tenant_feature_plan_usage(cls, tenant_id: str, feature_key: str, delta: int) -> dict: + """ + Update tenant feature plan usage. + + Args: + tenant_id: Tenant identifier + feature_key: Feature key (e.g., 'trigger', 'workflow') + delta: Usage delta (positive to add, negative to consume) + + Returns: + Response dict with 'result' and 'history_id' + Example: {"result": "success", "history_id": "uuid"} + """ + return cls._send_request( + "POST", + "/tenant-feature-usage/usage", + params={"tenant_id": tenant_id, "feature_key": feature_key, "delta": delta}, + ) + + @classmethod + def refund_tenant_feature_plan_usage(cls, history_id: str) -> dict: + """ + Refund a previous usage charge. + + Args: + history_id: The history_id returned from update_tenant_feature_plan_usage + + Returns: + Response dict with 'result' and 'history_id' + """ + return cls._send_request("POST", "/tenant-feature-usage/refund", params={"quota_usage_history_id": history_id}) + + @classmethod + def get_tenant_feature_plan_usage(cls, tenant_id: str, feature_key: str): + params = {"tenant_id": tenant_id, "feature_key": feature_key} + return cls._send_request("GET", "/billing/tenant_feature_plan/usage", params=params) + @classmethod @retry( wait=wait_fixed(2), @@ -61,13 +108,22 @@ class BillingService: retry=retry_if_exception_type(httpx.RequestError), reraise=True, ) - def _send_request(cls, method: Literal["GET", "POST", "DELETE"], endpoint: str, json=None, params=None): + def _send_request(cls, method: Literal["GET", "POST", "DELETE", "PUT"], endpoint: str, json=None, params=None): headers = {"Content-Type": "application/json", "Billing-Api-Secret-Key": cls.secret_key} url = f"{cls.base_url}{endpoint}" response = httpx.request(method, url, json=json, params=params, headers=headers) if method == "GET" and response.status_code != httpx.codes.OK: raise ValueError("Unable to retrieve billing information. Please try again later or contact support.") + if method == "PUT": + if response.status_code == httpx.codes.INTERNAL_SERVER_ERROR: + raise InternalServerError( + "Unable to process billing request. Please try again later or contact support." + ) + if response.status_code != httpx.codes.OK: + raise ValueError("Invalid arguments.") + if method == "POST" and response.status_code != httpx.codes.OK: + raise ValueError(f"Unable to send request to {url}. Please try again later or contact support.") return response.json() @staticmethod @@ -178,3 +234,8 @@ class BillingService: @classmethod def clean_billing_info_cache(cls, tenant_id: str): redis_client.delete(f"tenant:{tenant_id}:billing_info") + + @classmethod + def sync_partner_tenants_bindings(cls, account_id: str, partner_key: str, click_id: str): + payload = {"account_id": account_id, "click_id": click_id} + return cls._send_request("PUT", f"/partners/{partner_key}/tenants", json=payload) diff --git a/api/services/clear_free_plan_tenant_expired_logs.py b/api/services/clear_free_plan_tenant_expired_logs.py index f8f89d7428..aefc34fcae 100644 --- a/api/services/clear_free_plan_tenant_expired_logs.py +++ b/api/services/clear_free_plan_tenant_expired_logs.py @@ -11,6 +11,7 @@ from sqlalchemy.orm import Session, sessionmaker from configs import dify_config from core.model_runtime.utils.encoders import jsonable_encoder +from enums.cloud_plan import CloudPlan from extensions.ext_database import db from extensions.ext_storage import storage from models.account import Tenant @@ -358,7 +359,7 @@ class ClearFreePlanTenantExpiredLogs: try: if ( not dify_config.BILLING_ENABLED - or BillingService.get_info(tenant_id)["subscription"]["plan"] == "sandbox" + or BillingService.get_info(tenant_id)["subscription"]["plan"] == CloudPlan.SANDBOX ): # only process sandbox tenant cls.process_tenant(flask_app, tenant_id, days, batch) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index c97d419545..abfb4baeec 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -22,6 +22,7 @@ from core.model_runtime.entities.model_entities import ModelType from core.rag.index_processor.constant.built_in_field import BuiltInField from core.rag.index_processor.constant.index_type import IndexType from core.rag.retrieval.retrieval_methods import RetrievalMethod +from enums.cloud_plan import CloudPlan from events.dataset_event import dataset_was_deleted from events.document_event import document_was_deleted from extensions.ext_database import db @@ -49,6 +50,7 @@ from models.model import UploadFile from models.provider_ids import ModelProviderID from models.source import DataSourceOauthBinding from models.workflow import Workflow +from services.document_indexing_task_proxy import DocumentIndexingTaskProxy from services.entities.knowledge_entities.knowledge_entities import ( ChildChunkUpdateArgs, KnowledgeConfig, @@ -78,7 +80,6 @@ from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task from tasks.delete_segment_from_index_task import delete_segment_from_index_task from tasks.disable_segment_from_index_task import disable_segment_from_index_task from tasks.disable_segments_from_index_task import disable_segments_from_index_task -from tasks.document_indexing_task import document_indexing_task from tasks.document_indexing_update_task import document_indexing_update_task from tasks.duplicate_document_indexing_task import duplicate_document_indexing_task from tasks.enable_segments_to_index_task import enable_segments_to_index_task @@ -253,6 +254,8 @@ class DatasetService: external_knowledge_api = ExternalDatasetService.get_external_knowledge_api(external_knowledge_api_id) if not external_knowledge_api: raise ValueError("External API template not found.") + if external_knowledge_id is None: + raise ValueError("external_knowledge_id is required") external_knowledge_binding = ExternalKnowledgeBindings( tenant_id=tenant_id, dataset_id=dataset.id, @@ -1042,7 +1045,7 @@ class DatasetService: assert isinstance(current_user, Account) assert current_user.current_tenant_id is not None features = FeatureService.get_features(current_user.current_tenant_id) - if not features.billing.enabled or features.billing.subscription.plan == "sandbox": + if not features.billing.enabled or features.billing.subscription.plan == CloudPlan.SANDBOX: return { "document_ids": [], "count": 0, @@ -1081,6 +1084,62 @@ class DocumentService: }, } + DISPLAY_STATUS_ALIASES: dict[str, str] = { + "active": "available", + "enabled": "available", + } + + _INDEXING_STATUSES: tuple[str, ...] = ("parsing", "cleaning", "splitting", "indexing") + + DISPLAY_STATUS_FILTERS: dict[str, tuple[Any, ...]] = { + "queuing": (Document.indexing_status == "waiting",), + "indexing": ( + Document.indexing_status.in_(_INDEXING_STATUSES), + Document.is_paused.is_not(True), + ), + "paused": ( + Document.indexing_status.in_(_INDEXING_STATUSES), + Document.is_paused.is_(True), + ), + "error": (Document.indexing_status == "error",), + "available": ( + Document.indexing_status == "completed", + Document.archived.is_(False), + Document.enabled.is_(True), + ), + "disabled": ( + Document.indexing_status == "completed", + Document.archived.is_(False), + Document.enabled.is_(False), + ), + "archived": ( + Document.indexing_status == "completed", + Document.archived.is_(True), + ), + } + + @classmethod + def normalize_display_status(cls, status: str | None) -> str | None: + if not status: + return None + normalized = status.lower() + normalized = cls.DISPLAY_STATUS_ALIASES.get(normalized, normalized) + return normalized if normalized in cls.DISPLAY_STATUS_FILTERS else None + + @classmethod + def build_display_status_filters(cls, status: str | None) -> tuple[Any, ...]: + normalized = cls.normalize_display_status(status) + if not normalized: + return () + return cls.DISPLAY_STATUS_FILTERS[normalized] + + @classmethod + def apply_display_status_filter(cls, query, status: str | None): + filters = cls.build_display_status_filters(status) + if not filters: + return query + return query.where(*filters) + DOCUMENT_METADATA_SCHEMA: dict[str, Any] = { "book": { "title": str, @@ -1416,8 +1475,6 @@ class DocumentService: # check document limit assert isinstance(current_user, Account) assert current_user.current_tenant_id is not None - assert knowledge_config.data_source - assert knowledge_config.data_source.info_list.file_info_list features = FeatureService.get_features(current_user.current_tenant_id) @@ -1426,6 +1483,8 @@ class DocumentService: count = 0 if knowledge_config.data_source: if knowledge_config.data_source.info_list.data_source_type == "upload_file": + if not knowledge_config.data_source.info_list.file_info_list: + raise ValueError("File source info is required") upload_file_list = knowledge_config.data_source.info_list.file_info_list.file_ids count = len(upload_file_list) elif knowledge_config.data_source.info_list.data_source_type == "notion_import": @@ -1438,7 +1497,7 @@ class DocumentService: count = len(website_info.urls) batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT) - if features.billing.subscription.plan == "sandbox" and count > 1: + if features.billing.subscription.plan == CloudPlan.SANDBOX and count > 1: raise ValueError("Your current plan does not support batch upload, please upgrade your plan.") if count > batch_upload_limit: raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.") @@ -1446,7 +1505,7 @@ class DocumentService: DocumentService.check_documents_upload_quota(count, features) # if dataset is empty, update dataset data_source_type - if not dataset.data_source_type: + if not dataset.data_source_type and knowledge_config.data_source: dataset.data_source_type = knowledge_config.data_source.info_list.data_source_type if not dataset.indexing_technique: @@ -1492,6 +1551,10 @@ class DocumentService: documents.append(document) batch = document.batch else: + # When creating new documents, data_source must be provided + if not knowledge_config.data_source: + raise ValueError("Data source is required when creating new documents") + batch = time.strftime("%Y%m%d%H%M%S") + str(100000 + secrets.randbelow(exclusive_upper_bound=900000)) # save process rule if not dataset_process_rule: @@ -1531,6 +1594,8 @@ class DocumentService: document_ids = [] duplicate_document_ids = [] if knowledge_config.data_source.info_list.data_source_type == "upload_file": + if not knowledge_config.data_source.info_list.file_info_list: + raise ValueError("File source info is required") upload_file_list = knowledge_config.data_source.info_list.file_info_list.file_ids for file_id in upload_file_list: file = ( @@ -1687,7 +1752,7 @@ class DocumentService: # trigger async task if document_ids: - document_indexing_task.delay(dataset.id, document_ids) + DocumentIndexingTaskProxy(dataset.tenant_id, dataset.id, document_ids).delay() if duplicate_document_ids: duplicate_document_indexing_task.delay(dataset.id, duplicate_document_ids) @@ -1721,7 +1786,7 @@ class DocumentService: # count = len(website_info.urls) # type: ignore # batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT) - # if features.billing.subscription.plan == "sandbox" and count > 1: + # if features.billing.subscription.plan == CloudPlan.SANDBOX and count > 1: # raise ValueError("Your current plan does not support batch upload, please upgrade your plan.") # if count > batch_upload_limit: # raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.") @@ -2190,7 +2255,7 @@ class DocumentService: website_info = knowledge_config.data_source.info_list.website_info_list if website_info: count = len(website_info.urls) - if features.billing.subscription.plan == "sandbox" and count > 1: + if features.billing.subscription.plan == CloudPlan.SANDBOX and count > 1: raise ValueError("Your current plan does not support batch upload, please upgrade your plan.") batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT) if count > batch_upload_limit: diff --git a/api/services/datasource_provider_service.py b/api/services/datasource_provider_service.py index 1e018af19f..81e0c0ecd4 100644 --- a/api/services/datasource_provider_service.py +++ b/api/services/datasource_provider_service.py @@ -338,7 +338,7 @@ class DatasourceProviderService: key: value if value != HIDDEN_VALUE else original_params.get(key, UNKNOWN_VALUE) for key, value in client_params.items() } - tenant_oauth_client_params.client_params = encrypter.encrypt(new_params) + tenant_oauth_client_params.client_params = dict(encrypter.encrypt(new_params)) if enabled is not None: tenant_oauth_client_params.enabled = enabled @@ -374,7 +374,7 @@ class DatasourceProviderService: def get_tenant_oauth_client( self, tenant_id: str, datasource_provider_id: DatasourceProviderID, mask: bool = False - ) -> dict[str, Any] | None: + ) -> Mapping[str, Any] | None: """ get tenant oauth client """ @@ -434,7 +434,7 @@ class DatasourceProviderService: ) if tenant_oauth_client_params: encrypter, _ = self.get_oauth_encrypter(tenant_id, datasource_provider_id) - return encrypter.decrypt(tenant_oauth_client_params.client_params) + return dict(encrypter.decrypt(tenant_oauth_client_params.client_params)) provider_controller = self.provider_manager.fetch_datasource_provider( tenant_id=tenant_id, provider_id=str(datasource_provider_id) diff --git a/api/services/document_indexing_task_proxy.py b/api/services/document_indexing_task_proxy.py new file mode 100644 index 0000000000..861c84b586 --- /dev/null +++ b/api/services/document_indexing_task_proxy.py @@ -0,0 +1,83 @@ +import logging +from collections.abc import Callable, Sequence +from dataclasses import asdict +from functools import cached_property + +from core.entities.document_task import DocumentTask +from core.rag.pipeline.queue import TenantIsolatedTaskQueue +from enums.cloud_plan import CloudPlan +from services.feature_service import FeatureService +from tasks.document_indexing_task import normal_document_indexing_task, priority_document_indexing_task + +logger = logging.getLogger(__name__) + + +class DocumentIndexingTaskProxy: + def __init__(self, tenant_id: str, dataset_id: str, document_ids: Sequence[str]): + self._tenant_id = tenant_id + self._dataset_id = dataset_id + self._document_ids = document_ids + self._tenant_isolated_task_queue = TenantIsolatedTaskQueue(tenant_id, "document_indexing") + + @cached_property + def features(self): + return FeatureService.get_features(self._tenant_id) + + def _send_to_direct_queue(self, task_func: Callable[[str, str, Sequence[str]], None]): + logger.info("send dataset %s to direct queue", self._dataset_id) + task_func.delay( # type: ignore + tenant_id=self._tenant_id, dataset_id=self._dataset_id, document_ids=self._document_ids + ) + + def _send_to_tenant_queue(self, task_func: Callable[[str, str, Sequence[str]], None]): + logger.info("send dataset %s to tenant queue", self._dataset_id) + if self._tenant_isolated_task_queue.get_task_key(): + # Add to waiting queue using List operations (lpush) + self._tenant_isolated_task_queue.push_tasks( + [ + asdict( + DocumentTask( + tenant_id=self._tenant_id, dataset_id=self._dataset_id, document_ids=self._document_ids + ) + ) + ] + ) + logger.info("push tasks: %s - %s", self._dataset_id, self._document_ids) + else: + # Set flag and execute task + self._tenant_isolated_task_queue.set_task_waiting_time() + task_func.delay( # type: ignore + tenant_id=self._tenant_id, dataset_id=self._dataset_id, document_ids=self._document_ids + ) + logger.info("init tasks: %s - %s", self._dataset_id, self._document_ids) + + def _send_to_default_tenant_queue(self): + self._send_to_tenant_queue(normal_document_indexing_task) + + def _send_to_priority_tenant_queue(self): + self._send_to_tenant_queue(priority_document_indexing_task) + + def _send_to_priority_direct_queue(self): + self._send_to_direct_queue(priority_document_indexing_task) + + def _dispatch(self): + logger.info( + "dispatch args: %s - %s - %s", + self._tenant_id, + self.features.billing.enabled, + self.features.billing.subscription.plan, + ) + # dispatch to different indexing queue with tenant isolation when billing enabled + if self.features.billing.enabled: + if self.features.billing.subscription.plan == CloudPlan.SANDBOX: + # dispatch to normal pipeline queue with tenant self sub queue for sandbox plan + self._send_to_default_tenant_queue() + else: + # dispatch to priority pipeline queue with tenant self sub queue for other plans + self._send_to_priority_tenant_queue() + else: + # dispatch to priority queue without tenant isolation for others, e.g.: self-hosted or enterprise + self._send_to_priority_direct_queue() + + def delay(self): + self._dispatch() diff --git a/api/services/end_user_service.py b/api/services/end_user_service.py index aa4a2e46ec..81098e95bb 100644 --- a/api/services/end_user_service.py +++ b/api/services/end_user_service.py @@ -1,11 +1,15 @@ +import logging from collections.abc import Mapping +from sqlalchemy import case from sqlalchemy.orm import Session from core.app.entities.app_invoke_entities import InvokeFrom from extensions.ext_database import db from models.model import App, DefaultEndUserSessionID, EndUser +logger = logging.getLogger(__name__) + class EndUserService: """ @@ -32,18 +36,36 @@ class EndUserService: user_id = DefaultEndUserSessionID.DEFAULT_SESSION_ID with Session(db.engine, expire_on_commit=False) as session: + # Query with ORDER BY to prioritize exact type matches while maintaining backward compatibility + # This single query approach is more efficient than separate queries end_user = ( session.query(EndUser) .where( EndUser.tenant_id == tenant_id, EndUser.app_id == app_id, EndUser.session_id == user_id, - EndUser.type == type, + ) + .order_by( + # Prioritize records with matching type (0 = match, 1 = no match) + case((EndUser.type == type, 0), else_=1) ) .first() ) - if end_user is None: + if end_user: + # If found a legacy end user with different type, update it for future consistency + if end_user.type != type: + logger.info( + "Upgrading legacy EndUser %s from type=%s to %s for session_id=%s", + end_user.id, + end_user.type, + type, + user_id, + ) + end_user.type = type + session.commit() + else: + # Create new end user if none exists end_user = EndUser( tenant_id=tenant_id, app_id=app_id, diff --git a/api/services/enterprise/enterprise_service.py b/api/services/enterprise/enterprise_service.py index 974aa849db..83d0fcf296 100644 --- a/api/services/enterprise/enterprise_service.py +++ b/api/services/enterprise/enterprise_service.py @@ -92,16 +92,6 @@ class EnterpriseService: return ret - @classmethod - def get_app_access_mode_by_code(cls, app_code: str) -> WebAppSettings: - if not app_code: - raise ValueError("app_code must be provided.") - params = {"appCode": app_code} - data = EnterpriseRequest.send_request("GET", "/webapp/access-mode/code", params=params) - if not data: - raise ValueError("No data found.") - return WebAppSettings.model_validate(data) - @classmethod def update_app_access_mode(cls, app_id: str, access_mode: str): if not app_id: diff --git a/api/services/entities/knowledge_entities/knowledge_entities.py b/api/services/entities/knowledge_entities/knowledge_entities.py index b9a210740d..131e90e195 100644 --- a/api/services/entities/knowledge_entities/knowledge_entities.py +++ b/api/services/entities/knowledge_entities/knowledge_entities.py @@ -158,6 +158,7 @@ class MetadataDetail(BaseModel): class DocumentMetadataOperation(BaseModel): document_id: str metadata_list: list[MetadataDetail] + partial_update: bool = False class MetadataOperationData(BaseModel): diff --git a/api/services/errors/app.py b/api/services/errors/app.py index 338636d9b6..24e4760acc 100644 --- a/api/services/errors/app.py +++ b/api/services/errors/app.py @@ -18,7 +18,29 @@ class WorkflowIdFormatError(Exception): pass -class InvokeDailyRateLimitError(Exception): - """Raised when daily rate limit is exceeded for workflow invocations.""" +class InvokeRateLimitError(Exception): + """Raised when rate limit is exceeded for workflow invocations.""" pass + + +class QuotaExceededError(ValueError): + """Raised when billing quota is exceeded for a feature.""" + + def __init__(self, feature: str, tenant_id: str, required: int): + self.feature = feature + self.tenant_id = tenant_id + self.required = required + super().__init__(f"Quota exceeded for feature '{feature}' (tenant: {tenant_id}). Required: {required}") + + +class TriggerNodeLimitExceededError(ValueError): + """Raised when trigger node count exceeds the plan limit.""" + + def __init__(self, count: int, limit: int): + self.count = count + self.limit = limit + super().__init__( + f"Trigger node count ({count}) exceeds the limit ({limit}) for your subscription plan. " + f"Please upgrade your plan or reduce the number of trigger nodes." + ) diff --git a/api/services/errors/file.py b/api/services/errors/file.py index 29f3f44eec..bf9d65a25b 100644 --- a/api/services/errors/file.py +++ b/api/services/errors/file.py @@ -11,3 +11,7 @@ class FileTooLargeError(BaseServiceError): class UnsupportedFileTypeError(BaseServiceError): pass + + +class BlockedFileExtensionError(BaseServiceError): + description = "File extension '{extension}' is not allowed for security reasons" diff --git a/api/services/external_knowledge_service.py b/api/services/external_knowledge_service.py index 5cd3b471f9..27936f6278 100644 --- a/api/services/external_knowledge_service.py +++ b/api/services/external_knowledge_service.py @@ -62,7 +62,7 @@ class ExternalDatasetService: tenant_id=tenant_id, created_by=user_id, updated_by=user_id, - name=args.get("name"), + name=str(args.get("name")), description=args.get("description", ""), settings=json.dumps(args.get("settings"), ensure_ascii=False), ) @@ -163,7 +163,7 @@ class ExternalDatasetService: external_knowledge_api = ( db.session.query(ExternalKnowledgeApis).filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() ) - if external_knowledge_api is None: + if external_knowledge_api is None or external_knowledge_api.settings is None: raise ValueError("api template not found") settings = json.loads(external_knowledge_api.settings) for setting in settings: @@ -257,12 +257,16 @@ class ExternalDatasetService: db.session.add(dataset) db.session.flush() + if args.get("external_knowledge_id") is None: + raise ValueError("external_knowledge_id is required") + if args.get("external_knowledge_api_id") is None: + raise ValueError("external_knowledge_api_id is required") external_knowledge_binding = ExternalKnowledgeBindings( tenant_id=tenant_id, dataset_id=dataset.id, - external_knowledge_api_id=args.get("external_knowledge_api_id"), - external_knowledge_id=args.get("external_knowledge_id"), + external_knowledge_api_id=args.get("external_knowledge_api_id") or "", + external_knowledge_id=args.get("external_knowledge_id") or "", created_by=user_id, ) db.session.add(external_knowledge_binding) @@ -290,7 +294,7 @@ class ExternalDatasetService: .filter_by(id=external_knowledge_binding.external_knowledge_api_id) .first() ) - if not external_knowledge_api: + if external_knowledge_api is None or external_knowledge_api.settings is None: raise ValueError("external api template not found") settings = json.loads(external_knowledge_api.settings) diff --git a/api/services/feature_service.py b/api/services/feature_service.py index 148442f76e..8035adc734 100644 --- a/api/services/feature_service.py +++ b/api/services/feature_service.py @@ -3,12 +3,13 @@ from enum import StrEnum from pydantic import BaseModel, ConfigDict, Field from configs import dify_config +from enums.cloud_plan import CloudPlan from services.billing_service import BillingService from services.enterprise.enterprise_service import EnterpriseService class SubscriptionModel(BaseModel): - plan: str = "sandbox" + plan: str = CloudPlan.SANDBOX interval: str = "" @@ -53,6 +54,12 @@ class LicenseLimitationModel(BaseModel): return (self.limit - self.size) >= required +class Quota(BaseModel): + usage: int = 0 + limit: int = 0 + reset_date: int = -1 + + class LicenseStatus(StrEnum): NONE = "none" INACTIVE = "inactive" @@ -128,6 +135,8 @@ class FeatureModel(BaseModel): webapp_copyright_enabled: bool = False workspace_members: LicenseLimitationModel = LicenseLimitationModel(enabled=False, size=0, limit=0) is_allow_transfer_workspace: bool = True + trigger_event: Quota = Quota(usage=0, limit=3000, reset_date=0) + api_rate_limit: Quota = Quota(usage=0, limit=5000, reset_date=0) # pydantic configs model_config = ConfigDict(protected_namespaces=()) knowledge_pipeline: KnowledgePipeline = KnowledgePipeline() @@ -186,7 +195,7 @@ class FeatureService: knowledge_rate_limit.enabled = True limit_info = BillingService.get_knowledge_rate_limit(tenant_id) knowledge_rate_limit.limit = limit_info.get("limit", 10) - knowledge_rate_limit.subscription_plan = limit_info.get("subscription_plan", "sandbox") + knowledge_rate_limit.subscription_plan = limit_info.get("subscription_plan", CloudPlan.SANDBOX) return knowledge_rate_limit @classmethod @@ -235,16 +244,28 @@ class FeatureService: def _fulfill_params_from_billing_api(cls, features: FeatureModel, tenant_id: str): billing_info = BillingService.get_info(tenant_id) + features_usage_info = BillingService.get_tenant_feature_plan_usage_info(tenant_id) + features.billing.enabled = billing_info["enabled"] features.billing.subscription.plan = billing_info["subscription"]["plan"] features.billing.subscription.interval = billing_info["subscription"]["interval"] features.education.activated = billing_info["subscription"].get("education", False) - if features.billing.subscription.plan != "sandbox": + if features.billing.subscription.plan != CloudPlan.SANDBOX: features.webapp_copyright_enabled = True else: features.is_allow_transfer_workspace = False + if "trigger_event" in features_usage_info: + features.trigger_event.usage = features_usage_info["trigger_event"]["usage"] + features.trigger_event.limit = features_usage_info["trigger_event"]["limit"] + features.trigger_event.reset_date = features_usage_info["trigger_event"].get("reset_date", -1) + + if "api_rate_limit" in features_usage_info: + features.api_rate_limit.usage = features_usage_info["api_rate_limit"]["usage"] + features.api_rate_limit.limit = features_usage_info["api_rate_limit"]["limit"] + features.api_rate_limit.reset_date = features_usage_info["api_rate_limit"].get("reset_date", -1) + if "members" in billing_info: features.members.size = billing_info["members"]["size"] features.members.limit = billing_info["members"]["limit"] diff --git a/api/services/file_service.py b/api/services/file_service.py index dd6a829ea2..b0c5a32c9f 100644 --- a/api/services/file_service.py +++ b/api/services/file_service.py @@ -23,7 +23,7 @@ from models import Account from models.enums import CreatorUserRole from models.model import EndUser, UploadFile -from .errors.file import FileTooLargeError, UnsupportedFileTypeError +from .errors.file import BlockedFileExtensionError, FileTooLargeError, UnsupportedFileTypeError PREVIEW_WORDS_LIMIT = 3000 @@ -59,6 +59,10 @@ class FileService: if len(filename) > 200: filename = filename.split(".")[0][:200] + "." + extension + # check if extension is in blacklist + if extension and extension in dify_config.UPLOAD_FILE_EXTENSION_BLACKLIST: + raise BlockedFileExtensionError(f"File extension '.{extension}' is not allowed for security reasons") + if source == "datasets" and extension not in DOCUMENT_EXTENSIONS: raise UnsupportedFileTypeError() diff --git a/api/services/hit_testing_service.py b/api/services/hit_testing_service.py index 337181728c..cdbd2355ca 100644 --- a/api/services/hit_testing_service.py +++ b/api/services/hit_testing_service.py @@ -82,7 +82,12 @@ class HitTestingService: logger.debug("Hit testing retrieve in %s seconds", end - start) dataset_query = DatasetQuery( - dataset_id=dataset.id, content=query, source="hit_testing", created_by_role="account", created_by=account.id + dataset_id=dataset.id, + content=query, + source="hit_testing", + source_app_id=None, + created_by_role="account", + created_by=account.id, ) db.session.add(dataset_query) @@ -118,7 +123,12 @@ class HitTestingService: logger.debug("External knowledge hit testing retrieve in %s seconds", end - start) dataset_query = DatasetQuery( - dataset_id=dataset.id, content=query, source="hit_testing", created_by_role="account", created_by=account.id + dataset_id=dataset.id, + content=query, + source="hit_testing", + source_app_id=None, + created_by_role="account", + created_by=account.id, ) db.session.add(dataset_query) diff --git a/api/services/metadata_service.py b/api/services/metadata_service.py index b369994d2d..3329ac349c 100644 --- a/api/services/metadata_service.py +++ b/api/services/metadata_service.py @@ -206,7 +206,10 @@ class MetadataService: document = DocumentService.get_document(dataset.id, operation.document_id) if document is None: raise ValueError("Document not found.") - doc_metadata = {} + if operation.partial_update: + doc_metadata = copy.deepcopy(document.doc_metadata) if document.doc_metadata else {} + else: + doc_metadata = {} for metadata_value in operation.metadata_list: doc_metadata[metadata_value.name] = metadata_value.value if dataset.built_in_field_enabled: @@ -219,9 +222,21 @@ class MetadataService: db.session.add(document) db.session.commit() # deal metadata binding - db.session.query(DatasetMetadataBinding).filter_by(document_id=operation.document_id).delete() + if not operation.partial_update: + db.session.query(DatasetMetadataBinding).filter_by(document_id=operation.document_id).delete() + current_user, current_tenant_id = current_account_with_tenant() for metadata_value in operation.metadata_list: + # check if binding already exists + if operation.partial_update: + existing_binding = ( + db.session.query(DatasetMetadataBinding) + .filter_by(document_id=operation.document_id, metadata_id=metadata_value.id) + .first() + ) + if existing_binding: + continue + dataset_metadata_binding = DatasetMetadataBinding( tenant_id=current_tenant_id, dataset_id=dataset.id, diff --git a/api/services/ops_service.py b/api/services/ops_service.py index e490b7ed3c..a2c8e9118e 100644 --- a/api/services/ops_service.py +++ b/api/services/ops_service.py @@ -29,6 +29,8 @@ class OpsService: if not app: return None tenant_id = app.tenant_id + if trace_config_data.tracing_config is None: + raise ValueError("Tracing config cannot be None.") decrypt_tracing_config = OpsTraceManager.decrypt_tracing_config( tenant_id, tracing_provider, trace_config_data.tracing_config ) diff --git a/api/services/plugin/plugin_parameter_service.py b/api/services/plugin/plugin_parameter_service.py index f08bbaf5b8..c517d9f966 100644 --- a/api/services/plugin/plugin_parameter_service.py +++ b/api/services/plugin/plugin_parameter_service.py @@ -10,9 +10,7 @@ from core.tools.tool_manager import ToolManager from core.tools.utils.encryption import create_tool_provider_encrypter from core.trigger.entities.api_entities import TriggerProviderSubscriptionApiEntity from core.trigger.entities.entities import SubscriptionBuilder -from core.trigger.trigger_manager import TriggerManager from extensions.ext_database import db -from models.provider_ids import TriggerProviderID from models.tools import BuiltinToolProvider from services.trigger.trigger_provider_service import TriggerProviderService from services.trigger.trigger_subscription_builder_service import TriggerSubscriptionBuilderService @@ -84,7 +82,6 @@ class PluginParameterService: credentials = encrypter.decrypt(db_record.credentials) credential_type = db_record.credential_type case "trigger": - provider_controller = TriggerManager.get_trigger_provider(tenant_id, TriggerProviderID(provider)) subscription: TriggerProviderSubscriptionApiEntity | SubscriptionBuilder | None if credential_id: subscription = TriggerSubscriptionBuilderService.get_subscription_builder(credential_id) @@ -100,8 +97,6 @@ class PluginParameterService: credentials = subscription.credentials credential_type = subscription.credential_type or CredentialType.UNAUTHORIZED - case _: - raise ValueError(f"Invalid provider type: {provider_type}") return ( DynamicSelectClient() diff --git a/api/services/rag_pipeline/rag_pipeline.py b/api/services/rag_pipeline/rag_pipeline.py index f6dddd75a3..097d16e2a7 100644 --- a/api/services/rag_pipeline/rag_pipeline.py +++ b/api/services/rag_pipeline/rag_pipeline.py @@ -9,7 +9,7 @@ from typing import Any, Union, cast from uuid import uuid4 from flask_login import current_user -from sqlalchemy import func, or_, select +from sqlalchemy import func, select from sqlalchemy.orm import Session, sessionmaker import contexts @@ -94,6 +94,7 @@ class RagPipelineService: self._node_execution_service_repo = DifyAPIRepositoryFactory.create_api_workflow_node_execution_repository( session_maker ) + self._workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker) @classmethod def get_pipeline_templates(cls, type: str = "built-in", language: str = "en-US") -> dict: @@ -1015,48 +1016,21 @@ class RagPipelineService: :param args: request args """ limit = int(args.get("limit", 20)) + last_id = args.get("last_id") - base_query = db.session.query(WorkflowRun).where( - WorkflowRun.tenant_id == pipeline.tenant_id, - WorkflowRun.app_id == pipeline.id, - or_( - WorkflowRun.triggered_from == WorkflowRunTriggeredFrom.RAG_PIPELINE_RUN.value, - WorkflowRun.triggered_from == WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING.value, - ), + triggered_from_values = [ + WorkflowRunTriggeredFrom.RAG_PIPELINE_RUN, + WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING, + ] + + return self._workflow_run_repo.get_paginated_workflow_runs( + tenant_id=pipeline.tenant_id, + app_id=pipeline.id, + triggered_from=triggered_from_values, + limit=limit, + last_id=last_id, ) - if args.get("last_id"): - last_workflow_run = base_query.where( - WorkflowRun.id == args.get("last_id"), - ).first() - - if not last_workflow_run: - raise ValueError("Last workflow run not exists") - - workflow_runs = ( - base_query.where( - WorkflowRun.created_at < last_workflow_run.created_at, WorkflowRun.id != last_workflow_run.id - ) - .order_by(WorkflowRun.created_at.desc()) - .limit(limit) - .all() - ) - else: - workflow_runs = base_query.order_by(WorkflowRun.created_at.desc()).limit(limit).all() - - has_more = False - if len(workflow_runs) == limit: - current_page_first_workflow_run = workflow_runs[-1] - rest_count = base_query.where( - WorkflowRun.created_at < current_page_first_workflow_run.created_at, - WorkflowRun.id != current_page_first_workflow_run.id, - ).count() - - if rest_count > 0: - has_more = True - - return InfiniteScrollPagination(data=workflow_runs, limit=limit, has_more=has_more) - def get_rag_pipeline_workflow_run(self, pipeline: Pipeline, run_id: str) -> WorkflowRun | None: """ Get workflow run detail @@ -1064,18 +1038,12 @@ class RagPipelineService: :param app_model: app model :param run_id: workflow run id """ - workflow_run = ( - db.session.query(WorkflowRun) - .where( - WorkflowRun.tenant_id == pipeline.tenant_id, - WorkflowRun.app_id == pipeline.id, - WorkflowRun.id == run_id, - ) - .first() + return self._workflow_run_repo.get_workflow_run_by_id( + tenant_id=pipeline.tenant_id, + app_id=pipeline.id, + run_id=run_id, ) - return workflow_run - def get_rag_pipeline_workflow_run_node_executions( self, pipeline: Pipeline, @@ -1151,13 +1119,19 @@ class RagPipelineService: with Session(db.engine) as session: rag_pipeline_dsl_service = RagPipelineDslService(session) dsl = rag_pipeline_dsl_service.export_rag_pipeline_dsl(pipeline=pipeline, include_secret=True) - + if args.get("icon_info") is None: + args["icon_info"] = {} + if args.get("description") is None: + raise ValueError("Description is required") + if args.get("name") is None: + raise ValueError("Name is required") pipeline_customized_template = PipelineCustomizedTemplate( - name=args.get("name"), - description=args.get("description"), - icon=args.get("icon_info"), + name=args.get("name") or "", + description=args.get("description") or "", + icon=args.get("icon_info") or {}, tenant_id=pipeline.tenant_id, yaml_content=dsl, + install_count=0, position=max_position + 1 if max_position else 1, chunk_structure=dataset.chunk_structure, language="en-US", @@ -1297,8 +1271,8 @@ class RagPipelineService: ) providers_map = {provider.plugin_id: provider.to_dict() for provider in providers} - plugin_manifests = marketplace.batch_fetch_plugin_manifests(plugin_ids) - plugin_manifests_map = {manifest.plugin_id: manifest for manifest in plugin_manifests} + plugin_manifests = marketplace.batch_fetch_plugin_by_ids(plugin_ids) + plugin_manifests_map = {manifest["plugin_id"]: manifest for manifest in plugin_manifests} installed_plugin_list = [] uninstalled_plugin_list = [] @@ -1308,14 +1282,7 @@ class RagPipelineService: else: plugin_manifest = plugin_manifests_map.get(plugin_id) if plugin_manifest: - uninstalled_plugin_list.append( - { - "plugin_id": plugin_id, - "name": plugin_manifest.name, - "icon": plugin_manifest.icon, - "plugin_unique_identifier": plugin_manifest.latest_package_identifier, - } - ) + uninstalled_plugin_list.append(plugin_manifest) # Build recommended plugins list return { diff --git a/api/services/rag_pipeline/rag_pipeline_task_proxy.py b/api/services/rag_pipeline/rag_pipeline_task_proxy.py new file mode 100644 index 0000000000..94dd7941da --- /dev/null +++ b/api/services/rag_pipeline/rag_pipeline_task_proxy.py @@ -0,0 +1,106 @@ +import json +import logging +from collections.abc import Callable, Sequence +from functools import cached_property + +from core.app.entities.rag_pipeline_invoke_entities import RagPipelineInvokeEntity +from core.rag.pipeline.queue import TenantIsolatedTaskQueue +from enums.cloud_plan import CloudPlan +from extensions.ext_database import db +from services.feature_service import FeatureService +from services.file_service import FileService +from tasks.rag_pipeline.priority_rag_pipeline_run_task import priority_rag_pipeline_run_task +from tasks.rag_pipeline.rag_pipeline_run_task import rag_pipeline_run_task + +logger = logging.getLogger(__name__) + + +class RagPipelineTaskProxy: + # Default uploaded file name for rag pipeline invoke entities + _RAG_PIPELINE_INVOKE_ENTITIES_FILE_NAME = "rag_pipeline_invoke_entities.json" + + def __init__( + self, dataset_tenant_id: str, user_id: str, rag_pipeline_invoke_entities: Sequence[RagPipelineInvokeEntity] + ): + self._dataset_tenant_id = dataset_tenant_id + self._user_id = user_id + self._rag_pipeline_invoke_entities = rag_pipeline_invoke_entities + self._tenant_isolated_task_queue = TenantIsolatedTaskQueue(dataset_tenant_id, "pipeline") + + @cached_property + def features(self): + return FeatureService.get_features(self._dataset_tenant_id) + + def _upload_invoke_entities(self) -> str: + text = [item.model_dump() for item in self._rag_pipeline_invoke_entities] + # Convert list to proper JSON string + json_text = json.dumps(text) + upload_file = FileService(db.engine).upload_text( + json_text, self._RAG_PIPELINE_INVOKE_ENTITIES_FILE_NAME, self._user_id, self._dataset_tenant_id + ) + return upload_file.id + + def _send_to_direct_queue(self, upload_file_id: str, task_func: Callable[[str, str], None]): + logger.info("send file %s to direct queue", upload_file_id) + task_func.delay( # type: ignore + rag_pipeline_invoke_entities_file_id=upload_file_id, + tenant_id=self._dataset_tenant_id, + ) + + def _send_to_tenant_queue(self, upload_file_id: str, task_func: Callable[[str, str], None]): + logger.info("send file %s to tenant queue", upload_file_id) + if self._tenant_isolated_task_queue.get_task_key(): + # Add to waiting queue using List operations (lpush) + self._tenant_isolated_task_queue.push_tasks([upload_file_id]) + logger.info("push tasks: %s", upload_file_id) + else: + # Set flag and execute task + self._tenant_isolated_task_queue.set_task_waiting_time() + task_func.delay( # type: ignore + rag_pipeline_invoke_entities_file_id=upload_file_id, + tenant_id=self._dataset_tenant_id, + ) + logger.info("init tasks: %s", upload_file_id) + + def _send_to_default_tenant_queue(self, upload_file_id: str): + self._send_to_tenant_queue(upload_file_id, rag_pipeline_run_task) + + def _send_to_priority_tenant_queue(self, upload_file_id: str): + self._send_to_tenant_queue(upload_file_id, priority_rag_pipeline_run_task) + + def _send_to_priority_direct_queue(self, upload_file_id: str): + self._send_to_direct_queue(upload_file_id, priority_rag_pipeline_run_task) + + def _dispatch(self): + upload_file_id = self._upload_invoke_entities() + if not upload_file_id: + raise ValueError("upload_file_id is empty") + + logger.info( + "dispatch args: %s - %s - %s", + self._dataset_tenant_id, + self.features.billing.enabled, + self.features.billing.subscription.plan, + ) + + # dispatch to different pipeline queue with tenant isolation when billing enabled + if self.features.billing.enabled: + if self.features.billing.subscription.plan == CloudPlan.SANDBOX: + # dispatch to normal pipeline queue with tenant isolation for sandbox plan + self._send_to_default_tenant_queue(upload_file_id) + else: + # dispatch to priority pipeline queue with tenant isolation for other plans + self._send_to_priority_tenant_queue(upload_file_id) + else: + # dispatch to priority pipeline queue without tenant isolation for others, e.g.: self-hosted or enterprise + self._send_to_priority_direct_queue(upload_file_id) + + def delay(self): + if not self._rag_pipeline_invoke_entities: + logger.warning( + "Received empty rag pipeline invoke entities, no tasks delivered: %s %s", + self._dataset_tenant_id, + self._user_id, + ) + return + self._dispatch() diff --git a/api/services/rag_pipeline/rag_pipeline_transform_service.py b/api/services/rag_pipeline/rag_pipeline_transform_service.py index d79ab71668..22025dd44a 100644 --- a/api/services/rag_pipeline/rag_pipeline_transform_service.py +++ b/api/services/rag_pipeline/rag_pipeline_transform_service.py @@ -322,9 +322,9 @@ class RagPipelineTransformService: datasource_info=data_source_info, input_data={}, created_by=document.created_by, - created_at=document.created_at, datasource_node_id=file_node_id, ) + document_pipeline_execution_log.created_at = document.created_at db.session.add(document) db.session.add(document_pipeline_execution_log) elif document.data_source_type == "notion_import": @@ -350,9 +350,9 @@ class RagPipelineTransformService: datasource_info=data_source_info, input_data={}, created_by=document.created_by, - created_at=document.created_at, datasource_node_id=notion_node_id, ) + document_pipeline_execution_log.created_at = document.created_at db.session.add(document) db.session.add(document_pipeline_execution_log) elif document.data_source_type == "website_crawl": @@ -379,8 +379,8 @@ class RagPipelineTransformService: datasource_info=data_source_info, input_data={}, created_by=document.created_by, - created_at=document.created_at, datasource_node_id=datasource_node_id, ) + document_pipeline_execution_log.created_at = document.created_at db.session.add(document) db.session.add(document_pipeline_execution_log) diff --git a/api/services/rag_pipeline/transform/website-crawl-general-economy.yml b/api/services/rag_pipeline/transform/website-crawl-general-economy.yml index 241d94c95d..a0f4b3bdd8 100644 --- a/api/services/rag_pipeline/transform/website-crawl-general-economy.yml +++ b/api/services/rag_pipeline/transform/website-crawl-general-economy.yml @@ -126,7 +126,7 @@ workflow: type: mixed value: '{{#rag.1752491761974.jina_use_sitemap#}}' plugin_id: langgenius/jina_datasource - provider_name: jina + provider_name: jinareader provider_type: website_crawl selected: false title: Jina Reader diff --git a/api/services/rag_pipeline/transform/website-crawl-general-high-quality.yml b/api/services/rag_pipeline/transform/website-crawl-general-high-quality.yml index 52b8f822c0..f58679fb6c 100644 --- a/api/services/rag_pipeline/transform/website-crawl-general-high-quality.yml +++ b/api/services/rag_pipeline/transform/website-crawl-general-high-quality.yml @@ -126,7 +126,7 @@ workflow: type: mixed value: '{{#rag.1752491761974.jina_use_sitemap#}}' plugin_id: langgenius/jina_datasource - provider_name: jina + provider_name: jinareader provider_type: website_crawl selected: false title: Jina Reader diff --git a/api/services/rag_pipeline/transform/website-crawl-parentchild.yml b/api/services/rag_pipeline/transform/website-crawl-parentchild.yml index 5d609bd12b..85b1cfd87d 100644 --- a/api/services/rag_pipeline/transform/website-crawl-parentchild.yml +++ b/api/services/rag_pipeline/transform/website-crawl-parentchild.yml @@ -419,7 +419,7 @@ workflow: type: mixed value: '{{#rag.1752491761974.jina_use_sitemap#}}' plugin_id: langgenius/jina_datasource - provider_name: jina + provider_name: jinareader provider_type: website_crawl selected: false title: Jina Reader diff --git a/api/services/tools/api_tools_manage_service.py b/api/services/tools/api_tools_manage_service.py index f0a0bcde1b..250d29f335 100644 --- a/api/services/tools/api_tools_manage_service.py +++ b/api/services/tools/api_tools_manage_service.py @@ -306,7 +306,7 @@ class ApiToolManageService: if name in masked_credentials and value == masked_credentials[name]: credentials[name] = original_credentials[name] - credentials = encrypter.encrypt(credentials) + credentials = dict(encrypter.encrypt(credentials)) provider.credentials_str = json.dumps(credentials) db.session.add(provider) diff --git a/api/services/tools/builtin_tools_manage_service.py b/api/services/tools/builtin_tools_manage_service.py index 1543b1a02e..783f2f0d21 100644 --- a/api/services/tools/builtin_tools_manage_service.py +++ b/api/services/tools/builtin_tools_manage_service.py @@ -353,7 +353,7 @@ class BuiltinToolManageService: decrypt_credential = encrypter.mask_plugin_credentials(encrypter.decrypt(provider.credentials)) credential_entity = ToolTransformService.convert_builtin_provider_to_credential_entity( provider=provider, - credentials=decrypt_credential, + credentials=dict(decrypt_credential), ) credentials.append(credential_entity) return credentials diff --git a/api/services/tools/mcp_tools_manage_service.py b/api/services/tools/mcp_tools_manage_service.py index 92c33c1a49..7eedf76aed 100644 --- a/api/services/tools/mcp_tools_manage_service.py +++ b/api/services/tools/mcp_tools_manage_service.py @@ -1,86 +1,119 @@ import hashlib import json +import logging +from collections.abc import Mapping from datetime import datetime +from enum import StrEnum from typing import Any +from urllib.parse import urlparse -from sqlalchemy import or_ +from pydantic import BaseModel, Field +from sqlalchemy import or_, select from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session +from core.entities.mcp_provider import MCPAuthentication, MCPConfiguration, MCPProviderEntity from core.helper import encrypter from core.helper.provider_cache import NoOpProviderCredentialCache +from core.mcp.auth.auth_flow import auth +from core.mcp.auth_client import MCPClientWithAuthRetry from core.mcp.error import MCPAuthError, MCPError -from core.mcp.mcp_client import MCPClient from core.tools.entities.api_entities import ToolProviderApiEntity -from core.tools.entities.common_entities import I18nObject -from core.tools.entities.tool_entities import ToolProviderType -from core.tools.mcp_tool.provider import MCPToolProviderController from core.tools.utils.encryption import ProviderConfigEncrypter -from extensions.ext_database import db from models.tools import MCPToolProvider from services.tools.tools_transform_service import ToolTransformService +logger = logging.getLogger(__name__) + +# Constants UNCHANGED_SERVER_URL_PLACEHOLDER = "[__HIDDEN__]" +CLIENT_NAME = "Dify" +EMPTY_TOOLS_JSON = "[]" +EMPTY_CREDENTIALS_JSON = "{}" + + +class OAuthDataType(StrEnum): + """Types of OAuth data that can be saved.""" + + TOKENS = "tokens" + CLIENT_INFO = "client_info" + CODE_VERIFIER = "code_verifier" + MIXED = "mixed" + + +class ReconnectResult(BaseModel): + """Result of reconnecting to an MCP provider""" + + authed: bool = Field(description="Whether the provider is authenticated") + tools: str = Field(description="JSON string of tool list") + encrypted_credentials: str = Field(description="JSON string of encrypted credentials") + + +class ServerUrlValidationResult(BaseModel): + """Result of server URL validation check""" + + needs_validation: bool + validation_passed: bool = False + reconnect_result: ReconnectResult | None = None + encrypted_server_url: str | None = None + server_url_hash: str | None = None + + @property + def should_update_server_url(self) -> bool: + """Check if server URL should be updated based on validation result""" + return self.needs_validation and self.validation_passed and self.reconnect_result is not None class MCPToolManageService: - """ - Service class for managing mcp tools. - """ + """Service class for managing MCP tools and providers.""" - @staticmethod - def _encrypt_headers(headers: dict[str, str], tenant_id: str) -> dict[str, str]: + def __init__(self, session: Session): + self._session = session + + # ========== Provider CRUD Operations ========== + + def get_provider( + self, *, provider_id: str | None = None, server_identifier: str | None = None, tenant_id: str + ) -> MCPToolProvider: """ - Encrypt headers using ProviderConfigEncrypter with all headers as SECRET_INPUT. + Get MCP provider by ID or server identifier. Args: - headers: Dictionary of headers to encrypt - tenant_id: Tenant ID for encryption + provider_id: Provider ID (UUID) + server_identifier: Server identifier + tenant_id: Tenant ID Returns: - Dictionary with all headers encrypted + MCPToolProvider instance + + Raises: + ValueError: If provider not found """ - if not headers: - return {} + if server_identifier: + stmt = select(MCPToolProvider).where( + MCPToolProvider.tenant_id == tenant_id, MCPToolProvider.server_identifier == server_identifier + ) + else: + stmt = select(MCPToolProvider).where( + MCPToolProvider.tenant_id == tenant_id, MCPToolProvider.id == provider_id + ) - from core.entities.provider_entities import BasicProviderConfig - from core.helper.provider_cache import NoOpProviderCredentialCache - from core.tools.utils.encryption import create_provider_encrypter - - # Create dynamic config for all headers as SECRET_INPUT - config = [BasicProviderConfig(type=BasicProviderConfig.Type.SECRET_INPUT, name=key) for key in headers] - - encrypter_instance, _ = create_provider_encrypter( - tenant_id=tenant_id, - config=config, - cache=NoOpProviderCredentialCache(), - ) - - return encrypter_instance.encrypt(headers) - - @staticmethod - def get_mcp_provider_by_provider_id(provider_id: str, tenant_id: str) -> MCPToolProvider: - res = ( - db.session.query(MCPToolProvider) - .where(MCPToolProvider.tenant_id == tenant_id, MCPToolProvider.id == provider_id) - .first() - ) - if not res: + provider = self._session.scalar(stmt) + if not provider: raise ValueError("MCP tool not found") - return res + return provider - @staticmethod - def get_mcp_provider_by_server_identifier(server_identifier: str, tenant_id: str) -> MCPToolProvider: - res = ( - db.session.query(MCPToolProvider) - .where(MCPToolProvider.tenant_id == tenant_id, MCPToolProvider.server_identifier == server_identifier) - .first() - ) - if not res: - raise ValueError("MCP tool not found") - return res + def get_provider_entity(self, provider_id: str, tenant_id: str, by_server_id: bool = False) -> MCPProviderEntity: + """Get provider entity by ID or server identifier.""" + if by_server_id: + db_provider = self.get_provider(server_identifier=provider_id, tenant_id=tenant_id) + else: + db_provider = self.get_provider(provider_id=provider_id, tenant_id=tenant_id) + return db_provider.to_entity() - @staticmethod - def create_mcp_provider( + def create_provider( + self, + *, tenant_id: str, name: str, server_url: str, @@ -89,37 +122,30 @@ class MCPToolManageService: icon_type: str, icon_background: str, server_identifier: str, - timeout: float, - sse_read_timeout: float, + configuration: MCPConfiguration, + authentication: MCPAuthentication | None = None, headers: dict[str, str] | None = None, ) -> ToolProviderApiEntity: - server_url_hash = hashlib.sha256(server_url.encode()).hexdigest() - existing_provider = ( - db.session.query(MCPToolProvider) - .where( - MCPToolProvider.tenant_id == tenant_id, - or_( - MCPToolProvider.name == name, - MCPToolProvider.server_url_hash == server_url_hash, - MCPToolProvider.server_identifier == server_identifier, - ), - ) - .first() - ) - if existing_provider: - if existing_provider.name == name: - raise ValueError(f"MCP tool {name} already exists") - if existing_provider.server_url_hash == server_url_hash: - raise ValueError(f"MCP tool {server_url} already exists") - if existing_provider.server_identifier == server_identifier: - raise ValueError(f"MCP tool {server_identifier} already exists") - encrypted_server_url = encrypter.encrypt_token(tenant_id, server_url) - # Encrypt headers - encrypted_headers = None - if headers: - encrypted_headers_dict = MCPToolManageService._encrypt_headers(headers, tenant_id) - encrypted_headers = json.dumps(encrypted_headers_dict) + """Create a new MCP provider.""" + # Validate URL format + if not self._is_valid_url(server_url): + raise ValueError("Server URL is not valid.") + server_url_hash = hashlib.sha256(server_url.encode()).hexdigest() + + # Check for existing provider + self._check_provider_exists(tenant_id, name, server_url_hash, server_identifier) + + # Encrypt sensitive data + encrypted_server_url = encrypter.encrypt_token(tenant_id, server_url) + encrypted_headers = self._prepare_encrypted_dict(headers, tenant_id) if headers else None + encrypted_credentials = None + if authentication is not None and authentication.client_id: + encrypted_credentials = self._build_and_encrypt_credentials( + authentication.client_id, authentication.client_secret, tenant_id + ) + + # Create provider mcp_tool = MCPToolProvider( tenant_id=tenant_id, name=name, @@ -127,93 +153,23 @@ class MCPToolManageService: server_url_hash=server_url_hash, user_id=user_id, authed=False, - tools="[]", - icon=json.dumps({"content": icon, "background": icon_background}) if icon_type == "emoji" else icon, + tools=EMPTY_TOOLS_JSON, + icon=self._prepare_icon(icon, icon_type, icon_background), server_identifier=server_identifier, - timeout=timeout, - sse_read_timeout=sse_read_timeout, + timeout=configuration.timeout, + sse_read_timeout=configuration.sse_read_timeout, encrypted_headers=encrypted_headers, - ) - db.session.add(mcp_tool) - db.session.commit() - return ToolTransformService.mcp_provider_to_user_provider(mcp_tool, for_list=True) - - @staticmethod - def retrieve_mcp_tools(tenant_id: str, for_list: bool = False) -> list[ToolProviderApiEntity]: - mcp_providers = ( - db.session.query(MCPToolProvider) - .where(MCPToolProvider.tenant_id == tenant_id) - .order_by(MCPToolProvider.name) - .all() - ) - return [ - ToolTransformService.mcp_provider_to_user_provider(mcp_provider, for_list=for_list) - for mcp_provider in mcp_providers - ] - - @classmethod - def list_mcp_tool_from_remote_server(cls, tenant_id: str, provider_id: str) -> ToolProviderApiEntity: - mcp_provider = cls.get_mcp_provider_by_provider_id(provider_id, tenant_id) - server_url = mcp_provider.decrypted_server_url - authed = mcp_provider.authed - headers = mcp_provider.decrypted_headers - timeout = mcp_provider.timeout - sse_read_timeout = mcp_provider.sse_read_timeout - - try: - with MCPClient( - server_url, - provider_id, - tenant_id, - authed=authed, - for_list=True, - headers=headers, - timeout=timeout, - sse_read_timeout=sse_read_timeout, - ) as mcp_client: - tools = mcp_client.list_tools() - except MCPAuthError: - raise ValueError("Please auth the tool first") - except MCPError as e: - raise ValueError(f"Failed to connect to MCP server: {e}") - - try: - mcp_provider = cls.get_mcp_provider_by_provider_id(provider_id, tenant_id) - mcp_provider.tools = json.dumps([tool.model_dump() for tool in tools]) - mcp_provider.authed = True - mcp_provider.updated_at = datetime.now() - db.session.commit() - except Exception: - db.session.rollback() - raise - - user = mcp_provider.load_user() - if not mcp_provider.icon: - raise ValueError("MCP provider icon is required") - return ToolProviderApiEntity( - id=mcp_provider.id, - name=mcp_provider.name, - tools=ToolTransformService.mcp_tool_to_user_tool(mcp_provider, tools), - type=ToolProviderType.MCP, - icon=mcp_provider.icon, - author=user.name if user else "Anonymous", - server_url=mcp_provider.masked_server_url, - updated_at=int(mcp_provider.updated_at.timestamp()), - description=I18nObject(en_US="", zh_Hans=""), - label=I18nObject(en_US=mcp_provider.name, zh_Hans=mcp_provider.name), - plugin_unique_identifier=mcp_provider.server_identifier, + encrypted_credentials=encrypted_credentials, ) - @classmethod - def delete_mcp_tool(cls, tenant_id: str, provider_id: str): - mcp_tool = cls.get_mcp_provider_by_provider_id(provider_id, tenant_id) + self._session.add(mcp_tool) + self._session.flush() + mcp_providers = ToolTransformService.mcp_provider_to_user_provider(mcp_tool, for_list=True) + return mcp_providers - db.session.delete(mcp_tool) - db.session.commit() - - @classmethod - def update_mcp_provider( - cls, + def update_provider( + self, + *, tenant_id: str, provider_id: str, name: str, @@ -222,129 +178,557 @@ class MCPToolManageService: icon_type: str, icon_background: str, server_identifier: str, - timeout: float | None = None, - sse_read_timeout: float | None = None, headers: dict[str, str] | None = None, - ): - mcp_provider = cls.get_mcp_provider_by_provider_id(provider_id, tenant_id) + configuration: MCPConfiguration, + authentication: MCPAuthentication | None = None, + validation_result: ServerUrlValidationResult | None = None, + ) -> None: + """ + Update an MCP provider. - reconnect_result = None + Args: + validation_result: Pre-validation result from validate_server_url_change. + If provided and contains reconnect_result, it will be used + instead of performing network operations. + """ + mcp_provider = self.get_provider(provider_id=provider_id, tenant_id=tenant_id) + + # Check for duplicate name (excluding current provider) + if name != mcp_provider.name: + stmt = select(MCPToolProvider).where( + MCPToolProvider.tenant_id == tenant_id, + MCPToolProvider.name == name, + MCPToolProvider.id != provider_id, + ) + existing_provider = self._session.scalar(stmt) + if existing_provider: + raise ValueError(f"MCP tool {name} already exists") + + # Get URL update data from validation result encrypted_server_url = None server_url_hash = None + reconnect_result = None - if UNCHANGED_SERVER_URL_PLACEHOLDER not in server_url: - encrypted_server_url = encrypter.encrypt_token(tenant_id, server_url) - server_url_hash = hashlib.sha256(server_url.encode()).hexdigest() - - if server_url_hash != mcp_provider.server_url_hash: - reconnect_result = cls._re_connect_mcp_provider(server_url, provider_id, tenant_id) + if validation_result and validation_result.encrypted_server_url: + # Use all data from validation result + encrypted_server_url = validation_result.encrypted_server_url + server_url_hash = validation_result.server_url_hash + reconnect_result = validation_result.reconnect_result try: + # Update basic fields mcp_provider.updated_at = datetime.now() mcp_provider.name = name - mcp_provider.icon = ( - json.dumps({"content": icon, "background": icon_background}) if icon_type == "emoji" else icon - ) + mcp_provider.icon = self._prepare_icon(icon, icon_type, icon_background) mcp_provider.server_identifier = server_identifier - if encrypted_server_url is not None and server_url_hash is not None: + # Update server URL if changed + if encrypted_server_url and server_url_hash: mcp_provider.server_url = encrypted_server_url mcp_provider.server_url_hash = server_url_hash if reconnect_result: - mcp_provider.authed = reconnect_result["authed"] - mcp_provider.tools = reconnect_result["tools"] - mcp_provider.encrypted_credentials = reconnect_result["encrypted_credentials"] + mcp_provider.authed = reconnect_result.authed + mcp_provider.tools = reconnect_result.tools + mcp_provider.encrypted_credentials = reconnect_result.encrypted_credentials - if timeout is not None: - mcp_provider.timeout = timeout - if sse_read_timeout is not None: - mcp_provider.sse_read_timeout = sse_read_timeout + # Update optional configuration fields + self._update_optional_fields(mcp_provider, configuration) + + # Update headers if provided if headers is not None: - # Merge masked headers from frontend with existing real values - if headers: - # existing decrypted and masked headers - existing_decrypted = mcp_provider.decrypted_headers - existing_masked = mcp_provider.masked_headers + mcp_provider.encrypted_headers = self._process_headers(headers, mcp_provider, tenant_id) - # Build final headers: if value equals masked existing, keep original decrypted value - final_headers: dict[str, str] = {} - for key, incoming_value in headers.items(): - if ( - key in existing_masked - and key in existing_decrypted - and isinstance(incoming_value, str) - and incoming_value == existing_masked.get(key) - ): - # unchanged, use original decrypted value - final_headers[key] = str(existing_decrypted[key]) - else: - final_headers[key] = incoming_value + # Update credentials if provided + if authentication and authentication.client_id: + mcp_provider.encrypted_credentials = self._process_credentials(authentication, mcp_provider, tenant_id) - encrypted_headers_dict = MCPToolManageService._encrypt_headers(final_headers, tenant_id) - mcp_provider.encrypted_headers = json.dumps(encrypted_headers_dict) - else: - # Explicitly clear headers if empty dict passed - mcp_provider.encrypted_headers = None - db.session.commit() + # Flush changes to database + self._session.flush() except IntegrityError as e: - db.session.rollback() - error_msg = str(e.orig) - if "unique_mcp_provider_name" in error_msg: - raise ValueError(f"MCP tool {name} already exists") - if "unique_mcp_provider_server_url" in error_msg: - raise ValueError(f"MCP tool {server_url} already exists") - if "unique_mcp_provider_server_identifier" in error_msg: - raise ValueError(f"MCP tool {server_identifier} already exists") - raise - except Exception: - db.session.rollback() - raise + self._handle_integrity_error(e, name, server_url, server_identifier) - @classmethod - def update_mcp_provider_credentials( - cls, mcp_provider: MCPToolProvider, credentials: dict[str, Any], authed: bool = False - ): - provider_controller = MCPToolProviderController.from_db(mcp_provider) + def delete_provider(self, *, tenant_id: str, provider_id: str) -> None: + """Delete an MCP provider.""" + mcp_tool = self.get_provider(provider_id=provider_id, tenant_id=tenant_id) + self._session.delete(mcp_tool) + + def list_providers( + self, *, tenant_id: str, for_list: bool = False, include_sensitive: bool = True + ) -> list[ToolProviderApiEntity]: + """List all MCP providers for a tenant. + + Args: + tenant_id: Tenant ID + for_list: If True, return provider ID; if False, return server identifier + include_sensitive: If False, skip expensive decryption operations (default: True for backward compatibility) + """ + from models.account import Account + + stmt = select(MCPToolProvider).where(MCPToolProvider.tenant_id == tenant_id).order_by(MCPToolProvider.name) + mcp_providers = self._session.scalars(stmt).all() + + if not mcp_providers: + return [] + + # Batch query all users to avoid N+1 problem + user_ids = {provider.user_id for provider in mcp_providers} + users = self._session.query(Account).where(Account.id.in_(user_ids)).all() + user_name_map = {user.id: user.name for user in users} + + return [ + ToolTransformService.mcp_provider_to_user_provider( + provider, + for_list=for_list, + user_name=user_name_map.get(provider.user_id), + include_sensitive=include_sensitive, + ) + for provider in mcp_providers + ] + + # ========== Tool Operations ========== + + def list_provider_tools(self, *, tenant_id: str, provider_id: str) -> ToolProviderApiEntity: + """List tools from remote MCP server.""" + # Load provider and convert to entity + db_provider = self.get_provider(provider_id=provider_id, tenant_id=tenant_id) + provider_entity = db_provider.to_entity() + + # Verify authentication + if not provider_entity.authed: + raise ValueError("Please auth the tool first") + + # Prepare headers with auth token + headers = self._prepare_auth_headers(provider_entity) + + # Retrieve tools from remote server + server_url = provider_entity.decrypt_server_url() + try: + tools = self._retrieve_remote_mcp_tools(server_url, headers, provider_entity) + except MCPError as e: + raise ValueError(f"Failed to connect to MCP server: {e}") + + # Update database with retrieved tools + db_provider.tools = json.dumps([tool.model_dump() for tool in tools]) + db_provider.authed = True + db_provider.updated_at = datetime.now() + self._session.flush() + + # Build API response + return self._build_tool_provider_response(db_provider, provider_entity, tools) + + # ========== OAuth and Credentials Operations ========== + + def update_provider_credentials( + self, *, provider_id: str, tenant_id: str, credentials: dict[str, Any], authed: bool | None = None + ) -> None: + """ + Update provider credentials with encryption. + + Args: + provider_id: Provider ID + tenant_id: Tenant ID + credentials: Credentials to save + authed: Whether provider is authenticated (None means keep current state) + """ + from core.tools.mcp_tool.provider import MCPToolProviderController + + # Get provider from current session + provider = self.get_provider(provider_id=provider_id, tenant_id=tenant_id) + + # Encrypt new credentials + provider_controller = MCPToolProviderController.from_db(provider) tool_configuration = ProviderConfigEncrypter( - tenant_id=mcp_provider.tenant_id, + tenant_id=provider.tenant_id, config=list(provider_controller.get_credentials_schema()), provider_config_cache=NoOpProviderCredentialCache(), ) - credentials = tool_configuration.encrypt(credentials) - mcp_provider.updated_at = datetime.now() - mcp_provider.encrypted_credentials = json.dumps({**mcp_provider.credentials, **credentials}) - mcp_provider.authed = authed - if not authed: - mcp_provider.tools = "[]" - db.session.commit() + encrypted_credentials = tool_configuration.encrypt(credentials) - @classmethod - def _re_connect_mcp_provider(cls, server_url: str, provider_id: str, tenant_id: str): - # Get the existing provider to access headers and timeout settings - mcp_provider = cls.get_mcp_provider_by_provider_id(provider_id, tenant_id) - headers = mcp_provider.decrypted_headers - timeout = mcp_provider.timeout - sse_read_timeout = mcp_provider.sse_read_timeout + # Update provider + provider.updated_at = datetime.now() + provider.encrypted_credentials = json.dumps({**provider.credentials, **encrypted_credentials}) + + if authed is not None: + provider.authed = authed + if not authed: + provider.tools = EMPTY_TOOLS_JSON + + # Flush changes to database + self._session.flush() + + def save_oauth_data( + self, provider_id: str, tenant_id: str, data: dict[str, Any], data_type: OAuthDataType = OAuthDataType.MIXED + ) -> None: + """ + Save OAuth-related data (tokens, client info, code verifier). + + Args: + provider_id: Provider ID + tenant_id: Tenant ID + data: Data to save (tokens, client info, or code verifier) + data_type: Type of OAuth data to save + """ + # Determine if this makes the provider authenticated + authed = ( + data_type == OAuthDataType.TOKENS or (data_type == OAuthDataType.MIXED and "access_token" in data) or None + ) + + # update_provider_credentials will validate provider existence + self.update_provider_credentials(provider_id=provider_id, tenant_id=tenant_id, credentials=data, authed=authed) + + def clear_provider_credentials(self, *, provider_id: str, tenant_id: str) -> None: + """ + Clear all credentials for a provider. + + Args: + provider_id: Provider ID + tenant_id: Tenant ID + """ + # Get provider from current session + provider = self.get_provider(provider_id=provider_id, tenant_id=tenant_id) + + provider.tools = EMPTY_TOOLS_JSON + provider.encrypted_credentials = EMPTY_CREDENTIALS_JSON + provider.updated_at = datetime.now() + provider.authed = False + + # ========== Private Helper Methods ========== + + def _check_provider_exists(self, tenant_id: str, name: str, server_url_hash: str, server_identifier: str) -> None: + """Check if provider with same attributes already exists.""" + stmt = select(MCPToolProvider).where( + MCPToolProvider.tenant_id == tenant_id, + or_( + MCPToolProvider.name == name, + MCPToolProvider.server_url_hash == server_url_hash, + MCPToolProvider.server_identifier == server_identifier, + ), + ) + existing_provider = self._session.scalar(stmt) + + if existing_provider: + if existing_provider.name == name: + raise ValueError(f"MCP tool {name} already exists") + if existing_provider.server_url_hash == server_url_hash: + raise ValueError("MCP tool with this server URL already exists") + if existing_provider.server_identifier == server_identifier: + raise ValueError(f"MCP tool {server_identifier} already exists") + + def _prepare_icon(self, icon: str, icon_type: str, icon_background: str) -> str: + """Prepare icon data for storage.""" + if icon_type == "emoji": + return json.dumps({"content": icon, "background": icon_background}) + return icon + + def _encrypt_dict_fields(self, data: dict[str, Any], secret_fields: list[str], tenant_id: str) -> Mapping[str, str]: + """Encrypt specified fields in a dictionary. + + Args: + data: Dictionary containing data to encrypt + secret_fields: List of field names to encrypt + tenant_id: Tenant ID for encryption + + Returns: + JSON string of encrypted data + """ + from core.entities.provider_entities import BasicProviderConfig + from core.tools.utils.encryption import create_provider_encrypter + + # Create config for secret fields + config = [ + BasicProviderConfig(type=BasicProviderConfig.Type.SECRET_INPUT, name=field) for field in secret_fields + ] + + encrypter_instance, _ = create_provider_encrypter( + tenant_id=tenant_id, + config=config, + cache=NoOpProviderCredentialCache(), + ) + + encrypted_data = encrypter_instance.encrypt(data) + return encrypted_data + + def _prepare_encrypted_dict(self, headers: dict[str, str], tenant_id: str) -> str: + """Encrypt headers and prepare for storage.""" + # All headers are treated as secret + return json.dumps(self._encrypt_dict_fields(headers, list(headers.keys()), tenant_id)) + + def _prepare_auth_headers(self, provider_entity: MCPProviderEntity) -> dict[str, str]: + """Prepare headers with OAuth token if available.""" + headers = provider_entity.decrypt_headers() + tokens = provider_entity.retrieve_tokens() + if tokens: + headers["Authorization"] = f"{tokens.token_type.capitalize()} {tokens.access_token}" + return headers + + def _retrieve_remote_mcp_tools( + self, + server_url: str, + headers: dict[str, str], + provider_entity: MCPProviderEntity, + ): + """Retrieve tools from remote MCP server.""" + with MCPClientWithAuthRetry( + server_url=server_url, + headers=headers, + timeout=provider_entity.timeout, + sse_read_timeout=provider_entity.sse_read_timeout, + provider_entity=provider_entity, + ) as mcp_client: + return mcp_client.list_tools() + + def execute_auth_actions(self, auth_result: Any) -> dict[str, str]: + """ + Execute the actions returned by the auth function. + + This method processes the AuthResult and performs the necessary database operations. + + Args: + auth_result: The result from the auth function + + Returns: + The response from the auth result + """ + from core.mcp.entities import AuthAction, AuthActionType + + action: AuthAction + for action in auth_result.actions: + if action.provider_id is None or action.tenant_id is None: + continue + + if action.action_type == AuthActionType.SAVE_CLIENT_INFO: + self.save_oauth_data(action.provider_id, action.tenant_id, action.data, OAuthDataType.CLIENT_INFO) + elif action.action_type == AuthActionType.SAVE_TOKENS: + self.save_oauth_data(action.provider_id, action.tenant_id, action.data, OAuthDataType.TOKENS) + elif action.action_type == AuthActionType.SAVE_CODE_VERIFIER: + self.save_oauth_data(action.provider_id, action.tenant_id, action.data, OAuthDataType.CODE_VERIFIER) + + return auth_result.response + + def auth_with_actions( + self, + provider_entity: MCPProviderEntity, + authorization_code: str | None = None, + resource_metadata_url: str | None = None, + scope_hint: str | None = None, + ) -> dict[str, str]: + """ + Perform authentication and execute all resulting actions. + + This method is used by MCPClientWithAuthRetry for automatic re-authentication. + + Args: + provider_entity: The MCP provider entity + authorization_code: Optional authorization code + resource_metadata_url: Optional Protected Resource Metadata URL from WWW-Authenticate + scope_hint: Optional scope hint from WWW-Authenticate header + + Returns: + Response dictionary from auth result + """ + auth_result = auth( + provider_entity, + authorization_code, + resource_metadata_url=resource_metadata_url, + scope_hint=scope_hint, + ) + return self.execute_auth_actions(auth_result) + + def _reconnect_provider(self, *, server_url: str, provider: MCPToolProvider) -> ReconnectResult: + """Attempt to reconnect to MCP provider with new server URL.""" + provider_entity = provider.to_entity() + headers = provider_entity.headers try: - with MCPClient( - server_url, - provider_id, - tenant_id, - authed=False, - for_list=True, - headers=headers, - timeout=timeout, - sse_read_timeout=sse_read_timeout, - ) as mcp_client: - tools = mcp_client.list_tools() - return { - "authed": True, - "tools": json.dumps([tool.model_dump() for tool in tools]), - "encrypted_credentials": "{}", - } + tools = self._retrieve_remote_mcp_tools(server_url, headers, provider_entity) + return ReconnectResult( + authed=True, + tools=json.dumps([tool.model_dump() for tool in tools]), + encrypted_credentials=EMPTY_CREDENTIALS_JSON, + ) except MCPAuthError: - return {"authed": False, "tools": "[]", "encrypted_credentials": "{}"} + return ReconnectResult(authed=False, tools=EMPTY_TOOLS_JSON, encrypted_credentials=EMPTY_CREDENTIALS_JSON) except MCPError as e: raise ValueError(f"Failed to re-connect MCP server: {e}") from e + + def validate_server_url_change( + self, *, tenant_id: str, provider_id: str, new_server_url: str + ) -> ServerUrlValidationResult: + """ + Validate server URL change by attempting to connect to the new server. + This method should be called BEFORE update_provider to perform network operations + outside of the database transaction. + + Returns: + ServerUrlValidationResult: Validation result with connection status and tools if successful + """ + # Handle hidden/unchanged URL + if UNCHANGED_SERVER_URL_PLACEHOLDER in new_server_url: + return ServerUrlValidationResult(needs_validation=False) + + # Validate URL format + if not self._is_valid_url(new_server_url): + raise ValueError("Server URL is not valid.") + + # Always encrypt and hash the URL + encrypted_server_url = encrypter.encrypt_token(tenant_id, new_server_url) + new_server_url_hash = hashlib.sha256(new_server_url.encode()).hexdigest() + + # Get current provider + provider = self.get_provider(provider_id=provider_id, tenant_id=tenant_id) + + # Check if URL is actually different + if new_server_url_hash == provider.server_url_hash: + # URL hasn't changed, but still return the encrypted data + return ServerUrlValidationResult( + needs_validation=False, encrypted_server_url=encrypted_server_url, server_url_hash=new_server_url_hash + ) + + # Perform validation by attempting to connect + reconnect_result = self._reconnect_provider(server_url=new_server_url, provider=provider) + return ServerUrlValidationResult( + needs_validation=True, + validation_passed=True, + reconnect_result=reconnect_result, + encrypted_server_url=encrypted_server_url, + server_url_hash=new_server_url_hash, + ) + + def _build_tool_provider_response( + self, db_provider: MCPToolProvider, provider_entity: MCPProviderEntity, tools: list + ) -> ToolProviderApiEntity: + """Build API response for tool provider.""" + user = db_provider.load_user() + response = provider_entity.to_api_response( + user_name=user.name if user else None, + ) + response["tools"] = ToolTransformService.mcp_tool_to_user_tool(db_provider, tools) + response["plugin_unique_identifier"] = provider_entity.provider_id + return ToolProviderApiEntity(**response) + + def _handle_integrity_error( + self, error: IntegrityError, name: str, server_url: str, server_identifier: str + ) -> None: + """Handle database integrity errors with user-friendly messages.""" + error_msg = str(error.orig) + if "unique_mcp_provider_name" in error_msg: + raise ValueError(f"MCP tool {name} already exists") + if "unique_mcp_provider_server_url" in error_msg: + raise ValueError(f"MCP tool {server_url} already exists") + if "unique_mcp_provider_server_identifier" in error_msg: + raise ValueError(f"MCP tool {server_identifier} already exists") + raise + + def _is_valid_url(self, url: str) -> bool: + """Validate URL format.""" + if not url: + return False + try: + parsed = urlparse(url) + return all([parsed.scheme, parsed.netloc]) and parsed.scheme in ["http", "https"] + except (ValueError, TypeError): + return False + + def _update_optional_fields(self, mcp_provider: MCPToolProvider, configuration: MCPConfiguration) -> None: + """Update optional configuration fields using setattr for cleaner code.""" + field_mapping = {"timeout": configuration.timeout, "sse_read_timeout": configuration.sse_read_timeout} + + for field, value in field_mapping.items(): + if value is not None: + setattr(mcp_provider, field, value) + + def _process_headers(self, headers: dict[str, str], mcp_provider: MCPToolProvider, tenant_id: str) -> str | None: + """Process headers update, handling empty dict to clear headers.""" + if not headers: + return None + + # Merge with existing headers to preserve masked values + final_headers = self._merge_headers_with_masked(incoming_headers=headers, mcp_provider=mcp_provider) + return self._prepare_encrypted_dict(final_headers, tenant_id) + + def _process_credentials( + self, authentication: MCPAuthentication, mcp_provider: MCPToolProvider, tenant_id: str + ) -> str: + """Process credentials update, handling masked values.""" + # Merge with existing credentials + final_client_id, final_client_secret = self._merge_credentials_with_masked( + authentication.client_id, authentication.client_secret, mcp_provider + ) + + # Build and encrypt + return self._build_and_encrypt_credentials(final_client_id, final_client_secret, tenant_id) + + def _merge_headers_with_masked( + self, incoming_headers: dict[str, str], mcp_provider: MCPToolProvider + ) -> dict[str, str]: + """Merge incoming headers with existing ones, preserving unchanged masked values. + + Args: + incoming_headers: Headers from frontend (may contain masked values) + mcp_provider: The MCP provider instance + + Returns: + Final headers dict with proper values (original for unchanged masked, new for changed) + """ + mcp_provider_entity = mcp_provider.to_entity() + existing_decrypted = mcp_provider_entity.decrypt_headers() + existing_masked = mcp_provider_entity.masked_headers() + + return { + key: (str(existing_decrypted[key]) if key in existing_masked and value == existing_masked[key] else value) + for key, value in incoming_headers.items() + if key in existing_decrypted or value != existing_masked.get(key) + } + + def _merge_credentials_with_masked( + self, + client_id: str, + client_secret: str | None, + mcp_provider: MCPToolProvider, + ) -> tuple[ + str, + str | None, + ]: + """Merge incoming credentials with existing ones, preserving unchanged masked values. + + Args: + client_id: Client ID from frontend (may be masked) + client_secret: Client secret from frontend (may be masked) + mcp_provider: The MCP provider instance + + Returns: + Tuple of (final_client_id, final_client_secret) + """ + mcp_provider_entity = mcp_provider.to_entity() + existing_decrypted = mcp_provider_entity.decrypt_credentials() + existing_masked = mcp_provider_entity.masked_credentials() + + # Check if client_id is masked and unchanged + final_client_id = client_id + if existing_masked.get("client_id") and client_id == existing_masked["client_id"]: + # Use existing decrypted value + final_client_id = existing_decrypted.get("client_id", client_id) + + # Check if client_secret is masked and unchanged + final_client_secret = client_secret + if existing_masked.get("client_secret") and client_secret == existing_masked["client_secret"]: + # Use existing decrypted value + final_client_secret = existing_decrypted.get("client_secret", client_secret) + + return final_client_id, final_client_secret + + def _build_and_encrypt_credentials(self, client_id: str, client_secret: str | None, tenant_id: str) -> str: + """Build credentials and encrypt sensitive fields.""" + # Create a flat structure with all credential data + credentials_data = { + "client_id": client_id, + "client_name": CLIENT_NAME, + "is_dynamic_registration": False, + } + secret_fields = [] + if client_secret is not None: + credentials_data["encrypted_client_secret"] = client_secret + secret_fields = ["encrypted_client_secret"] + client_info = self._encrypt_dict_fields(credentials_data, secret_fields, tenant_id) + return json.dumps({"client_information": client_info}) diff --git a/api/services/tools/tools_transform_service.py b/api/services/tools/tools_transform_service.py index ed04f41ba3..3e976234ba 100644 --- a/api/services/tools/tools_transform_service.py +++ b/api/services/tools/tools_transform_service.py @@ -3,6 +3,7 @@ import logging from collections.abc import Mapping from typing import Any, Union +from pydantic import ValidationError from yarl import URL from configs import dify_config @@ -223,40 +224,59 @@ class ToolTransformService: ) @staticmethod - def mcp_provider_to_user_provider(db_provider: MCPToolProvider, for_list: bool = False) -> ToolProviderApiEntity: - user = db_provider.load_user() - return ToolProviderApiEntity( - id=db_provider.server_identifier if not for_list else db_provider.id, - author=user.name if user else "Anonymous", - name=db_provider.name, - icon=db_provider.provider_icon, - type=ToolProviderType.MCP, - is_team_authorization=db_provider.authed, - server_url=db_provider.masked_server_url, - tools=ToolTransformService.mcp_tool_to_user_tool( - db_provider, [MCPTool.model_validate(tool) for tool in json.loads(db_provider.tools)] - ), - updated_at=int(db_provider.updated_at.timestamp()), - label=I18nObject(en_US=db_provider.name, zh_Hans=db_provider.name), - description=I18nObject(en_US="", zh_Hans=""), - server_identifier=db_provider.server_identifier, - timeout=db_provider.timeout, - sse_read_timeout=db_provider.sse_read_timeout, - masked_headers=db_provider.masked_headers, - original_headers=db_provider.decrypted_headers, - ) + def mcp_provider_to_user_provider( + db_provider: MCPToolProvider, + for_list: bool = False, + user_name: str | None = None, + include_sensitive: bool = True, + ) -> ToolProviderApiEntity: + from core.entities.mcp_provider import MCPConfiguration + + # Use provided user_name to avoid N+1 query, fallback to load_user() if not provided + if user_name is None: + user = db_provider.load_user() + user_name = user.name if user else None + + # Convert to entity and use its API response method + provider_entity = db_provider.to_entity() + + response = provider_entity.to_api_response(user_name=user_name, include_sensitive=include_sensitive) + try: + mcp_tools = [MCPTool(**tool) for tool in json.loads(db_provider.tools)] + except (ValidationError, json.JSONDecodeError): + mcp_tools = [] + # Add additional fields specific to the transform + response["id"] = db_provider.server_identifier if not for_list else db_provider.id + response["tools"] = ToolTransformService.mcp_tool_to_user_tool(db_provider, mcp_tools, user_name=user_name) + response["server_identifier"] = db_provider.server_identifier + + # Convert configuration dict to MCPConfiguration object + if "configuration" in response and isinstance(response["configuration"], dict): + response["configuration"] = MCPConfiguration( + timeout=float(response["configuration"]["timeout"]), + sse_read_timeout=float(response["configuration"]["sse_read_timeout"]), + ) + + return ToolProviderApiEntity(**response) @staticmethod - def mcp_tool_to_user_tool(mcp_provider: MCPToolProvider, tools: list[MCPTool]) -> list[ToolApiEntity]: - user = mcp_provider.load_user() + def mcp_tool_to_user_tool( + mcp_provider: MCPToolProvider, tools: list[MCPTool], user_name: str | None = None + ) -> list[ToolApiEntity]: + # Use provided user_name to avoid N+1 query, fallback to load_user() if not provided + if user_name is None: + user = mcp_provider.load_user() + user_name = user.name if user else "Anonymous" + return [ ToolApiEntity( - author=user.name if user else "Anonymous", + author=user_name or "Anonymous", name=tool.name, label=I18nObject(en_US=tool.name, zh_Hans=tool.name), description=I18nObject(en_US=tool.description or "", zh_Hans=tool.description or ""), parameters=ToolTransformService.convert_mcp_schema_to_parameter(tool.inputSchema), labels=[], + output_schema=tool.outputSchema or {}, ) for tool in tools ] @@ -403,7 +423,7 @@ class ToolTransformService: ) @staticmethod - def convert_mcp_schema_to_parameter(schema: dict) -> list["ToolParameter"]: + def convert_mcp_schema_to_parameter(schema: dict[str, Any]) -> list["ToolParameter"]: """ Convert MCP JSON schema to tool parameters @@ -412,7 +432,7 @@ class ToolTransformService: """ def create_parameter( - name: str, description: str, param_type: str, required: bool, input_schema: dict | None = None + name: str, description: str, param_type: str, required: bool, input_schema: dict[str, Any] | None = None ) -> ToolParameter: """Create a ToolParameter instance with given attributes""" input_schema_dict: dict[str, Any] = {"input_schema": input_schema} if input_schema else {} @@ -427,7 +447,9 @@ class ToolTransformService: **input_schema_dict, ) - def process_properties(props: dict, required: list, prefix: str = "") -> list[ToolParameter]: + def process_properties( + props: dict[str, dict[str, Any]], required: list[str], prefix: str = "" + ) -> list[ToolParameter]: """Process properties recursively""" TYPE_MAPPING = {"integer": "number", "float": "number"} COMPLEX_TYPES = ["array", "object"] diff --git a/api/services/tools/workflow_tools_manage_service.py b/api/services/tools/workflow_tools_manage_service.py index b1cc963681..5413725798 100644 --- a/api/services/tools/workflow_tools_manage_service.py +++ b/api/services/tools/workflow_tools_manage_service.py @@ -14,7 +14,6 @@ from core.tools.utils.workflow_configuration_sync import WorkflowToolConfigurati from core.tools.workflow_as_tool.provider import WorkflowToolProviderController from core.tools.workflow_as_tool.tool import WorkflowTool from extensions.ext_database import db -from libs.uuid_utils import uuidv7 from models.model import App from models.tools import WorkflowToolProvider from models.workflow import Workflow @@ -67,7 +66,6 @@ class WorkflowToolManageService: with Session(db.engine, expire_on_commit=False) as session, session.begin(): workflow_tool_provider = WorkflowToolProvider( - id=str(uuidv7()), tenant_id=tenant_id, user_id=user_id, app_id=workflow_app_id, diff --git a/api/services/trigger/app_trigger_service.py b/api/services/trigger/app_trigger_service.py new file mode 100644 index 0000000000..6d5a719f63 --- /dev/null +++ b/api/services/trigger/app_trigger_service.py @@ -0,0 +1,46 @@ +""" +AppTrigger management service. + +Handles AppTrigger model CRUD operations and status management. +This service centralizes all AppTrigger-related business logic. +""" + +import logging + +from sqlalchemy import update +from sqlalchemy.orm import Session + +from extensions.ext_database import db +from models.enums import AppTriggerStatus +from models.trigger import AppTrigger + +logger = logging.getLogger(__name__) + + +class AppTriggerService: + """Service for managing AppTrigger lifecycle and status.""" + + @staticmethod + def mark_tenant_triggers_rate_limited(tenant_id: str) -> None: + """ + Mark all enabled triggers for a tenant as rate limited due to quota exceeded. + + This method is called when a tenant's quota is exhausted. It updates all + enabled triggers to RATE_LIMITED status to prevent further executions until + quota is restored. + + Args: + tenant_id: Tenant ID whose triggers should be marked as rate limited + + """ + try: + with Session(db.engine) as session: + session.execute( + update(AppTrigger) + .where(AppTrigger.tenant_id == tenant_id, AppTrigger.status == AppTriggerStatus.ENABLED) + .values(status=AppTriggerStatus.RATE_LIMITED) + ) + session.commit() + logger.info("Marked all enabled triggers as rate limited for tenant %s", tenant_id) + except Exception: + logger.exception("Failed to mark all enabled triggers as rate limited for tenant %s", tenant_id) diff --git a/api/services/trigger/schedule_service.py b/api/services/trigger/schedule_service.py index f5e5ec6e01..b49d14f860 100644 --- a/api/services/trigger/schedule_service.py +++ b/api/services/trigger/schedule_service.py @@ -1,7 +1,8 @@ import json import logging +from collections.abc import Mapping from datetime import datetime -from typing import Optional +from typing import Any from sqlalchemy import select from sqlalchemy.orm import Session @@ -13,6 +14,7 @@ from libs.schedule_utils import calculate_next_run_at, convert_12h_to_24h from models.account import Account, TenantAccountJoin from models.trigger import WorkflowSchedulePlan from models.workflow import Workflow +from services.errors.account import AccountNotFoundError logger = logging.getLogger(__name__) @@ -123,7 +125,7 @@ class ScheduleService: session.flush() @staticmethod - def get_tenant_owner(session: Session, tenant_id: str) -> Optional[Account]: + def get_tenant_owner(session: Session, tenant_id: str) -> Account: """ Returns an account to execute scheduled workflows on behalf of the tenant. Prioritizes owner over admin to ensure proper authorization hierarchy. @@ -143,7 +145,12 @@ class ScheduleService: ).scalar_one_or_none() if result: - return session.get(Account, result.account_id) + account = session.get(Account, result.account_id) + if not account: + raise AccountNotFoundError(f"Account not found: {result.account_id}") + return account + else: + raise AccountNotFoundError(f"Account not found for tenant: {tenant_id}") @staticmethod def update_next_run_at( @@ -169,7 +176,35 @@ class ScheduleService: return next_run_at @staticmethod - def extract_schedule_config(workflow: Workflow) -> Optional[ScheduleConfig]: + def to_schedule_config(node_config: Mapping[str, Any]) -> ScheduleConfig: + """ + Converts user-friendly visual schedule settings to cron expression. + Maintains consistency with frontend UI expectations while supporting croniter's extended syntax. + """ + node_data = node_config.get("data", {}) + mode = node_data.get("mode", "visual") + timezone = node_data.get("timezone", "UTC") + node_id = node_config.get("id", "start") + + cron_expression = None + if mode == "cron": + cron_expression = node_data.get("cron_expression") + if not cron_expression: + raise ScheduleConfigError("Cron expression is required for cron mode") + elif mode == "visual": + frequency = str(node_data.get("frequency")) + if not frequency: + raise ScheduleConfigError("Frequency is required for visual mode") + visual_config = VisualConfig(**node_data.get("visual_config", {})) + cron_expression = ScheduleService.visual_to_cron(frequency=frequency, visual_config=visual_config) + if not cron_expression: + raise ScheduleConfigError("Cron expression is required for visual mode") + else: + raise ScheduleConfigError(f"Invalid schedule mode: {mode}") + return ScheduleConfig(node_id=node_id, cron_expression=cron_expression, timezone=timezone) + + @staticmethod + def extract_schedule_config(workflow: Workflow) -> ScheduleConfig | None: """ Extracts schedule configuration from workflow graph. @@ -223,6 +258,8 @@ class ScheduleService: return ScheduleConfig(node_id=node_id, cron_expression=cron_expression, timezone=timezone) + return None + @staticmethod def visual_to_cron(frequency: str, visual_config: VisualConfig) -> str: """ diff --git a/api/services/trigger/trigger_provider_service.py b/api/services/trigger/trigger_provider_service.py index 0e543bb039..6079d47bbf 100644 --- a/api/services/trigger/trigger_provider_service.py +++ b/api/services/trigger/trigger_provider_service.py @@ -3,7 +3,7 @@ import logging import time as _time import uuid from collections.abc import Mapping -from typing import Any, Optional +from typing import Any from sqlalchemy import desc, func from sqlalchemy.orm import Session @@ -99,7 +99,11 @@ class TriggerProviderService: controller=provider_controller, subscription=subscription, ) - subscription.credentials = dict(encrypter.mask_credentials(dict(subscription.credentials))) + subscription.credentials = dict( + encrypter.mask_credentials(dict(encrypter.decrypt(subscription.credentials))) + ) + subscription.properties = dict(encrypter.mask_credentials(dict(encrypter.decrypt(subscription.properties)))) + subscription.parameters = dict(encrypter.mask_credentials(dict(encrypter.decrypt(subscription.parameters)))) count = workflows_in_use_map.get(subscription.id) subscription.workflows_in_use = count if count is not None else 0 @@ -117,7 +121,7 @@ class TriggerProviderService: parameters: Mapping[str, Any], properties: Mapping[str, Any], credentials: Mapping[str, str], - subscription_id: Optional[str] = None, + subscription_id: str | None = None, credential_expires_at: int = -1, expires_at: int = -1, ) -> Mapping[str, Any]: @@ -177,19 +181,21 @@ class TriggerProviderService: # Create provider record subscription = TriggerSubscription( - id=subscription_id or str(uuid.uuid4()), tenant_id=tenant_id, user_id=user_id, name=name, endpoint_id=endpoint_id, provider_id=str(provider_id), - parameters=parameters, - properties=properties_encrypter.encrypt(dict(properties)), - credentials=credential_encrypter.encrypt(dict(credentials)) if credential_encrypter else {}, + parameters=dict(parameters), + properties=dict(properties_encrypter.encrypt(dict(properties))), + credentials=dict(credential_encrypter.encrypt(dict(credentials))) + if credential_encrypter + else {}, credential_type=credential_type.value, credential_expires_at=credential_expires_at, expires_at=expires_at, ) + subscription.id = subscription_id or str(uuid.uuid4()) session.add(subscription) session.commit() @@ -435,7 +441,7 @@ class TriggerProviderService: return {"result": "success", "expires_at": int(refreshed.expires_at)} @classmethod - def get_oauth_client(cls, tenant_id: str, provider_id: TriggerProviderID) -> Optional[Mapping[str, Any]]: + def get_oauth_client(cls, tenant_id: str, provider_id: TriggerProviderID) -> Mapping[str, Any] | None: """ Get OAuth client configuration for a provider. First tries tenant-level OAuth, then falls back to system OAuth. @@ -471,7 +477,7 @@ class TriggerProviderService: is_verified = PluginService.is_plugin_verified(tenant_id, provider_id.plugin_id) if not is_verified: - return oauth_params + return None # Check for system-level OAuth client system_client: TriggerOAuthSystemClient | None = ( @@ -488,13 +494,29 @@ class TriggerProviderService: return oauth_params + @classmethod + def is_oauth_system_client_exists(cls, tenant_id: str, provider_id: TriggerProviderID) -> bool: + """ + Check if system OAuth client exists for a trigger provider. + """ + is_verified = PluginService.is_plugin_verified(tenant_id, provider_id.plugin_id) + if not is_verified: + return False + with Session(db.engine, expire_on_commit=False) as session: + system_client: TriggerOAuthSystemClient | None = ( + session.query(TriggerOAuthSystemClient) + .filter_by(plugin_id=provider_id.plugin_id, provider=provider_id.provider_name) + .first() + ) + return system_client is not None + @classmethod def save_custom_oauth_client_params( cls, tenant_id: str, provider_id: TriggerProviderID, - client_params: Optional[Mapping[str, Any]] = None, - enabled: Optional[bool] = None, + client_params: Mapping[str, Any] | None = None, + enabled: bool | None = None, ) -> Mapping[str, Any]: """ Save or update custom OAuth client parameters for a trigger provider. diff --git a/api/services/trigger/trigger_request_service.py b/api/services/trigger/trigger_request_service.py index 4753e7c24e..91a838c265 100644 --- a/api/services/trigger/trigger_request_service.py +++ b/api/services/trigger/trigger_request_service.py @@ -61,5 +61,5 @@ class TriggerHttpRequestCachingService: """ storage.save( f"{cls._TRIGGER_STORAGE_PATH}/{request_id}.payload", - TypeAdapter(Mapping[str, Any]).dump_json(payload), + TypeAdapter(Mapping[str, Any]).dump_json(payload), # type: ignore ) diff --git a/api/services/trigger/trigger_service.py b/api/services/trigger/trigger_service.py index 8a04a3c642..7f12c2e19c 100644 --- a/api/services/trigger/trigger_service.py +++ b/api/services/trigger/trigger_service.py @@ -6,7 +6,7 @@ from typing import Any from flask import Request, Response from pydantic import BaseModel -from sqlalchemy import and_, select +from sqlalchemy import select from sqlalchemy.orm import Session from core.plugin.entities.plugin_daemon import CredentialType @@ -22,7 +22,7 @@ from extensions.ext_database import db from extensions.ext_redis import redis_client from models.model import App from models.provider_ids import TriggerProviderID -from models.trigger import AppTrigger, AppTriggerStatus, TriggerSubscription, WorkflowPluginTrigger +from models.trigger import TriggerSubscription, WorkflowPluginTrigger from models.workflow import Workflow from services.trigger.trigger_provider_service import TriggerProviderService from services.trigger.trigger_request_service import TriggerHttpRequestCachingService @@ -148,68 +148,6 @@ class TriggerService: ) return dispatch_response.response - @classmethod - def get_subscriber_triggers( - cls, tenant_id: str, subscription_id: str, event_name: str - ) -> list[WorkflowPluginTrigger]: - """ - Get WorkflowPluginTriggers for a subscription and trigger. - - Args: - tenant_id: Tenant ID - subscription_id: Subscription ID - event_name: Event name - """ - with Session(db.engine, expire_on_commit=False) as session: - subscribers = session.scalars( - select(WorkflowPluginTrigger) - .join( - AppTrigger, - and_( - AppTrigger.tenant_id == WorkflowPluginTrigger.tenant_id, - AppTrigger.app_id == WorkflowPluginTrigger.app_id, - AppTrigger.node_id == WorkflowPluginTrigger.node_id, - ), - ) - .where( - WorkflowPluginTrigger.tenant_id == tenant_id, - WorkflowPluginTrigger.subscription_id == subscription_id, - WorkflowPluginTrigger.event_name == event_name, - AppTrigger.status == AppTriggerStatus.ENABLED, - ) - ).all() - return list(subscribers) - - @classmethod - def delete_plugin_trigger_by_subscription( - cls, - session: Session, - tenant_id: str, - subscription_id: str, - ) -> None: - """Delete a plugin trigger by tenant_id and subscription_id within an existing session - - Args: - session: Database session - tenant_id: The tenant ID - subscription_id: The subscription ID - - Raises: - NotFound: If plugin trigger not found - """ - # Find plugin trigger using indexed columns - plugin_trigger = session.scalar( - select(WorkflowPluginTrigger).where( - WorkflowPluginTrigger.tenant_id == tenant_id, - WorkflowPluginTrigger.subscription_id == subscription_id, - ) - ) - - if not plugin_trigger: - return - - session.delete(plugin_trigger) - @classmethod def sync_plugin_trigger_relationships(cls, app: App, workflow: Workflow): """ @@ -272,7 +210,7 @@ class TriggerService: for node_info in nodes_in_graph: node_id = node_info["node_id"] # firstly check if the node exists in cache - if not redis_client.get(f"{cls.__PLUGIN_TRIGGER_NODE_CACHE_KEY__}:{node_id}"): + if not redis_client.get(f"{cls.__PLUGIN_TRIGGER_NODE_CACHE_KEY__}:{app.id}:{node_id}"): not_found_in_cache.append(node_info) continue @@ -317,7 +255,7 @@ class TriggerService: subscription_id=node_info["subscription_id"], ) redis_client.set( - f"{cls.__PLUGIN_TRIGGER_NODE_CACHE_KEY__}:{node_info['node_id']}", + f"{cls.__PLUGIN_TRIGGER_NODE_CACHE_KEY__}:{app.id}:{node_info['node_id']}", cache.model_dump_json(), ex=60 * 60, ) @@ -347,7 +285,7 @@ class TriggerService: subscription_id=node_info["subscription_id"], ) redis_client.set( - f"{cls.__PLUGIN_TRIGGER_NODE_CACHE_KEY__}:{node_id}", + f"{cls.__PLUGIN_TRIGGER_NODE_CACHE_KEY__}:{app.id}:{node_id}", cache.model_dump_json(), ex=60 * 60, ) @@ -357,12 +295,9 @@ class TriggerService: for node_id in nodes_id_in_db: if node_id not in nodes_id_in_graph: session.delete(nodes_id_in_db[node_id]) - redis_client.delete(f"{cls.__PLUGIN_TRIGGER_NODE_CACHE_KEY__}:{node_id}") + redis_client.delete(f"{cls.__PLUGIN_TRIGGER_NODE_CACHE_KEY__}:{app.id}:{node_id}") session.commit() except Exception: - import logging - - logger = logging.getLogger(__name__) logger.exception("Failed to sync plugin trigger relationships for app %s", app.id) raise finally: diff --git a/api/services/trigger/trigger_subscription_builder_service.py b/api/services/trigger/trigger_subscription_builder_service.py index 3b61d01648..571393c782 100644 --- a/api/services/trigger/trigger_subscription_builder_service.py +++ b/api/services/trigger/trigger_subscription_builder_service.py @@ -472,8 +472,9 @@ class TriggerSubscriptionBuilderService: response=response, ) return response - except Exception as e: - error_response = Response(status=500, response=str(e)) + except Exception: + logger.exception("Error during validation endpoint dispatch for endpoint_id=%s", endpoint_id) + error_response = Response(status=500, response="An internal error has occurred.") cls.append_log(endpoint_id=endpoint_id, request=request, response=error_response) return error_response diff --git a/api/services/trigger/trigger_subscription_operator_service.py b/api/services/trigger/trigger_subscription_operator_service.py new file mode 100644 index 0000000000..5d7785549e --- /dev/null +++ b/api/services/trigger/trigger_subscription_operator_service.py @@ -0,0 +1,70 @@ +from sqlalchemy import and_, select +from sqlalchemy.orm import Session + +from extensions.ext_database import db +from models.enums import AppTriggerStatus +from models.trigger import AppTrigger, WorkflowPluginTrigger + + +class TriggerSubscriptionOperatorService: + @classmethod + def get_subscriber_triggers( + cls, tenant_id: str, subscription_id: str, event_name: str + ) -> list[WorkflowPluginTrigger]: + """ + Get WorkflowPluginTriggers for a subscription and trigger. + + Args: + tenant_id: Tenant ID + subscription_id: Subscription ID + event_name: Event name + """ + with Session(db.engine, expire_on_commit=False) as session: + subscribers = session.scalars( + select(WorkflowPluginTrigger) + .join( + AppTrigger, + and_( + AppTrigger.tenant_id == WorkflowPluginTrigger.tenant_id, + AppTrigger.app_id == WorkflowPluginTrigger.app_id, + AppTrigger.node_id == WorkflowPluginTrigger.node_id, + ), + ) + .where( + WorkflowPluginTrigger.tenant_id == tenant_id, + WorkflowPluginTrigger.subscription_id == subscription_id, + WorkflowPluginTrigger.event_name == event_name, + AppTrigger.status == AppTriggerStatus.ENABLED, + ) + ).all() + return list(subscribers) + + @classmethod + def delete_plugin_trigger_by_subscription( + cls, + session: Session, + tenant_id: str, + subscription_id: str, + ) -> None: + """Delete a plugin trigger by tenant_id and subscription_id within an existing session + + Args: + session: Database session + tenant_id: The tenant ID + subscription_id: The subscription ID + + Raises: + NotFound: If plugin trigger not found + """ + # Find plugin trigger using indexed columns + plugin_trigger = session.scalar( + select(WorkflowPluginTrigger).where( + WorkflowPluginTrigger.tenant_id == tenant_id, + WorkflowPluginTrigger.subscription_id == subscription_id, + ) + ) + + if not plugin_trigger: + return + + session.delete(plugin_trigger) diff --git a/api/services/trigger/webhook_service.py b/api/services/trigger/webhook_service.py index eaf0e051bb..6e0ee7a191 100644 --- a/api/services/trigger/webhook_service.py +++ b/api/services/trigger/webhook_service.py @@ -18,6 +18,7 @@ from core.file.models import FileTransferMethod from core.tools.tool_file_manager import ToolFileManager from core.variables.types import SegmentType from core.workflow.enums import NodeType +from enums.quota_type import QuotaType from extensions.ext_database import db from extensions.ext_redis import redis_client from factories import file_factory @@ -27,6 +28,8 @@ from models.trigger import AppTrigger, WorkflowWebhookTrigger from models.workflow import Workflow from services.async_workflow_service import AsyncWorkflowService from services.end_user_service import EndUserService +from services.errors.app import QuotaExceededError +from services.trigger.app_trigger_service import AppTriggerService from services.workflow.entities import WebhookTriggerData logger = logging.getLogger(__name__) @@ -67,7 +70,7 @@ class WebhookService: with Session(db.engine) as session: # Get webhook trigger webhook_trigger = ( - session.query(WorkflowWebhookTrigger).filter(WorkflowWebhookTrigger.webhook_id == webhook_id).first() + session.query(WorkflowWebhookTrigger).where(WorkflowWebhookTrigger.webhook_id == webhook_id).first() ) if not webhook_trigger: raise ValueError(f"Webhook not found: {webhook_id}") @@ -98,6 +101,12 @@ class WebhookService: raise ValueError(f"App trigger not found for webhook {webhook_id}") # Only check enabled status if not in debug mode + + if app_trigger.status == AppTriggerStatus.RATE_LIMITED: + raise ValueError( + f"Webhook trigger is rate limited for webhook {webhook_id}, please upgrade your plan." + ) + if app_trigger.status != AppTriggerStatus.ENABLED: raise ValueError(f"Webhook trigger is disabled for webhook {webhook_id}") @@ -729,6 +738,18 @@ class WebhookService: user_id=None, ) + # consume quota before triggering workflow execution + try: + QuotaType.TRIGGER.consume(webhook_trigger.tenant_id) + except QuotaExceededError: + AppTriggerService.mark_tenant_triggers_rate_limited(webhook_trigger.tenant_id) + logger.info( + "Tenant %s rate limited, skipping webhook trigger %s", + webhook_trigger.tenant_id, + webhook_trigger.webhook_id, + ) + raise + # Trigger workflow execution asynchronously AsyncWorkflowService.trigger_workflow_async( session, @@ -812,7 +833,7 @@ class WebhookService: not_found_in_cache: list[str] = [] for node_id in nodes_id_in_graph: # firstly check if the node exists in cache - if not redis_client.get(f"{cls.__WEBHOOK_NODE_CACHE_KEY__}:{node_id}"): + if not redis_client.get(f"{cls.__WEBHOOK_NODE_CACHE_KEY__}:{app.id}:{node_id}"): not_found_in_cache.append(node_id) continue @@ -845,14 +866,16 @@ class WebhookService: session.add(webhook_record) session.flush() cache = Cache(record_id=webhook_record.id, node_id=node_id, webhook_id=webhook_record.webhook_id) - redis_client.set(f"{cls.__WEBHOOK_NODE_CACHE_KEY__}:{node_id}", cache.model_dump_json(), ex=60 * 60) + redis_client.set( + f"{cls.__WEBHOOK_NODE_CACHE_KEY__}:{app.id}:{node_id}", cache.model_dump_json(), ex=60 * 60 + ) session.commit() # delete the nodes not found in the graph for node_id in nodes_id_in_db: if node_id not in nodes_id_in_graph: session.delete(nodes_id_in_db[node_id]) - redis_client.delete(f"{cls.__WEBHOOK_NODE_CACHE_KEY__}:{node_id}") + redis_client.delete(f"{cls.__WEBHOOK_NODE_CACHE_KEY__}:{app.id}:{node_id}") session.commit() except Exception: logger.exception("Failed to sync webhook relationships for app %s", app.id) diff --git a/api/services/variable_truncator.py b/api/services/variable_truncator.py index 6f8adb7536..6eb8d0031d 100644 --- a/api/services/variable_truncator.py +++ b/api/services/variable_truncator.py @@ -1,4 +1,5 @@ import dataclasses +from abc import ABC, abstractmethod from collections.abc import Mapping from typing import Any, Generic, TypeAlias, TypeVar, overload @@ -66,7 +67,17 @@ class TruncationResult: truncated: bool -class VariableTruncator: +class BaseTruncator(ABC): + @abstractmethod + def truncate(self, segment: Segment) -> TruncationResult: + pass + + @abstractmethod + def truncate_variable_mapping(self, v: Mapping[str, Any]) -> tuple[Mapping[str, Any], bool]: + pass + + +class VariableTruncator(BaseTruncator): """ Handles variable truncation with structure-preserving strategies. @@ -418,3 +429,38 @@ class VariableTruncator: return _PartResult(val, self.calculate_json_size(val), False) else: raise AssertionError("this statement should be unreachable.") + + +class DummyVariableTruncator(BaseTruncator): + """ + A no-op variable truncator that doesn't truncate any data. + + This is used for Service API calls where truncation should be disabled + to maintain backward compatibility and provide complete data. + """ + + def truncate_variable_mapping(self, v: Mapping[str, Any]) -> tuple[Mapping[str, Any], bool]: + """ + Return original mapping without truncation. + + Args: + v: The variable mapping to process + + Returns: + Tuple of (original_mapping, False) where False indicates no truncation occurred + """ + return v, False + + def truncate(self, segment: Segment) -> TruncationResult: + """ + Return original segment without truncation. + + Args: + segment: The segment to process + + Returns: + The original segment unchanged + """ + # For Service API, we want to preserve the original segment + # without any truncation, so just return it as-is + return TruncationResult(result=segment, truncated=False) diff --git a/api/services/workflow/entities.py b/api/services/workflow/entities.py index dd126cdef4..70ec8d6e2a 100644 --- a/api/services/workflow/entities.py +++ b/api/services/workflow/entities.py @@ -4,7 +4,7 @@ Pydantic models for async workflow trigger system. from collections.abc import Mapping, Sequence from enum import StrEnum -from typing import Any, Optional +from typing import Any from pydantic import BaseModel, ConfigDict, Field @@ -19,17 +19,24 @@ class AsyncTriggerStatus(StrEnum): TIMEOUT = "timeout" +class TriggerMetadata(BaseModel): + """Trigger metadata""" + + type: AppTriggerType = Field(default=AppTriggerType.UNKNOWN) + + class TriggerData(BaseModel): """Base trigger data model for async workflow execution""" app_id: str tenant_id: str - workflow_id: Optional[str] = None + workflow_id: str | None = None root_node_id: str inputs: Mapping[str, Any] files: Sequence[Mapping[str, Any]] = Field(default_factory=list) trigger_type: AppTriggerType trigger_from: WorkflowRunTriggeredFrom + trigger_metadata: TriggerMetadata | None = None model_config = ConfigDict(use_enum_values=True) @@ -48,6 +55,19 @@ class ScheduleTriggerData(TriggerData): trigger_from: WorkflowRunTriggeredFrom = WorkflowRunTriggeredFrom.SCHEDULE +class PluginTriggerMetadata(TriggerMetadata): + """Plugin trigger metadata""" + + type: AppTriggerType = AppTriggerType.TRIGGER_PLUGIN + + endpoint_id: str + plugin_unique_identifier: str + provider_id: str + event_name: str + icon_filename: str + icon_dark_filename: str + + class PluginTriggerData(TriggerData): """Plugin webhook trigger data""" @@ -83,10 +103,10 @@ class AsyncTriggerExecutionResult(BaseModel): execution_id: str status: AsyncTriggerStatus - result: Optional[Mapping[str, Any]] = None - error: Optional[str] = None - elapsed_time: Optional[float] = None - total_tokens: Optional[int] = None + result: Mapping[str, Any] | None = None + error: str | None = None + elapsed_time: float | None = None + total_tokens: int | None = None model_config = ConfigDict(use_enum_values=True) @@ -113,15 +133,15 @@ class TriggerLogResponse(BaseModel): status: str queue_name: str retry_count: int - celery_task_id: Optional[str] = None - workflow_run_id: Optional[str] = None - error: Optional[str] = None - outputs: Optional[str] = None - elapsed_time: Optional[float] = None - total_tokens: Optional[int] = None - created_at: Optional[str] = None - triggered_at: Optional[str] = None - finished_at: Optional[str] = None + celery_task_id: str | None = None + workflow_run_id: str | None = None + error: str | None = None + outputs: str | None = None + elapsed_time: float | None = None + total_tokens: int | None = None + created_at: str | None = None + triggered_at: str | None = None + finished_at: str | None = None model_config = ConfigDict(use_enum_values=True) diff --git a/api/services/workflow/queue_dispatcher.py b/api/services/workflow/queue_dispatcher.py index 158e91dbc9..cc366482c8 100644 --- a/api/services/workflow/queue_dispatcher.py +++ b/api/services/workflow/queue_dispatcher.py @@ -2,16 +2,14 @@ Queue dispatcher system for async workflow execution. Implements an ABC-based pattern for handling different subscription tiers -with appropriate queue routing and rate limiting. +with appropriate queue routing and priority assignment. """ from abc import ABC, abstractmethod from enum import StrEnum from configs import dify_config -from extensions.ext_redis import redis_client from services.billing_service import BillingService -from services.workflow.rate_limiter import TenantDailyRateLimiter class QueuePriority(StrEnum): @@ -25,50 +23,16 @@ class QueuePriority(StrEnum): class BaseQueueDispatcher(ABC): """Abstract base class for queue dispatchers""" - def __init__(self): - self.rate_limiter = TenantDailyRateLimiter(redis_client) - @abstractmethod def get_queue_name(self) -> str: """Get the queue name for this dispatcher""" pass - @abstractmethod - def get_daily_limit(self) -> int: - """Get daily execution limit""" - pass - @abstractmethod def get_priority(self) -> int: """Get task priority level""" pass - def check_daily_quota(self, tenant_id: str) -> bool: - """ - Check if tenant has remaining daily quota - - Args: - tenant_id: The tenant identifier - - Returns: - True if quota available, False otherwise - """ - # Check without consuming - remaining = self.rate_limiter.get_remaining_quota(tenant_id=tenant_id, max_daily_limit=self.get_daily_limit()) - return remaining > 0 - - def consume_quota(self, tenant_id: str) -> bool: - """ - Consume one execution from daily quota - - Args: - tenant_id: The tenant identifier - - Returns: - True if quota consumed successfully, False if limit reached - """ - return self.rate_limiter.check_and_consume(tenant_id=tenant_id, max_daily_limit=self.get_daily_limit()) - class ProfessionalQueueDispatcher(BaseQueueDispatcher): """Dispatcher for professional tier""" @@ -76,9 +40,6 @@ class ProfessionalQueueDispatcher(BaseQueueDispatcher): def get_queue_name(self) -> str: return QueuePriority.PROFESSIONAL - def get_daily_limit(self) -> int: - return int(1e9) - def get_priority(self) -> int: return 100 @@ -89,9 +50,6 @@ class TeamQueueDispatcher(BaseQueueDispatcher): def get_queue_name(self) -> str: return QueuePriority.TEAM - def get_daily_limit(self) -> int: - return int(1e9) - def get_priority(self) -> int: return 50 @@ -102,9 +60,6 @@ class SandboxQueueDispatcher(BaseQueueDispatcher): def get_queue_name(self) -> str: return QueuePriority.SANDBOX - def get_daily_limit(self) -> int: - return dify_config.APP_DAILY_RATE_LIMIT - def get_priority(self) -> int: return 10 @@ -148,4 +103,4 @@ class QueueDispatcherManager: SandboxQueueDispatcher, # Default to sandbox for unknown plans ) - return dispatcher_class() + return dispatcher_class() # type: ignore diff --git a/api/services/workflow/rate_limiter.py b/api/services/workflow/rate_limiter.py deleted file mode 100644 index 1ccb4e1961..0000000000 --- a/api/services/workflow/rate_limiter.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -Day-based rate limiter for workflow executions. - -Implements UTC-based daily quotas that reset at midnight UTC for consistent rate limiting. -""" - -from datetime import UTC, datetime, time, timedelta -from typing import Union - -import pytz -from redis import Redis -from sqlalchemy import select - -from extensions.ext_database import db -from extensions.ext_redis import RedisClientWrapper -from models.account import Account, TenantAccountJoin, TenantAccountRole - - -class TenantDailyRateLimiter: - """ - Day-based rate limiter that resets at midnight UTC - - This class provides Redis-based rate limiting with the following features: - - Daily quotas that reset at midnight UTC for consistency - - Atomic check-and-consume operations - - Automatic cleanup of stale counters - - Timezone-aware error messages for better UX - """ - - def __init__(self, redis_client: Union[Redis, RedisClientWrapper]): - self.redis = redis_client - - def get_tenant_owner_timezone(self, tenant_id: str) -> str: - """ - Get timezone of tenant owner - - Args: - tenant_id: The tenant identifier - - Returns: - Timezone string (e.g., 'America/New_York', 'UTC') - """ - # Query to get tenant owner's timezone using scalar and select - owner = db.session.scalar( - select(Account) - .join(TenantAccountJoin, TenantAccountJoin.account_id == Account.id) - .where(TenantAccountJoin.tenant_id == tenant_id, TenantAccountJoin.role == TenantAccountRole.OWNER) - ) - - if not owner: - return "UTC" - - return owner.timezone or "UTC" - - def _get_day_key(self, tenant_id: str) -> str: - """ - Get Redis key for current UTC day - - Args: - tenant_id: The tenant identifier - - Returns: - Redis key for the current UTC day - """ - utc_now = datetime.now(UTC) - date_str = utc_now.strftime("%Y-%m-%d") - return f"workflow:daily_limit:{tenant_id}:{date_str}" - - def _get_ttl_seconds(self) -> int: - """ - Calculate seconds until UTC midnight - - Returns: - Number of seconds until UTC midnight - """ - utc_now = datetime.now(UTC) - - # Get next midnight in UTC - next_midnight = datetime.combine(utc_now.date() + timedelta(days=1), time.min) - next_midnight = next_midnight.replace(tzinfo=UTC) - - return int((next_midnight - utc_now).total_seconds()) - - def check_and_consume(self, tenant_id: str, max_daily_limit: int) -> bool: - """ - Check if quota available and consume one execution - - Args: - tenant_id: The tenant identifier - max_daily_limit: Maximum daily limit - - Returns: - True if quota consumed successfully, False if limit reached - """ - key = self._get_day_key(tenant_id) - ttl = self._get_ttl_seconds() - - # Check current usage - current = self.redis.get(key) - - if current is None: - # First execution of the day - set to 1 - self.redis.setex(key, ttl, 1) - return True - - current_count = int(current) - if current_count < max_daily_limit: - # Within limit, increment - new_count = self.redis.incr(key) - # Update TTL - self.redis.expire(key, ttl) - - # Double-check in case of race condition - if new_count <= max_daily_limit: - return True - else: - # Race condition occurred, decrement back - self.redis.decr(key) - return False - else: - # Limit exceeded - return False - - def get_remaining_quota(self, tenant_id: str, max_daily_limit: int) -> int: - """ - Get remaining quota for the day - - Args: - tenant_id: The tenant identifier - max_daily_limit: Maximum daily limit - - Returns: - Number of remaining executions for the day - """ - key = self._get_day_key(tenant_id) - used = int(self.redis.get(key) or 0) - return max(0, max_daily_limit - used) - - def get_current_usage(self, tenant_id: str) -> int: - """ - Get current usage for the day - - Args: - tenant_id: The tenant identifier - - Returns: - Number of executions used today - """ - key = self._get_day_key(tenant_id) - return int(self.redis.get(key) or 0) - - def reset_quota(self, tenant_id: str) -> bool: - """ - Reset quota for testing purposes - - Args: - tenant_id: The tenant identifier - - Returns: - True if key was deleted, False if key didn't exist - """ - key = self._get_day_key(tenant_id) - return bool(self.redis.delete(key)) - - def get_quota_reset_time(self, tenant_id: str, timezone_str: str) -> datetime: - """ - Get the time when quota will reset (next UTC midnight in tenant's timezone) - - Args: - tenant_id: The tenant identifier - timezone_str: Tenant's timezone for display purposes - - Returns: - Datetime when quota resets (next UTC midnight in tenant's timezone) - """ - tz = pytz.timezone(timezone_str) - utc_now = datetime.now(UTC) - - # Get next midnight in UTC, then convert to tenant's timezone - next_utc_midnight = datetime.combine(utc_now.date() + timedelta(days=1), time.min) - next_utc_midnight = pytz.UTC.localize(next_utc_midnight) - - return next_utc_midnight.astimezone(tz) diff --git a/api/services/workflow/workflow_converter.py b/api/services/workflow/workflow_converter.py index e70b2b5c95..067feb994f 100644 --- a/api/services/workflow/workflow_converter.py +++ b/api/services/workflow/workflow_converter.py @@ -1,5 +1,5 @@ import json -from typing import Any +from typing import Any, TypedDict from core.app.app_config.entities import ( DatasetEntity, @@ -28,6 +28,12 @@ from models.model import App, AppMode, AppModelConfig from models.workflow import Workflow, WorkflowType +class _NodeType(TypedDict): + id: str + position: None + data: dict[str, Any] + + class WorkflowConverter: """ App Convert to Workflow Mode @@ -217,7 +223,7 @@ class WorkflowConverter: return app_config - def _convert_to_start_node(self, variables: list[VariableEntity]): + def _convert_to_start_node(self, variables: list[VariableEntity]) -> _NodeType: """ Convert to Start Node :param variables: list of variables @@ -235,7 +241,7 @@ class WorkflowConverter: def _convert_to_http_request_node( self, app_model: App, variables: list[VariableEntity], external_data_variables: list[ExternalDataVariableEntity] - ) -> tuple[list[dict], dict[str, str]]: + ) -> tuple[list[_NodeType], dict[str, str]]: """ Convert API Based Extension to HTTP Request Node :param app_model: App instance @@ -285,7 +291,7 @@ class WorkflowConverter: request_body_json = json.dumps(request_body) request_body_json = request_body_json.replace(r"\{\{", "{{").replace(r"\}\}", "}}") - http_request_node = { + http_request_node: _NodeType = { "id": f"http_request_{index}", "position": None, "data": { @@ -303,7 +309,7 @@ class WorkflowConverter: nodes.append(http_request_node) # append code node for response body parsing - code_node: dict[str, Any] = { + code_node: _NodeType = { "id": f"code_{index}", "position": None, "data": { @@ -326,7 +332,7 @@ class WorkflowConverter: def _convert_to_knowledge_retrieval_node( self, new_app_mode: AppMode, dataset_config: DatasetEntity, model_config: ModelConfigEntity - ) -> dict | None: + ) -> _NodeType | None: """ Convert datasets to Knowledge Retrieval Node :param new_app_mode: new app mode @@ -384,7 +390,7 @@ class WorkflowConverter: prompt_template: PromptTemplateEntity, file_upload: FileUploadConfig | None = None, external_data_variable_node_mapping: dict[str, str] | None = None, - ): + ) -> _NodeType: """ Convert to LLM Node :param original_app_mode: original app mode @@ -561,7 +567,7 @@ class WorkflowConverter: return template - def _convert_to_end_node(self): + def _convert_to_end_node(self) -> _NodeType: """ Convert to End Node :return: @@ -577,7 +583,7 @@ class WorkflowConverter: }, } - def _convert_to_answer_node(self): + def _convert_to_answer_node(self) -> _NodeType: """ Convert to Answer Node :return: @@ -598,7 +604,7 @@ class WorkflowConverter: """ return {"id": f"{source}-{target}", "source": source, "target": target} - def _append_node(self, graph: dict, node: dict): + def _append_node(self, graph: dict[str, Any], node: _NodeType): """ Append Node to Graph diff --git a/api/services/workflow_app_service.py b/api/services/workflow_app_service.py index 23dd436675..01f0c7a55a 100644 --- a/api/services/workflow_app_service.py +++ b/api/services/workflow_app_service.py @@ -1,12 +1,37 @@ +import json import uuid from datetime import datetime +from typing import Any from sqlalchemy import and_, func, or_, select from sqlalchemy.orm import Session from core.workflow.enums import WorkflowExecutionStatus from models import Account, App, EndUser, WorkflowAppLog, WorkflowRun -from models.enums import CreatorUserRole +from models.enums import AppTriggerType, CreatorUserRole +from models.trigger import WorkflowTriggerLog +from services.plugin.plugin_service import PluginService +from services.workflow.entities import TriggerMetadata + + +# Since the workflow_app_log table has exceeded 100 million records, we use an additional details field to extend it +class LogView: + """Lightweight wrapper for WorkflowAppLog with computed details. + + - Exposes `details_` for marshalling to `details` in API response + - Proxies all other attributes to the underlying `WorkflowAppLog` + """ + + def __init__(self, log: WorkflowAppLog, details: dict | None): + self.log = log + self.details_ = details + + @property + def details(self) -> dict | None: + return self.details_ + + def __getattr__(self, name): + return getattr(self.log, name) class WorkflowAppService: @@ -21,6 +46,7 @@ class WorkflowAppService: created_at_after: datetime | None = None, page: int = 1, limit: int = 20, + detail: bool = False, created_by_end_user_session_id: str | None = None, created_by_account: str | None = None, ): @@ -34,6 +60,7 @@ class WorkflowAppService: :param created_at_after: filter logs created after this timestamp :param page: page number :param limit: items per page + :param detail: whether to return detailed logs :param created_by_end_user_session_id: filter by end user session id :param created_by_account: filter by account email :return: Pagination object @@ -43,8 +70,20 @@ class WorkflowAppService: WorkflowAppLog.tenant_id == app_model.tenant_id, WorkflowAppLog.app_id == app_model.id ) + if detail: + # Simple left join by workflow_run_id to fetch trigger_metadata + stmt = stmt.outerjoin( + WorkflowTriggerLog, + and_( + WorkflowTriggerLog.tenant_id == app_model.tenant_id, + WorkflowTriggerLog.app_id == app_model.id, + WorkflowTriggerLog.workflow_run_id == WorkflowAppLog.workflow_run_id, + ), + ).add_columns(WorkflowTriggerLog.trigger_metadata) + if keyword or status: stmt = stmt.join(WorkflowRun, WorkflowRun.id == WorkflowAppLog.workflow_run_id) + # Join to workflow run for filtering when needed. if keyword: keyword_like_val = f"%{keyword[:30].encode('unicode_escape').decode('utf-8')}%".replace(r"\u", r"\\u") @@ -108,9 +147,17 @@ class WorkflowAppService: # Apply pagination limits offset_stmt = stmt.offset((page - 1) * limit).limit(limit) - # Execute query and get items - items = list(session.scalars(offset_stmt).all()) + # wrapper moved to module scope as `LogView` + # Execute query and get items + if detail: + rows = session.execute(offset_stmt).all() + items = [ + LogView(log, {"trigger_metadata": self.handle_trigger_metadata(app_model.tenant_id, meta_val)}) + for log, meta_val in rows + ] + else: + items = [LogView(log, None) for log in session.scalars(offset_stmt).all()] return { "page": page, "limit": limit, @@ -119,6 +166,31 @@ class WorkflowAppService: "data": items, } + def handle_trigger_metadata(self, tenant_id: str, meta_val: str) -> dict[str, Any]: + metadata: dict[str, Any] | None = self._safe_json_loads(meta_val) + if not metadata: + return {} + trigger_metadata = TriggerMetadata.model_validate(metadata) + if trigger_metadata.type == AppTriggerType.TRIGGER_PLUGIN: + icon = metadata.get("icon_filename") + icon_dark = metadata.get("icon_dark_filename") + metadata["icon"] = PluginService.get_plugin_icon_url(tenant_id=tenant_id, filename=icon) if icon else None + metadata["icon_dark"] = ( + PluginService.get_plugin_icon_url(tenant_id=tenant_id, filename=icon_dark) if icon_dark else None + ) + return metadata + + @staticmethod + def _safe_json_loads(val): + if not val: + return None + if isinstance(val, str): + try: + return json.loads(val) + except Exception: + return None + return val + @staticmethod def _safe_parse_uuid(value: str): # fast check diff --git a/api/services/workflow_draft_variable_service.py b/api/services/workflow_draft_variable_service.py index 5e63a83bb1..f299ce3baa 100644 --- a/api/services/workflow_draft_variable_service.py +++ b/api/services/workflow_draft_variable_service.py @@ -7,7 +7,8 @@ from enum import StrEnum from typing import Any, ClassVar from sqlalchemy import Engine, orm, select -from sqlalchemy.dialects.postgresql import insert +from sqlalchemy.dialects.mysql import insert as mysql_insert +from sqlalchemy.dialects.postgresql import insert as pg_insert from sqlalchemy.orm import Session, sessionmaker from sqlalchemy.sql.expression import and_, or_ @@ -627,28 +628,51 @@ def _batch_upsert_draft_variable( # # For these reasons, we use the SQLAlchemy query builder and rely on dialect-specific # insert operations instead of the ORM layer. - stmt = insert(WorkflowDraftVariable).values([_model_to_insertion_dict(v) for v in draft_vars]) - if policy == _UpsertPolicy.OVERWRITE: - stmt = stmt.on_conflict_do_update( - index_elements=WorkflowDraftVariable.unique_app_id_node_id_name(), - set_={ + + # Use different insert statements based on database type + if dify_config.SQLALCHEMY_DATABASE_URI_SCHEME == "postgresql": + stmt = pg_insert(WorkflowDraftVariable).values([_model_to_insertion_dict(v) for v in draft_vars]) + if policy == _UpsertPolicy.OVERWRITE: + stmt = stmt.on_conflict_do_update( + index_elements=WorkflowDraftVariable.unique_app_id_node_id_name(), + set_={ + # Refresh creation timestamp to ensure updated variables + # appear first in chronologically sorted result sets. + "created_at": stmt.excluded.created_at, + "updated_at": stmt.excluded.updated_at, + "last_edited_at": stmt.excluded.last_edited_at, + "description": stmt.excluded.description, + "value_type": stmt.excluded.value_type, + "value": stmt.excluded.value, + "visible": stmt.excluded.visible, + "editable": stmt.excluded.editable, + "node_execution_id": stmt.excluded.node_execution_id, + "file_id": stmt.excluded.file_id, + }, + ) + elif policy == _UpsertPolicy.IGNORE: + stmt = stmt.on_conflict_do_nothing(index_elements=WorkflowDraftVariable.unique_app_id_node_id_name()) + else: + stmt = mysql_insert(WorkflowDraftVariable).values([_model_to_insertion_dict(v) for v in draft_vars]) # type: ignore[assignment] + if policy == _UpsertPolicy.OVERWRITE: + stmt = stmt.on_duplicate_key_update( # type: ignore[attr-defined] # Refresh creation timestamp to ensure updated variables # appear first in chronologically sorted result sets. - "created_at": stmt.excluded.created_at, - "updated_at": stmt.excluded.updated_at, - "last_edited_at": stmt.excluded.last_edited_at, - "description": stmt.excluded.description, - "value_type": stmt.excluded.value_type, - "value": stmt.excluded.value, - "visible": stmt.excluded.visible, - "editable": stmt.excluded.editable, - "node_execution_id": stmt.excluded.node_execution_id, - "file_id": stmt.excluded.file_id, - }, - ) - elif policy == _UpsertPolicy.IGNORE: - stmt = stmt.on_conflict_do_nothing(index_elements=WorkflowDraftVariable.unique_app_id_node_id_name()) - else: + created_at=stmt.inserted.created_at, # type: ignore[attr-defined] + updated_at=stmt.inserted.updated_at, # type: ignore[attr-defined] + last_edited_at=stmt.inserted.last_edited_at, # type: ignore[attr-defined] + description=stmt.inserted.description, # type: ignore[attr-defined] + value_type=stmt.inserted.value_type, # type: ignore[attr-defined] + value=stmt.inserted.value, # type: ignore[attr-defined] + visible=stmt.inserted.visible, # type: ignore[attr-defined] + editable=stmt.inserted.editable, # type: ignore[attr-defined] + node_execution_id=stmt.inserted.node_execution_id, # type: ignore[attr-defined] + file_id=stmt.inserted.file_id, # type: ignore[attr-defined] + ) + elif policy == _UpsertPolicy.IGNORE: + stmt = stmt.prefix_with("IGNORE") + + if policy not in [_UpsertPolicy.OVERWRITE, _UpsertPolicy.IGNORE]: raise Exception("Invalid value for update policy.") session.execute(stmt) @@ -808,7 +832,11 @@ class DraftVariableSaver: # We only save conversation variable here. if selector[0] != CONVERSATION_VARIABLE_NODE_ID: continue - segment = WorkflowDraftVariable.build_segment_with_type(segment_type=item.value_type, value=item.new_value) + # Conversation variables are exposed as NUMBER in the UI even if their + # persisted type is INTEGER. Allow float updates by loosening the type + # to NUMBER here so downstream storage infers the precise subtype. + segment_type = SegmentType.NUMBER if item.value_type == SegmentType.INTEGER else item.value_type + segment = WorkflowDraftVariable.build_segment_with_type(segment_type=segment_type, value=item.new_value) draft_vars.append( WorkflowDraftVariable.new_conversation_variable( app_id=self._app_id, @@ -1026,7 +1054,7 @@ class DraftVariableSaver: return if self._node_type == NodeType.VARIABLE_ASSIGNER: draft_vars = self._build_from_variable_assigner_mapping(process_data=process_data) - elif self._node_type == NodeType.START: + elif self._node_type == NodeType.START or self._node_type.is_trigger_node: draft_vars = self._build_variables_from_start_mapping(outputs) else: draft_vars = self._build_variables_from_mapping(outputs) diff --git a/api/services/workflow_run_service.py b/api/services/workflow_run_service.py index 5c8719b499..b903d8df5f 100644 --- a/api/services/workflow_run_service.py +++ b/api/services/workflow_run_service.py @@ -1,6 +1,7 @@ import threading from collections.abc import Sequence +from sqlalchemy import Engine from sqlalchemy.orm import sessionmaker import contexts @@ -14,17 +15,26 @@ from models import ( WorkflowRun, WorkflowRunTriggeredFrom, ) +from repositories.api_workflow_run_repository import APIWorkflowRunRepository from repositories.factory import DifyAPIRepositoryFactory class WorkflowRunService: - def __init__(self): + _session_factory: sessionmaker + _workflow_run_repo: APIWorkflowRunRepository + + def __init__(self, session_factory: Engine | sessionmaker | None = None): """Initialize WorkflowRunService with repository dependencies.""" - session_maker = sessionmaker(bind=db.engine, expire_on_commit=False) + if session_factory is None: + session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) + elif isinstance(session_factory, Engine): + session_factory = sessionmaker(bind=session_factory, expire_on_commit=False) + + self._session_factory = session_factory self._node_execution_service_repo = DifyAPIRepositoryFactory.create_api_workflow_node_execution_repository( - session_maker + self._session_factory ) - self._workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker) + self._workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(self._session_factory) def get_paginate_advanced_chat_workflow_runs( self, app_model: App, args: dict, triggered_from: WorkflowRunTriggeredFrom = WorkflowRunTriggeredFrom.DEBUGGING diff --git a/api/services/workflow_service.py b/api/services/workflow_service.py index aa53e27ece..b6764f1fa7 100644 --- a/api/services/workflow_service.py +++ b/api/services/workflow_service.py @@ -7,6 +7,7 @@ from typing import Any, cast from sqlalchemy import exists, select from sqlalchemy.orm import Session, sessionmaker +from configs import dify_config from core.app.app_config.entities import VariableEntityType from core.app.apps.advanced_chat.app_config_manager import AdvancedChatAppConfigManager from core.app.apps.workflow.app_config_manager import WorkflowAppConfigManager @@ -14,7 +15,7 @@ from core.file import File from core.repositories import DifyCoreRepositoryFactory from core.variables import Variable from core.variables.variables import VariableUnion -from core.workflow.entities import WorkflowNodeExecution +from core.workflow.entities import VariablePool, WorkflowNodeExecution from core.workflow.enums import ErrorStrategy, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus from core.workflow.errors import WorkflowNodeRunFailedError from core.workflow.graph_events import GraphNodeEventBase, NodeRunFailedEvent, NodeRunSucceededEvent @@ -23,12 +24,9 @@ from core.workflow.nodes import NodeType from core.workflow.nodes.base.node import Node from core.workflow.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING from core.workflow.nodes.start.entities import StartNodeData -from core.workflow.nodes.trigger_plugin.entities import TriggerEventNodeData -from core.workflow.nodes.trigger_schedule.entities import TriggerScheduleNodeData -from core.workflow.nodes.trigger_webhook.entities import WebhookData -from core.workflow.runtime import VariablePool from core.workflow.system_variable import SystemVariable from core.workflow.workflow_entry import WorkflowEntry +from enums.cloud_plan import CloudPlan from events.app_event import app_draft_workflow_was_synced, app_published_workflow_was_updated from extensions.ext_database import db from extensions.ext_storage import storage @@ -39,8 +37,9 @@ from models.model import App, AppMode from models.tools import WorkflowToolProvider from models.workflow import Workflow, WorkflowNodeExecutionModel, WorkflowNodeExecutionTriggeredFrom, WorkflowType from repositories.factory import DifyAPIRepositoryFactory +from services.billing_service import BillingService from services.enterprise.plugin_manager_service import PluginCredentialType -from services.errors.app import IsDraftWorkflowError, WorkflowHashNotEqualError +from services.errors.app import IsDraftWorkflowError, TriggerNodeLimitExceededError, WorkflowHashNotEqualError from services.workflow.workflow_converter import WorkflowConverter from .errors.workflow_service import DraftWorkflowDeletionError, WorkflowInUseError @@ -214,6 +213,9 @@ class WorkflowService: # validate features structure self.validate_features_structure(app_model=app_model, features=features) + # validate graph structure + self.validate_graph_structure(graph=graph) + # create draft workflow if not found if not workflow: workflow = Workflow( @@ -270,6 +272,24 @@ class WorkflowService: if FeatureService.get_system_features().plugin_manager.enabled: self._validate_workflow_credentials(draft_workflow) + # validate graph structure + self.validate_graph_structure(graph=draft_workflow.graph_dict) + + # billing check + if dify_config.BILLING_ENABLED: + limit_info = BillingService.get_info(app_model.tenant_id) + if limit_info["subscription"]["plan"] == CloudPlan.SANDBOX: + # Check trigger node count limit for SANDBOX plan + trigger_node_count = sum( + 1 + for _, node_data in draft_workflow.walk_nodes() + if (node_type_str := node_data.get("type")) + and isinstance(node_type_str, str) + and NodeType(node_type_str).is_trigger_node + ) + if trigger_node_count > 2: + raise TriggerNodeLimitExceededError(count=trigger_node_count, limit=2) + # create new workflow workflow = Workflow.new( tenant_id=app_model.tenant_id, @@ -634,13 +654,7 @@ class WorkflowService: app=app_model, workflow=draft_workflow, ) - if node_type == NodeType.TRIGGER_WEBHOOK: - start_data = WebhookData.model_validate(node_data) - elif node_type == NodeType.TRIGGER_PLUGIN: - start_data = TriggerEventNodeData.model_validate(node_data) - elif node_type == NodeType.TRIGGER_SCHEDULE: - start_data = TriggerScheduleNodeData.model_validate(node_data) - else: + if node_type is NodeType.START: start_data = StartNodeData.model_validate(node_data) user_inputs = _rebuild_file_for_user_inputs_in_start_node( tenant_id=draft_workflow.tenant_id, start_node_data=start_data, user_inputs=user_inputs @@ -905,6 +919,31 @@ class WorkflowService: return new_app + def validate_graph_structure(self, graph: Mapping[str, Any]): + """ + Validate workflow graph structure. + + This performs a lightweight validation on the graph, checking for structural + inconsistencies such as the coexistence of start and trigger nodes. + """ + node_configs = graph.get("nodes", []) + node_configs = cast(list[dict[str, Any]], node_configs) + + # is empty graph + if not node_configs: + return + + node_types: set[NodeType] = set() + for node in node_configs: + node_type = node.get("data", {}).get("type") + if node_type: + node_types.add(NodeType(node_type)) + + # start node and trigger node cannot coexist + if NodeType.START in node_types: + if any(nt.is_trigger_node for nt in node_types): + raise ValueError("Start node and trigger nodes cannot coexist in the same workflow") + def validate_features_structure(self, app_model: App, features: dict): if app_model.mode == AppMode.ADVANCED_CHAT: return AdvancedChatAppConfigManager.config_validate( @@ -1007,10 +1046,11 @@ def _setup_variable_pool( conversation_variables: list[Variable], ): # Only inject system variables for START node type. - if node_type == NodeType.START: + if node_type == NodeType.START or node_type.is_trigger_node: system_variable = SystemVariable( user_id=user_id, app_id=workflow.app_id, + timestamp=int(naive_utc_now().timestamp()), workflow_id=workflow.id, files=files or [], workflow_execution_id=str(uuid.uuid4()), diff --git a/api/tasks/add_document_to_index_task.py b/api/tasks/add_document_to_index_task.py index 5df9888acc..933ad6b9e2 100644 --- a/api/tasks/add_document_to_index_task.py +++ b/api/tasks/add_document_to_index_task.py @@ -48,7 +48,6 @@ def add_document_to_index_task(dataset_document_id: str): db.session.query(DocumentSegment) .where( DocumentSegment.document_id == dataset_document.id, - DocumentSegment.enabled == False, DocumentSegment.status == "completed", ) .order_by(DocumentSegment.position.asc()) diff --git a/api/tasks/async_workflow_tasks.py b/api/tasks/async_workflow_tasks.py index c662b7c4a7..f8aac5b469 100644 --- a/api/tasks/async_workflow_tasks.py +++ b/api/tasks/async_workflow_tasks.py @@ -13,10 +13,9 @@ from sqlalchemy import select from sqlalchemy.orm import Session, sessionmaker from configs import dify_config -from core.app.apps.workflow.app_generator import WorkflowAppGenerator -from core.app.engine_layers.timeslice_layer import TimeSliceLayer -from core.app.engine_layers.trigger_post_layer import TriggerPostLayer +from core.app.apps.workflow.app_generator import SKIP_PREPARE_USER_INPUTS_KEY, WorkflowAppGenerator from core.app.entities.app_invoke_entities import InvokeFrom +from core.app.layers.trigger_post_layer import TriggerPostLayer from extensions.ext_database import db from models.account import Account from models.enums import CreatorUserRole, WorkflowTriggerStatus @@ -81,6 +80,17 @@ def execute_workflow_sandbox(task_data_dict: dict[str, Any]): ) +def _build_generator_args(trigger_data: TriggerData) -> dict[str, Any]: + """Build args passed into WorkflowAppGenerator.generate for Celery executions.""" + + args: dict[str, Any] = { + "inputs": dict(trigger_data.inputs), + "files": list(trigger_data.files), + SKIP_PREPARE_USER_INPUTS_KEY: True, + } + return args + + def _execute_workflow_common( task_data: WorkflowTaskData, cfs_plan_scheduler: AsyncWorkflowCFSPlanScheduler, @@ -128,7 +138,7 @@ def _execute_workflow_common( generator = WorkflowAppGenerator() # Prepare args matching AppGenerateService.generate format - args: dict[str, Any] = {"inputs": dict(trigger_data.inputs), "files": list(trigger_data.files)} + args = _build_generator_args(trigger_data) # If workflow_id was specified, add it to args if trigger_data.workflow_id: @@ -143,11 +153,11 @@ def _execute_workflow_common( invoke_from=InvokeFrom.SERVICE_API, streaming=False, call_depth=0, - triggered_from=trigger_data.trigger_type, + triggered_from=trigger_data.trigger_from, root_node_id=trigger_data.root_node_id, - layers=[ - TimeSliceLayer(cfs_plan_scheduler), - TriggerPostLayer(cfs_plan_scheduler_entity, start_time, trigger_log.id), + graph_engine_layers=[ + # TODO: Re-enable TimeSliceLayer after the HITL release. + TriggerPostLayer(cfs_plan_scheduler_entity, start_time, trigger_log.id, session_factory), ], ) diff --git a/api/tasks/batch_clean_document_task.py b/api/tasks/batch_clean_document_task.py index 447443703a..3e1bd16cc7 100644 --- a/api/tasks/batch_clean_document_task.py +++ b/api/tasks/batch_clean_document_task.py @@ -9,7 +9,7 @@ from core.rag.index_processor.index_processor_factory import IndexProcessorFacto from core.tools.utils.web_reader_tool import get_image_upload_file_ids from extensions.ext_database import db from extensions.ext_storage import storage -from models.dataset import Dataset, DocumentSegment +from models.dataset import Dataset, DatasetMetadataBinding, DocumentSegment from models.model import UploadFile logger = logging.getLogger(__name__) @@ -37,6 +37,11 @@ def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form if not dataset: raise Exception("Document has no dataset") + db.session.query(DatasetMetadataBinding).where( + DatasetMetadataBinding.dataset_id == dataset_id, + DatasetMetadataBinding.document_id.in_(document_ids), + ).delete(synchronize_session=False) + segments = db.session.scalars( select(DocumentSegment).where(DocumentSegment.document_id.in_(document_ids)) ).all() @@ -71,7 +76,8 @@ def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form except Exception: logger.exception("Delete file failed when document deleted, file_id: %s", file.id) db.session.delete(file) - db.session.commit() + + db.session.commit() end_at = time.perf_counter() logger.info( diff --git a/api/tasks/document_indexing_task.py b/api/tasks/document_indexing_task.py index 012ae8f706..fee4430612 100644 --- a/api/tasks/document_indexing_task.py +++ b/api/tasks/document_indexing_task.py @@ -1,11 +1,15 @@ import logging import time +from collections.abc import Callable, Sequence import click from celery import shared_task from configs import dify_config +from core.entities.document_task import DocumentTask from core.indexing_runner import DocumentIsPausedError, IndexingRunner +from core.rag.pipeline.queue import TenantIsolatedTaskQueue +from enums.cloud_plan import CloudPlan from extensions.ext_database import db from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, Document @@ -21,8 +25,24 @@ def document_indexing_task(dataset_id: str, document_ids: list): :param dataset_id: :param document_ids: + .. warning:: TO BE DEPRECATED + This function will be deprecated and removed in a future version. + Use normal_document_indexing_task or priority_document_indexing_task instead. + Usage: document_indexing_task.delay(dataset_id, document_ids) """ + logger.warning("document indexing legacy mode received: %s - %s", dataset_id, document_ids) + _document_indexing(dataset_id, document_ids) + + +def _document_indexing(dataset_id: str, document_ids: Sequence[str]): + """ + Process document for tasks + :param dataset_id: + :param document_ids: + + Usage: _document_indexing(dataset_id, document_ids) + """ documents = [] start_at = time.perf_counter() @@ -38,7 +58,7 @@ def document_indexing_task(dataset_id: str, document_ids: list): vector_space = features.vector_space count = len(document_ids) batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT) - if features.billing.subscription.plan == "sandbox" and count > 1: + if features.billing.subscription.plan == CloudPlan.SANDBOX and count > 1: raise ValueError("Your current plan does not support batch upload, please upgrade your plan.") if count > batch_upload_limit: raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.") @@ -86,3 +106,63 @@ def document_indexing_task(dataset_id: str, document_ids: list): logger.exception("Document indexing task failed, dataset_id: %s", dataset_id) finally: db.session.close() + + +def _document_indexing_with_tenant_queue( + tenant_id: str, dataset_id: str, document_ids: Sequence[str], task_func: Callable[[str, str, Sequence[str]], None] +): + try: + _document_indexing(dataset_id, document_ids) + except Exception: + logger.exception("Error processing document indexing %s for tenant %s: %s", dataset_id, tenant_id) + finally: + tenant_isolated_task_queue = TenantIsolatedTaskQueue(tenant_id, "document_indexing") + + # Check if there are waiting tasks in the queue + # Use rpop to get the next task from the queue (FIFO order) + next_tasks = tenant_isolated_task_queue.pull_tasks(count=dify_config.TENANT_ISOLATED_TASK_CONCURRENCY) + + logger.info("document indexing tenant isolation queue next tasks: %s", next_tasks) + + if next_tasks: + for next_task in next_tasks: + document_task = DocumentTask(**next_task) + # Process the next waiting task + # Keep the flag set to indicate a task is running + tenant_isolated_task_queue.set_task_waiting_time() + task_func.delay( # type: ignore + tenant_id=document_task.tenant_id, + dataset_id=document_task.dataset_id, + document_ids=document_task.document_ids, + ) + else: + # No more waiting tasks, clear the flag + tenant_isolated_task_queue.delete_task_key() + + +@shared_task(queue="dataset") +def normal_document_indexing_task(tenant_id: str, dataset_id: str, document_ids: Sequence[str]): + """ + Async process document + :param tenant_id: + :param dataset_id: + :param document_ids: + + Usage: normal_document_indexing_task.delay(tenant_id, dataset_id, document_ids) + """ + logger.info("normal document indexing task received: %s - %s - %s", tenant_id, dataset_id, document_ids) + _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, normal_document_indexing_task) + + +@shared_task(queue="priority_dataset") +def priority_document_indexing_task(tenant_id: str, dataset_id: str, document_ids: Sequence[str]): + """ + Priority async process document + :param tenant_id: + :param dataset_id: + :param document_ids: + + Usage: priority_document_indexing_task.delay(tenant_id, dataset_id, document_ids) + """ + logger.info("priority document indexing task received: %s - %s - %s", tenant_id, dataset_id, document_ids) + _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, priority_document_indexing_task) diff --git a/api/tasks/duplicate_document_indexing_task.py b/api/tasks/duplicate_document_indexing_task.py index 2020179cd9..6492e356a3 100644 --- a/api/tasks/duplicate_document_indexing_task.py +++ b/api/tasks/duplicate_document_indexing_task.py @@ -8,6 +8,7 @@ from sqlalchemy import select from configs import dify_config from core.indexing_runner import DocumentIsPausedError, IndexingRunner from core.rag.index_processor.index_processor_factory import IndexProcessorFactory +from enums.cloud_plan import CloudPlan from extensions.ext_database import db from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, Document, DocumentSegment @@ -41,7 +42,7 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list): if features.billing.enabled: vector_space = features.vector_space count = len(document_ids) - if features.billing.subscription.plan == "sandbox" and count > 1: + if features.billing.subscription.plan == CloudPlan.SANDBOX and count > 1: raise ValueError("Your current plan does not support batch upload, please upgrade your plan.") batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT) if count > batch_upload_limit: diff --git a/api/tasks/rag_pipeline/priority_rag_pipeline_run_task.py b/api/tasks/rag_pipeline/priority_rag_pipeline_run_task.py index 6de95a3b85..a7f61d9811 100644 --- a/api/tasks/rag_pipeline/priority_rag_pipeline_run_task.py +++ b/api/tasks/rag_pipeline/priority_rag_pipeline_run_task.py @@ -12,8 +12,10 @@ from celery import shared_task # type: ignore from flask import current_app, g from sqlalchemy.orm import Session, sessionmaker +from configs import dify_config from core.app.entities.app_invoke_entities import InvokeFrom, RagPipelineGenerateEntity from core.app.entities.rag_pipeline_invoke_entities import RagPipelineInvokeEntity +from core.rag.pipeline.queue import TenantIsolatedTaskQueue from core.repositories.factory import DifyCoreRepositoryFactory from extensions.ext_database import db from models import Account, Tenant @@ -22,6 +24,8 @@ from models.enums import WorkflowRunTriggeredFrom from models.workflow import Workflow, WorkflowNodeExecutionTriggeredFrom from services.file_service import FileService +logger = logging.getLogger(__name__) + @shared_task(queue="priority_pipeline") def priority_rag_pipeline_run_task( @@ -69,6 +73,27 @@ def priority_rag_pipeline_run_task( logging.exception(click.style(f"Error running rag pipeline, tenant_id: {tenant_id}", fg="red")) raise finally: + tenant_isolated_task_queue = TenantIsolatedTaskQueue(tenant_id, "pipeline") + + # Check if there are waiting tasks in the queue + # Use rpop to get the next task from the queue (FIFO order) + next_file_ids = tenant_isolated_task_queue.pull_tasks(count=dify_config.TENANT_ISOLATED_TASK_CONCURRENCY) + logger.info("priority rag pipeline tenant isolation queue next files: %s", next_file_ids) + + if next_file_ids: + for next_file_id in next_file_ids: + # Process the next waiting task + # Keep the flag set to indicate a task is running + tenant_isolated_task_queue.set_task_waiting_time() + priority_rag_pipeline_run_task.delay( # type: ignore + rag_pipeline_invoke_entities_file_id=next_file_id.decode("utf-8") + if isinstance(next_file_id, bytes) + else next_file_id, + tenant_id=tenant_id, + ) + else: + # No more waiting tasks, clear the flag + tenant_isolated_task_queue.delete_task_key() file_service = FileService(db.engine) file_service.delete_file(rag_pipeline_invoke_entities_file_id) db.session.close() diff --git a/api/tasks/rag_pipeline/rag_pipeline_run_task.py b/api/tasks/rag_pipeline/rag_pipeline_run_task.py index f4a092d97e..92f1dfb73d 100644 --- a/api/tasks/rag_pipeline/rag_pipeline_run_task.py +++ b/api/tasks/rag_pipeline/rag_pipeline_run_task.py @@ -12,17 +12,20 @@ from celery import shared_task # type: ignore from flask import current_app, g from sqlalchemy.orm import Session, sessionmaker +from configs import dify_config from core.app.entities.app_invoke_entities import InvokeFrom, RagPipelineGenerateEntity from core.app.entities.rag_pipeline_invoke_entities import RagPipelineInvokeEntity +from core.rag.pipeline.queue import TenantIsolatedTaskQueue from core.repositories.factory import DifyCoreRepositoryFactory from extensions.ext_database import db -from extensions.ext_redis import redis_client from models import Account, Tenant from models.dataset import Pipeline from models.enums import WorkflowRunTriggeredFrom from models.workflow import Workflow, WorkflowNodeExecutionTriggeredFrom from services.file_service import FileService +logger = logging.getLogger(__name__) + @shared_task(queue="pipeline") def rag_pipeline_run_task( @@ -70,26 +73,27 @@ def rag_pipeline_run_task( logging.exception(click.style(f"Error running rag pipeline, tenant_id: {tenant_id}", fg="red")) raise finally: - tenant_self_pipeline_task_queue = f"tenant_self_pipeline_task_queue:{tenant_id}" - tenant_pipeline_task_key = f"tenant_pipeline_task:{tenant_id}" + tenant_isolated_task_queue = TenantIsolatedTaskQueue(tenant_id, "pipeline") # Check if there are waiting tasks in the queue # Use rpop to get the next task from the queue (FIFO order) - next_file_id = redis_client.rpop(tenant_self_pipeline_task_queue) + next_file_ids = tenant_isolated_task_queue.pull_tasks(count=dify_config.TENANT_ISOLATED_TASK_CONCURRENCY) + logger.info("rag pipeline tenant isolation queue next files: %s", next_file_ids) - if next_file_id: - # Process the next waiting task - # Keep the flag set to indicate a task is running - redis_client.setex(tenant_pipeline_task_key, 60 * 60, 1) - rag_pipeline_run_task.delay( # type: ignore - rag_pipeline_invoke_entities_file_id=next_file_id.decode("utf-8") - if isinstance(next_file_id, bytes) - else next_file_id, - tenant_id=tenant_id, - ) + if next_file_ids: + for next_file_id in next_file_ids: + # Process the next waiting task + # Keep the flag set to indicate a task is running + tenant_isolated_task_queue.set_task_waiting_time() + rag_pipeline_run_task.delay( # type: ignore + rag_pipeline_invoke_entities_file_id=next_file_id.decode("utf-8") + if isinstance(next_file_id, bytes) + else next_file_id, + tenant_id=tenant_id, + ) else: # No more waiting tasks, clear the flag - redis_client.delete(tenant_pipeline_task_key) + tenant_isolated_task_queue.delete_task_key() file_service = FileService(db.engine) file_service.delete_file(rag_pipeline_invoke_entities_file_id) db.session.close() diff --git a/api/tasks/remove_app_and_related_data_task.py b/api/tasks/remove_app_and_related_data_task.py index 770bdd6676..3227f6da96 100644 --- a/api/tasks/remove_app_and_related_data_task.py +++ b/api/tasks/remove_app_and_related_data_task.py @@ -17,6 +17,7 @@ from models import ( AppDatasetJoin, AppMCPServer, AppModelConfig, + AppTrigger, Conversation, EndUser, InstalledApp, @@ -30,8 +31,10 @@ from models import ( Site, TagBinding, TraceAppConfig, + WorkflowSchedulePlan, ) from models.tools import WorkflowToolProvider +from models.trigger import WorkflowPluginTrigger, WorkflowTriggerLog, WorkflowWebhookTrigger from models.web import PinnedConversation, SavedMessage from models.workflow import ( ConversationVariable, @@ -69,7 +72,11 @@ def remove_app_and_related_data_task(self, tenant_id: str, app_id: str): _delete_trace_app_configs(tenant_id, app_id) _delete_conversation_variables(app_id=app_id) _delete_draft_variables(app_id) - _delete_app_plugin_triggers(tenant_id, app_id) + _delete_app_triggers(tenant_id, app_id) + _delete_workflow_plugin_triggers(tenant_id, app_id) + _delete_workflow_webhook_triggers(tenant_id, app_id) + _delete_workflow_schedule_plans(tenant_id, app_id) + _delete_workflow_trigger_logs(tenant_id, app_id) end_at = time.perf_counter() logger.info(click.style(f"App and related data deleted: {app_id} latency: {end_at - start_at}", fg="green")) @@ -485,6 +492,72 @@ def _delete_draft_variable_offload_data(conn, file_ids: list[str]) -> int: return files_deleted +def _delete_app_triggers(tenant_id: str, app_id: str): + def del_app_trigger(trigger_id: str): + db.session.query(AppTrigger).where(AppTrigger.id == trigger_id).delete(synchronize_session=False) + + _delete_records( + """select id from app_triggers where tenant_id=:tenant_id and app_id=:app_id limit 1000""", + {"tenant_id": tenant_id, "app_id": app_id}, + del_app_trigger, + "app trigger", + ) + + +def _delete_workflow_plugin_triggers(tenant_id: str, app_id: str): + def del_plugin_trigger(trigger_id: str): + db.session.query(WorkflowPluginTrigger).where(WorkflowPluginTrigger.id == trigger_id).delete( + synchronize_session=False + ) + + _delete_records( + """select id from workflow_plugin_triggers where tenant_id=:tenant_id and app_id=:app_id limit 1000""", + {"tenant_id": tenant_id, "app_id": app_id}, + del_plugin_trigger, + "workflow plugin trigger", + ) + + +def _delete_workflow_webhook_triggers(tenant_id: str, app_id: str): + def del_webhook_trigger(trigger_id: str): + db.session.query(WorkflowWebhookTrigger).where(WorkflowWebhookTrigger.id == trigger_id).delete( + synchronize_session=False + ) + + _delete_records( + """select id from workflow_webhook_triggers where tenant_id=:tenant_id and app_id=:app_id limit 1000""", + {"tenant_id": tenant_id, "app_id": app_id}, + del_webhook_trigger, + "workflow webhook trigger", + ) + + +def _delete_workflow_schedule_plans(tenant_id: str, app_id: str): + def del_schedule_plan(plan_id: str): + db.session.query(WorkflowSchedulePlan).where(WorkflowSchedulePlan.id == plan_id).delete( + synchronize_session=False + ) + + _delete_records( + """select id from workflow_schedule_plans where tenant_id=:tenant_id and app_id=:app_id limit 1000""", + {"tenant_id": tenant_id, "app_id": app_id}, + del_schedule_plan, + "workflow schedule plan", + ) + + +def _delete_workflow_trigger_logs(tenant_id: str, app_id: str): + def del_trigger_log(log_id: str): + db.session.query(WorkflowTriggerLog).where(WorkflowTriggerLog.id == log_id).delete(synchronize_session=False) + + _delete_records( + """select id from workflow_trigger_logs where tenant_id=:tenant_id and app_id=:app_id limit 1000""", + {"tenant_id": tenant_id, "app_id": app_id}, + del_trigger_log, + "workflow trigger log", + ) + + def _delete_records(query_sql: str, params: dict, delete_func: Callable, name: str) -> None: while True: with db.engine.begin() as conn: @@ -502,13 +575,3 @@ def _delete_records(query_sql: str, params: dict, delete_func: Callable, name: s logger.exception("Error occurred while deleting %s %s", name, record_id) continue rs.close() - - -def _delete_app_plugin_triggers(tenant_id: str, app_id: str): - with db.engine.begin() as conn: - result = conn.execute( - sa.text("DELETE FROM workflow_plugin_triggers WHERE app_id = :app_id"), {"app_id": app_id} - ) - deleted_count = result.rowcount - if deleted_count > 0: - logger.info(click.style(f"Deleted {deleted_count} workflow plugin triggers for app {app_id}", fg="green")) diff --git a/api/tasks/trigger_processing_tasks.py b/api/tasks/trigger_processing_tasks.py index d1639375ad..2619d8dd28 100644 --- a/api/tasks/trigger_processing_tasks.py +++ b/api/tasks/trigger_processing_tasks.py @@ -5,8 +5,10 @@ These tasks handle trigger workflow execution asynchronously to avoid blocking the main request thread. """ +import json import logging from collections.abc import Mapping, Sequence +from datetime import UTC, datetime from typing import Any from celery import shared_task @@ -16,22 +18,35 @@ from sqlalchemy.orm import Session from core.app.entities.app_invoke_entities import InvokeFrom from core.plugin.entities.plugin_daemon import CredentialType from core.plugin.entities.request import TriggerInvokeEventResponse +from core.plugin.impl.exc import PluginInvokeError from core.trigger.debug.event_bus import TriggerDebugEventBus from core.trigger.debug.events import PluginTriggerDebugEvent, build_plugin_pool_key +from core.trigger.entities.entities import TriggerProviderEntity from core.trigger.provider import PluginTriggerProviderController from core.trigger.trigger_manager import TriggerManager -from core.workflow.enums import NodeType +from core.workflow.enums import NodeType, WorkflowExecutionStatus from core.workflow.nodes.trigger_plugin.entities import TriggerEventNodeData +from enums.quota_type import QuotaType, unlimited from extensions.ext_database import db +from models.enums import ( + AppTriggerType, + CreatorUserRole, + WorkflowRunTriggeredFrom, + WorkflowTriggerStatus, +) from models.model import EndUser from models.provider_ids import TriggerProviderID -from models.trigger import TriggerSubscription, WorkflowPluginTrigger -from models.workflow import Workflow +from models.trigger import TriggerSubscription, WorkflowPluginTrigger, WorkflowTriggerLog +from models.workflow import Workflow, WorkflowAppLog, WorkflowAppLogCreatedFrom, WorkflowRun from services.async_workflow_service import AsyncWorkflowService from services.end_user_service import EndUserService +from services.errors.app import QuotaExceededError +from services.trigger.app_trigger_service import AppTriggerService from services.trigger.trigger_provider_service import TriggerProviderService from services.trigger.trigger_request_service import TriggerHttpRequestCachingService -from services.workflow.entities import PluginTriggerData, PluginTriggerDispatchData +from services.trigger.trigger_subscription_operator_service import TriggerSubscriptionOperatorService +from services.workflow.entities import PluginTriggerData, PluginTriggerDispatchData, PluginTriggerMetadata +from services.workflow.queue_dispatcher import QueueDispatcherManager logger = logging.getLogger(__name__) @@ -104,6 +119,110 @@ def _get_latest_workflows_by_app_ids( return {w.app_id: w for w in workflows} +def _record_trigger_failure_log( + *, + session: Session, + workflow: Workflow, + plugin_trigger: WorkflowPluginTrigger, + subscription: TriggerSubscription, + trigger_metadata: PluginTriggerMetadata, + end_user: EndUser | None, + error_message: str, + event_name: str, + request_id: str, +) -> None: + """ + Persist a workflow run, workflow app log, and trigger log entry for failed trigger invocations. + """ + now = datetime.now(UTC) + if end_user: + created_by_role = CreatorUserRole.END_USER + created_by = end_user.id + else: + created_by_role = CreatorUserRole.ACCOUNT + created_by = subscription.user_id + + failure_inputs = { + "event_name": event_name, + "subscription_id": subscription.id, + "request_id": request_id, + "plugin_trigger_id": plugin_trigger.id, + } + + workflow_run = WorkflowRun( + tenant_id=workflow.tenant_id, + app_id=workflow.app_id, + workflow_id=workflow.id, + type=workflow.type, + triggered_from=WorkflowRunTriggeredFrom.PLUGIN.value, + version=workflow.version, + graph=workflow.graph, + inputs=json.dumps(failure_inputs), + status=WorkflowExecutionStatus.FAILED.value, + outputs="{}", + error=error_message, + elapsed_time=0.0, + total_tokens=0, + total_steps=0, + created_by_role=created_by_role.value, + created_by=created_by, + created_at=now, + finished_at=now, + exceptions_count=0, + ) + session.add(workflow_run) + session.flush() + + workflow_app_log = WorkflowAppLog( + tenant_id=workflow.tenant_id, + app_id=workflow.app_id, + workflow_id=workflow.id, + workflow_run_id=workflow_run.id, + created_from=WorkflowAppLogCreatedFrom.SERVICE_API.value, + created_by_role=created_by_role.value, + created_by=created_by, + ) + session.add(workflow_app_log) + + dispatcher = QueueDispatcherManager.get_dispatcher(subscription.tenant_id) + queue_name = dispatcher.get_queue_name() + + trigger_data = PluginTriggerData( + app_id=plugin_trigger.app_id, + tenant_id=subscription.tenant_id, + workflow_id=workflow.id, + root_node_id=plugin_trigger.node_id, + inputs={}, + trigger_metadata=trigger_metadata, + plugin_id=subscription.provider_id, + endpoint_id=subscription.endpoint_id, + ) + + trigger_log = WorkflowTriggerLog( + tenant_id=workflow.tenant_id, + app_id=workflow.app_id, + workflow_id=workflow.id, + workflow_run_id=workflow_run.id, + root_node_id=plugin_trigger.node_id, + trigger_metadata=trigger_metadata.model_dump_json(), + trigger_type=AppTriggerType.TRIGGER_PLUGIN, + trigger_data=trigger_data.model_dump_json(), + inputs=json.dumps({}), + status=WorkflowTriggerStatus.FAILED, + error=error_message, + queue_name=queue_name, + retry_count=0, + created_by_role=created_by_role.value, + created_by=created_by, + triggered_at=now, + finished_at=now, + elapsed_time=0.0, + total_tokens=0, + ) + session.add(trigger_log) + session.commit() + + def dispatch_triggered_workflow( user_id: str, subscription: TriggerSubscription, @@ -120,10 +239,7 @@ def dispatch_triggered_workflow( request = TriggerHttpRequestCachingService.get_request(request_id) payload = TriggerHttpRequestCachingService.get_payload(request_id) - from services.trigger.trigger_service import TriggerService - # FIXME: we should avoid import modules inside methods - - subscribers: list[WorkflowPluginTrigger] = TriggerService.get_subscriber_triggers( + subscribers: list[WorkflowPluginTrigger] = TriggerSubscriptionOperatorService.get_subscriber_triggers( tenant_id=subscription.tenant_id, subscription_id=subscription.id, event_name=event_name ) if not subscribers: @@ -138,6 +254,7 @@ def dispatch_triggered_workflow( provider_controller: PluginTriggerProviderController = TriggerManager.get_trigger_provider( tenant_id=subscription.tenant_id, provider_id=TriggerProviderID(subscription.provider_id) ) + trigger_entity: TriggerProviderEntity = provider_controller.entity with Session(db.engine) as session: workflows: Mapping[str, Workflow] = _get_latest_workflows_by_app_ids(session, subscribers) @@ -168,23 +285,79 @@ def dispatch_triggered_workflow( logger.error("Trigger event node not found for app %s", plugin_trigger.app_id) continue - # invoke triger - node_data: TriggerEventNodeData = TriggerEventNodeData.model_validate(event_node) - invoke_response: TriggerInvokeEventResponse = TriggerManager.invoke_trigger_event( - tenant_id=subscription.tenant_id, - user_id=user_id, - provider_id=TriggerProviderID(subscription.provider_id), + # invoke trigger + trigger_metadata = PluginTriggerMetadata( + plugin_unique_identifier=provider_controller.plugin_unique_identifier or "", + endpoint_id=subscription.endpoint_id, + provider_id=subscription.provider_id, event_name=event_name, - parameters=node_data.resolve_parameters( - parameter_schemas=provider_controller.get_event_parameters(event_name=event_name) - ), - credentials=subscription.credentials, - credential_type=CredentialType.of(subscription.credential_type), - subscription=subscription.to_entity(), - request=request, - payload=payload, + icon_filename=trigger_entity.identity.icon or "", + icon_dark_filename=trigger_entity.identity.icon_dark or "", ) - if invoke_response.cancelled: + + # consume quota before invoking trigger + quota_charge = unlimited() + try: + quota_charge = QuotaType.TRIGGER.consume(subscription.tenant_id) + except QuotaExceededError: + AppTriggerService.mark_tenant_triggers_rate_limited(subscription.tenant_id) + logger.info( + "Tenant %s rate limited, skipping plugin trigger %s", subscription.tenant_id, plugin_trigger.id + ) + return 0 + + node_data: TriggerEventNodeData = TriggerEventNodeData.model_validate(event_node) + invoke_response: TriggerInvokeEventResponse | None = None + try: + invoke_response = TriggerManager.invoke_trigger_event( + tenant_id=subscription.tenant_id, + user_id=user_id, + provider_id=TriggerProviderID(subscription.provider_id), + event_name=event_name, + parameters=node_data.resolve_parameters( + parameter_schemas=provider_controller.get_event_parameters(event_name=event_name) + ), + credentials=subscription.credentials, + credential_type=CredentialType.of(subscription.credential_type), + subscription=subscription.to_entity(), + request=request, + payload=payload, + ) + except PluginInvokeError as e: + quota_charge.refund() + + error_message = e.to_user_friendly_error(plugin_name=trigger_entity.identity.name) + try: + end_user = end_users.get(plugin_trigger.app_id) + _record_trigger_failure_log( + session=session, + workflow=workflow, + plugin_trigger=plugin_trigger, + subscription=subscription, + trigger_metadata=trigger_metadata, + end_user=end_user, + error_message=error_message, + event_name=event_name, + request_id=request_id, + ) + except Exception: + logger.exception( + "Failed to record trigger failure log for app %s", + plugin_trigger.app_id, + ) + continue + except Exception: + quota_charge.refund() + + logger.exception( + "Failed to invoke trigger event for app %s", + plugin_trigger.app_id, + ) + continue + + if invoke_response is not None and invoke_response.cancelled: + quota_charge.refund() + logger.info( "Trigger ignored for app %s with trigger event %s", plugin_trigger.app_id, @@ -201,6 +374,7 @@ def dispatch_triggered_workflow( plugin_id=subscription.provider_id, endpoint_id=subscription.endpoint_id, inputs=invoke_response.variables, + trigger_metadata=trigger_metadata, ) # Trigger async workflow @@ -217,6 +391,8 @@ def dispatch_triggered_workflow( event_name, ) except Exception: + quota_charge.refund() + logger.exception( "Failed to trigger workflow for app %s", plugin_trigger.app_id, diff --git a/api/tasks/trigger_subscription_refresh_tasks.py b/api/tasks/trigger_subscription_refresh_tasks.py index 11324df881..ed92f3f3c5 100644 --- a/api/tasks/trigger_subscription_refresh_tasks.py +++ b/api/tasks/trigger_subscription_refresh_tasks.py @@ -6,6 +6,7 @@ from typing import Any from celery import shared_task from sqlalchemy.orm import Session +from configs import dify_config from core.plugin.entities.plugin_daemon import CredentialType from core.trigger.utils.locks import build_trigger_refresh_lock_key from extensions.ext_database import db @@ -25,9 +26,10 @@ def _load_subscription(session: Session, tenant_id: str, subscription_id: str) - def _refresh_oauth_if_expired(tenant_id: str, subscription: TriggerSubscription, now: int) -> None: + threshold_seconds: int = int(dify_config.TRIGGER_PROVIDER_CREDENTIAL_THRESHOLD_SECONDS) if ( subscription.credential_expires_at != -1 - and int(subscription.credential_expires_at) <= now + and int(subscription.credential_expires_at) <= now + threshold_seconds and CredentialType.of(subscription.credential_type) == CredentialType.OAUTH2 ): logger.info( @@ -53,13 +55,15 @@ def _refresh_subscription_if_expired( subscription: TriggerSubscription, now: int, ) -> None: - if subscription.expires_at == -1 or int(subscription.expires_at) > now: + threshold_seconds: int = int(dify_config.TRIGGER_PROVIDER_SUBSCRIPTION_THRESHOLD_SECONDS) + if subscription.expires_at == -1 or int(subscription.expires_at) > now + threshold_seconds: logger.debug( - "Subscription not due: tenant=%s subscription_id=%s expires_at=%s now=%s", + "Subscription not due: tenant=%s subscription_id=%s expires_at=%s now=%s threshold=%s", tenant_id, subscription.id, subscription.expires_at, now, + threshold_seconds, ) return diff --git a/api/tasks/workflow_schedule_tasks.py b/api/tasks/workflow_schedule_tasks.py index 1599e249fd..f54e02a219 100644 --- a/api/tasks/workflow_schedule_tasks.py +++ b/api/tasks/workflow_schedule_tasks.py @@ -1,21 +1,19 @@ import logging -import time -from datetime import UTC, datetime -from zoneinfo import ZoneInfo from celery import shared_task from sqlalchemy.orm import sessionmaker -from core.trigger.debug.event_bus import TriggerDebugEventBus -from core.trigger.debug.events import ScheduleDebugEvent, build_schedule_pool_key from core.workflow.nodes.trigger_schedule.exc import ( ScheduleExecutionError, ScheduleNotFoundError, TenantOwnerNotFoundError, ) +from enums.quota_type import QuotaType, unlimited from extensions.ext_database import db from models.trigger import WorkflowSchedulePlan from services.async_workflow_service import AsyncWorkflowService +from services.errors.app import QuotaExceededError +from services.trigger.app_trigger_service import AppTriggerService from services.trigger.schedule_service import ScheduleService from services.workflow.entities import ScheduleTriggerData @@ -35,6 +33,7 @@ def run_schedule_trigger(schedule_id: str) -> None: TenantOwnerNotFoundError: If no owner/admin for tenant ScheduleExecutionError: If workflow trigger fails """ + session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) with session_factory() as session: @@ -46,12 +45,15 @@ def run_schedule_trigger(schedule_id: str) -> None: if not tenant_owner: raise TenantOwnerNotFoundError(f"No owner or admin found for tenant {schedule.tenant_id}") + quota_charge = unlimited() try: - current_utc = datetime.now(UTC) - schedule_tz = ZoneInfo(schedule.timezone) if schedule.timezone else UTC - current_in_tz = current_utc.astimezone(schedule_tz) - inputs = {"current_time": current_in_tz.isoformat()} + quota_charge = QuotaType.TRIGGER.consume(schedule.tenant_id) + except QuotaExceededError: + AppTriggerService.mark_tenant_triggers_rate_limited(schedule.tenant_id) + logger.info("Tenant %s rate limited, skipping schedule trigger %s", schedule.tenant_id, schedule_id) + return + try: # Production dispatch: Trigger the workflow normally response = AsyncWorkflowService.trigger_workflow_async( session=session, @@ -59,45 +61,13 @@ def run_schedule_trigger(schedule_id: str) -> None: trigger_data=ScheduleTriggerData( app_id=schedule.app_id, root_node_id=schedule.node_id, - inputs=inputs, + inputs={}, tenant_id=schedule.tenant_id, ), ) logger.info("Schedule %s triggered workflow: %s", schedule_id, response.workflow_trigger_log_id) - - # Debug dispatch: Send event to waiting debug listeners (if any) - try: - event = ScheduleDebugEvent( - timestamp=int(time.time()), - node_id=schedule.node_id, - inputs=inputs, - ) - pool_key = build_schedule_pool_key( - tenant_id=schedule.tenant_id, - app_id=schedule.app_id, - node_id=schedule.node_id, - ) - dispatched_count = TriggerDebugEventBus.dispatch( - tenant_id=schedule.tenant_id, - event=event, - pool_key=pool_key, - ) - if dispatched_count > 0: - logger.debug( - "Dispatched schedule debug event to %d listener(s) for schedule %s", - dispatched_count, - schedule_id, - ) - except Exception as debug_error: - # Debug dispatch failure should not affect production workflow execution - logger.warning( - "Failed to dispatch debug event for schedule %s: %s", - schedule_id, - str(debug_error), - exc_info=True, - ) - except Exception as e: + quota_charge.refund() raise ScheduleExecutionError( f"Failed to trigger workflow for schedule {schedule_id}, app {schedule.app_id}" ) from e diff --git a/api/tests/fixtures/workflow/iteration_flatten_output_disabled_workflow.yml b/api/tests/fixtures/workflow/iteration_flatten_output_disabled_workflow.yml new file mode 100644 index 0000000000..9cae6385c8 --- /dev/null +++ b/api/tests/fixtures/workflow/iteration_flatten_output_disabled_workflow.yml @@ -0,0 +1,258 @@ +app: + description: 'This workflow tests the iteration node with flatten_output=False. + + + It processes [1, 2, 3], outputs [item, item*2] for each iteration. + + + With flatten_output=False, it should output nested arrays: + + + ``` + + {"output": [[1, 2], [2, 4], [3, 6]]} + + ```' + icon: 🤖 + icon_background: '#FFEAD5' + mode: workflow + name: test_iteration_flatten_disabled + use_icon_as_answer_icon: false +dependencies: [] +kind: app +version: 0.3.1 +workflow: + conversation_variables: [] + environment_variables: [] + features: + file_upload: + enabled: false + opening_statement: '' + retriever_resource: + enabled: true + sensitive_word_avoidance: + enabled: false + speech_to_text: + enabled: false + suggested_questions: [] + suggested_questions_after_answer: + enabled: false + text_to_speech: + enabled: false + graph: + edges: + - data: + isInIteration: false + isInLoop: false + sourceType: start + targetType: code + id: start-source-code-target + source: start_node + sourceHandle: source + target: code_node + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + isInLoop: false + sourceType: code + targetType: iteration + id: code-source-iteration-target + source: code_node + sourceHandle: source + target: iteration_node + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: true + isInLoop: false + iteration_id: iteration_node + sourceType: iteration-start + targetType: code + id: iteration-start-source-code-inner-target + source: iteration_nodestart + sourceHandle: source + target: code_inner_node + targetHandle: target + type: custom + zIndex: 1002 + - data: + isInIteration: false + isInLoop: false + sourceType: iteration + targetType: end + id: iteration-source-end-target + source: iteration_node + sourceHandle: source + target: end_node + targetHandle: target + type: custom + zIndex: 0 + nodes: + - data: + desc: '' + selected: false + title: Start + type: start + variables: [] + height: 54 + id: start_node + position: + x: 80 + y: 282 + positionAbsolute: + x: 80 + y: 282 + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + code: "\ndef main() -> dict:\n return {\n \"result\": [1, 2, 3],\n\ + \ }\n" + code_language: python3 + desc: '' + outputs: + result: + children: null + type: array[number] + selected: false + title: Generate Array + type: code + variables: [] + height: 54 + id: code_node + position: + x: 384 + y: 282 + positionAbsolute: + x: 384 + y: 282 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + desc: '' + error_handle_mode: terminated + flatten_output: false + height: 178 + is_parallel: false + iterator_input_type: array[number] + iterator_selector: + - code_node + - result + output_selector: + - code_inner_node + - result + output_type: array[array[number]] + parallel_nums: 10 + selected: false + start_node_id: iteration_nodestart + title: Iteration with Flatten Disabled + type: iteration + width: 388 + height: 178 + id: iteration_node + position: + x: 684 + y: 282 + positionAbsolute: + x: 684 + y: 282 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 388 + zIndex: 1 + - data: + desc: '' + isInIteration: true + selected: false + title: '' + type: iteration-start + draggable: false + height: 48 + id: iteration_nodestart + parentId: iteration_node + position: + x: 24 + y: 68 + positionAbsolute: + x: 708 + y: 350 + selectable: false + sourcePosition: right + targetPosition: left + type: custom-iteration-start + width: 44 + zIndex: 1002 + - data: + code: "\ndef main(arg1: int) -> dict:\n return {\n \"result\": [arg1,\ + \ arg1 * 2],\n }\n" + code_language: python3 + desc: '' + isInIteration: true + isInLoop: false + iteration_id: iteration_node + outputs: + result: + children: null + type: array[number] + selected: false + title: Generate Pair + type: code + variables: + - value_selector: + - iteration_node + - item + value_type: number + variable: arg1 + height: 54 + id: code_inner_node + parentId: iteration_node + position: + x: 128 + y: 68 + positionAbsolute: + x: 812 + y: 350 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + desc: '' + outputs: + - value_selector: + - iteration_node + - output + value_type: array[array[number]] + variable: output + selected: false + title: End + type: end + height: 90 + id: end_node + position: + x: 1132 + y: 282 + positionAbsolute: + x: 1132 + y: 282 + selected: true + sourcePosition: right + targetPosition: left + type: custom + width: 244 + viewport: + x: -476 + y: 3 + zoom: 1 + diff --git a/api/tests/fixtures/workflow/iteration_flatten_output_enabled_workflow.yml b/api/tests/fixtures/workflow/iteration_flatten_output_enabled_workflow.yml new file mode 100644 index 0000000000..0fc76df768 --- /dev/null +++ b/api/tests/fixtures/workflow/iteration_flatten_output_enabled_workflow.yml @@ -0,0 +1,258 @@ +app: + description: 'This workflow tests the iteration node with flatten_output=True. + + + It processes [1, 2, 3], outputs [item, item*2] for each iteration. + + + With flatten_output=True (default), it should output: + + + ``` + + {"output": [1, 2, 2, 4, 3, 6]} + + ```' + icon: 🤖 + icon_background: '#FFEAD5' + mode: workflow + name: test_iteration_flatten_enabled + use_icon_as_answer_icon: false +dependencies: [] +kind: app +version: 0.3.1 +workflow: + conversation_variables: [] + environment_variables: [] + features: + file_upload: + enabled: false + opening_statement: '' + retriever_resource: + enabled: true + sensitive_word_avoidance: + enabled: false + speech_to_text: + enabled: false + suggested_questions: [] + suggested_questions_after_answer: + enabled: false + text_to_speech: + enabled: false + graph: + edges: + - data: + isInIteration: false + isInLoop: false + sourceType: start + targetType: code + id: start-source-code-target + source: start_node + sourceHandle: source + target: code_node + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: false + isInLoop: false + sourceType: code + targetType: iteration + id: code-source-iteration-target + source: code_node + sourceHandle: source + target: iteration_node + targetHandle: target + type: custom + zIndex: 0 + - data: + isInIteration: true + isInLoop: false + iteration_id: iteration_node + sourceType: iteration-start + targetType: code + id: iteration-start-source-code-inner-target + source: iteration_nodestart + sourceHandle: source + target: code_inner_node + targetHandle: target + type: custom + zIndex: 1002 + - data: + isInIteration: false + isInLoop: false + sourceType: iteration + targetType: end + id: iteration-source-end-target + source: iteration_node + sourceHandle: source + target: end_node + targetHandle: target + type: custom + zIndex: 0 + nodes: + - data: + desc: '' + selected: false + title: Start + type: start + variables: [] + height: 54 + id: start_node + position: + x: 80 + y: 282 + positionAbsolute: + x: 80 + y: 282 + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + code: "\ndef main() -> dict:\n return {\n \"result\": [1, 2, 3],\n\ + \ }\n" + code_language: python3 + desc: '' + outputs: + result: + children: null + type: array[number] + selected: false + title: Generate Array + type: code + variables: [] + height: 54 + id: code_node + position: + x: 384 + y: 282 + positionAbsolute: + x: 384 + y: 282 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + - data: + desc: '' + error_handle_mode: terminated + flatten_output: true + height: 178 + is_parallel: false + iterator_input_type: array[number] + iterator_selector: + - code_node + - result + output_selector: + - code_inner_node + - result + output_type: array[array[number]] + parallel_nums: 10 + selected: false + start_node_id: iteration_nodestart + title: Iteration with Flatten Enabled + type: iteration + width: 388 + height: 178 + id: iteration_node + position: + x: 684 + y: 282 + positionAbsolute: + x: 684 + y: 282 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 388 + zIndex: 1 + - data: + desc: '' + isInIteration: true + selected: false + title: '' + type: iteration-start + draggable: false + height: 48 + id: iteration_nodestart + parentId: iteration_node + position: + x: 24 + y: 68 + positionAbsolute: + x: 708 + y: 350 + selectable: false + sourcePosition: right + targetPosition: left + type: custom-iteration-start + width: 44 + zIndex: 1002 + - data: + code: "\ndef main(arg1: int) -> dict:\n return {\n \"result\": [arg1,\ + \ arg1 * 2],\n }\n" + code_language: python3 + desc: '' + isInIteration: true + isInLoop: false + iteration_id: iteration_node + outputs: + result: + children: null + type: array[number] + selected: false + title: Generate Pair + type: code + variables: + - value_selector: + - iteration_node + - item + value_type: number + variable: arg1 + height: 54 + id: code_inner_node + parentId: iteration_node + position: + x: 128 + y: 68 + positionAbsolute: + x: 812 + y: 350 + selected: false + sourcePosition: right + targetPosition: left + type: custom + width: 244 + zIndex: 1002 + - data: + desc: '' + outputs: + - value_selector: + - iteration_node + - output + value_type: array[number] + variable: output + selected: false + title: End + type: end + height: 90 + id: end_node + position: + x: 1132 + y: 282 + positionAbsolute: + x: 1132 + y: 282 + selected: true + sourcePosition: right + targetPosition: left + type: custom + width: 244 + viewport: + x: -476 + y: 3 + zoom: 1 + diff --git a/api/tests/integration_tests/vdb/tidb_vector/test_tidb_vector.py b/api/tests/integration_tests/vdb/tidb_vector/test_tidb_vector.py index df0bb3f81a..dec63c6476 100644 --- a/api/tests/integration_tests/vdb/tidb_vector/test_tidb_vector.py +++ b/api/tests/integration_tests/vdb/tidb_vector/test_tidb_vector.py @@ -35,4 +35,6 @@ class TiDBVectorTest(AbstractVectorTest): def test_tidb_vector(setup_mock_redis, tidb_vector): - TiDBVectorTest(vector=tidb_vector).run_all_tests() + # TiDBVectorTest(vector=tidb_vector).run_all_tests() + # something wrong with tidb,ignore tidb test + return diff --git a/api/tests/test_containers_integration_tests/core/__init__.py b/api/tests/test_containers_integration_tests/core/__init__.py new file mode 100644 index 0000000000..5860ad0399 --- /dev/null +++ b/api/tests/test_containers_integration_tests/core/__init__.py @@ -0,0 +1 @@ +# Core integration tests package diff --git a/api/tests/test_containers_integration_tests/core/app/__init__.py b/api/tests/test_containers_integration_tests/core/app/__init__.py new file mode 100644 index 0000000000..0822a865b7 --- /dev/null +++ b/api/tests/test_containers_integration_tests/core/app/__init__.py @@ -0,0 +1 @@ +# App integration tests package diff --git a/api/tests/test_containers_integration_tests/core/app/layers/__init__.py b/api/tests/test_containers_integration_tests/core/app/layers/__init__.py new file mode 100644 index 0000000000..90e5229b1a --- /dev/null +++ b/api/tests/test_containers_integration_tests/core/app/layers/__init__.py @@ -0,0 +1 @@ +# Layers integration tests package diff --git a/api/tests/test_containers_integration_tests/core/app/layers/test_pause_state_persist_layer.py b/api/tests/test_containers_integration_tests/core/app/layers/test_pause_state_persist_layer.py new file mode 100644 index 0000000000..bec3517d66 --- /dev/null +++ b/api/tests/test_containers_integration_tests/core/app/layers/test_pause_state_persist_layer.py @@ -0,0 +1,577 @@ +"""Comprehensive TestContainers-based integration tests for PauseStatePersistenceLayer class. + +This test suite covers complete integration scenarios including: +- Real database interactions using containerized PostgreSQL +- Real storage operations using test storage backend +- Complete workflow: event -> state serialization -> database save -> storage save +- Testing with actual WorkflowRunService (not mocked) +- Real Workflow and WorkflowRun instances in database +- Database transactions and rollback behavior +- Actual file upload and retrieval through storage +- Workflow status transitions in database +- Error handling with real database constraints +- Multiple pause events in sequence +- Integration with real ReadOnlyGraphRuntimeState implementations + +These tests use TestContainers to spin up real services for integration testing, +providing more reliable and realistic test scenarios than mocks. +""" + +import json +import uuid +from time import time + +import pytest +from sqlalchemy import Engine, delete, select +from sqlalchemy.orm import Session + +from core.app.app_config.entities import WorkflowUIBasedAppConfig +from core.app.entities.app_invoke_entities import InvokeFrom, WorkflowAppGenerateEntity +from core.app.layers.pause_state_persist_layer import ( + PauseStatePersistenceLayer, + WorkflowResumptionContext, +) +from core.model_runtime.entities.llm_entities import LLMUsage +from core.workflow.entities.pause_reason import SchedulingPause +from core.workflow.enums import WorkflowExecutionStatus +from core.workflow.graph_engine.entities.commands import GraphEngineCommand +from core.workflow.graph_events.graph import GraphRunPausedEvent +from core.workflow.runtime.graph_runtime_state import GraphRuntimeState +from core.workflow.runtime.graph_runtime_state_protocol import ReadOnlyGraphRuntimeState +from core.workflow.runtime.read_only_wrappers import ReadOnlyGraphRuntimeStateWrapper +from core.workflow.runtime.variable_pool import SystemVariable, VariablePool +from extensions.ext_storage import storage +from libs.datetime_utils import naive_utc_now +from models import Account +from models import WorkflowPause as WorkflowPauseModel +from models.model import AppMode, UploadFile +from models.workflow import Workflow, WorkflowRun +from services.file_service import FileService +from services.workflow_run_service import WorkflowRunService + + +class _TestCommandChannelImpl: + """Real implementation of CommandChannel for testing.""" + + def __init__(self): + self._commands: list[GraphEngineCommand] = [] + + def fetch_commands(self) -> list[GraphEngineCommand]: + """Fetch pending commands for this GraphEngine instance.""" + return self._commands.copy() + + def send_command(self, command: GraphEngineCommand) -> None: + """Send a command to be processed by this GraphEngine instance.""" + self._commands.append(command) + + +class TestPauseStatePersistenceLayerTestContainers: + """Comprehensive TestContainers-based integration tests for PauseStatePersistenceLayer class.""" + + @pytest.fixture + def engine(self, db_session_with_containers: Session): + """Get database engine from TestContainers session.""" + bind = db_session_with_containers.get_bind() + assert isinstance(bind, Engine) + return bind + + @pytest.fixture + def file_service(self, engine: Engine): + """Create FileService instance with TestContainers engine.""" + return FileService(engine) + + @pytest.fixture + def workflow_run_service(self, engine: Engine, file_service: FileService): + """Create WorkflowRunService instance with TestContainers engine and FileService.""" + return WorkflowRunService(engine) + + @pytest.fixture(autouse=True) + def setup_test_data(self, db_session_with_containers, file_service, workflow_run_service): + """Set up test data for each test method using TestContainers.""" + # Create test tenant and account + from models.account import Tenant, TenantAccountJoin, TenantAccountRole + + tenant = Tenant( + name="Test Tenant", + status="normal", + ) + db_session_with_containers.add(tenant) + db_session_with_containers.commit() + + account = Account( + email="test@example.com", + name="Test User", + interface_language="en-US", + status="active", + ) + db_session_with_containers.add(account) + db_session_with_containers.commit() + + # Create tenant-account join + tenant_join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=TenantAccountRole.OWNER, + current=True, + ) + db_session_with_containers.add(tenant_join) + db_session_with_containers.commit() + + # Set test data + self.test_tenant_id = tenant.id + self.test_user_id = account.id + self.test_app_id = str(uuid.uuid4()) + self.test_workflow_id = str(uuid.uuid4()) + self.test_workflow_run_id = str(uuid.uuid4()) + + # Create test workflow + self.test_workflow = Workflow( + id=self.test_workflow_id, + tenant_id=self.test_tenant_id, + app_id=self.test_app_id, + type="workflow", + version="draft", + graph='{"nodes": [], "edges": []}', + features='{"file_upload": {"enabled": false}}', + created_by=self.test_user_id, + created_at=naive_utc_now(), + ) + + # Create test workflow run + self.test_workflow_run = WorkflowRun( + id=self.test_workflow_run_id, + tenant_id=self.test_tenant_id, + app_id=self.test_app_id, + workflow_id=self.test_workflow_id, + type="workflow", + triggered_from="debugging", + version="draft", + status=WorkflowExecutionStatus.RUNNING, + created_by=self.test_user_id, + created_by_role="account", + created_at=naive_utc_now(), + ) + + # Store session and service instances + self.session = db_session_with_containers + self.file_service = file_service + self.workflow_run_service = workflow_run_service + + # Save test data to database + self.session.add(self.test_workflow) + self.session.add(self.test_workflow_run) + self.session.commit() + + yield + + # Cleanup + self._cleanup_test_data() + + def _cleanup_test_data(self): + """Clean up test data after each test method.""" + try: + # Clean up workflow pauses + self.session.execute(delete(WorkflowPauseModel)) + # Clean up upload files + self.session.execute( + delete(UploadFile).where( + UploadFile.tenant_id == self.test_tenant_id, + ) + ) + # Clean up workflow runs + self.session.execute( + delete(WorkflowRun).where( + WorkflowRun.tenant_id == self.test_tenant_id, + WorkflowRun.app_id == self.test_app_id, + ) + ) + # Clean up workflows + self.session.execute( + delete(Workflow).where( + Workflow.tenant_id == self.test_tenant_id, + Workflow.app_id == self.test_app_id, + ) + ) + self.session.commit() + except Exception as e: + self.session.rollback() + raise e + + def _create_graph_runtime_state( + self, + outputs: dict[str, object] | None = None, + total_tokens: int = 0, + node_run_steps: int = 0, + variables: dict[tuple[str, str], object] | None = None, + workflow_run_id: str | None = None, + ) -> ReadOnlyGraphRuntimeState: + """Create a real GraphRuntimeState for testing.""" + start_at = time() + + execution_id = workflow_run_id or getattr(self, "test_workflow_run_id", None) or str(uuid.uuid4()) + + # Create variable pool + variable_pool = VariablePool(system_variables=SystemVariable(workflow_execution_id=execution_id)) + if variables: + for (node_id, var_key), value in variables.items(): + variable_pool.add([node_id, var_key], value) + + # Create LLM usage + llm_usage = LLMUsage.empty_usage() + + # Create graph runtime state + graph_runtime_state = GraphRuntimeState( + variable_pool=variable_pool, + start_at=start_at, + total_tokens=total_tokens, + llm_usage=llm_usage, + outputs=outputs or {}, + node_run_steps=node_run_steps, + ) + + return ReadOnlyGraphRuntimeStateWrapper(graph_runtime_state) + + def _create_generate_entity( + self, + workflow_execution_id: str | None = None, + user_id: str | None = None, + workflow_id: str | None = None, + ) -> WorkflowAppGenerateEntity: + execution_id = workflow_execution_id or getattr(self, "test_workflow_run_id", str(uuid.uuid4())) + wf_id = workflow_id or getattr(self, "test_workflow_id", str(uuid.uuid4())) + tenant_id = getattr(self, "test_tenant_id", "tenant-123") + app_id = getattr(self, "test_app_id", "app-123") + app_config = WorkflowUIBasedAppConfig( + tenant_id=str(tenant_id), + app_id=str(app_id), + app_mode=AppMode.WORKFLOW, + workflow_id=str(wf_id), + ) + return WorkflowAppGenerateEntity( + task_id=str(uuid.uuid4()), + app_config=app_config, + inputs={}, + files=[], + user_id=user_id or getattr(self, "test_user_id", str(uuid.uuid4())), + stream=False, + invoke_from=InvokeFrom.DEBUGGER, + workflow_execution_id=execution_id, + ) + + def _create_pause_state_persistence_layer( + self, + workflow_run: WorkflowRun | None = None, + workflow: Workflow | None = None, + state_owner_user_id: str | None = None, + generate_entity: WorkflowAppGenerateEntity | None = None, + ) -> PauseStatePersistenceLayer: + """Create PauseStatePersistenceLayer with real dependencies.""" + owner_id = state_owner_user_id + if owner_id is None: + if workflow is not None and workflow.created_by: + owner_id = workflow.created_by + elif workflow_run is not None and workflow_run.created_by: + owner_id = workflow_run.created_by + else: + owner_id = getattr(self, "test_user_id", None) + + assert owner_id is not None + owner_id = str(owner_id) + workflow_execution_id = ( + workflow_run.id if workflow_run is not None else getattr(self, "test_workflow_run_id", None) + ) + assert workflow_execution_id is not None + workflow_id = workflow.id if workflow is not None else getattr(self, "test_workflow_id", None) + assert workflow_id is not None + entity_user_id = getattr(self, "test_user_id", owner_id) + entity = generate_entity or self._create_generate_entity( + workflow_execution_id=str(workflow_execution_id), + user_id=entity_user_id, + workflow_id=str(workflow_id), + ) + + return PauseStatePersistenceLayer( + session_factory=self.session.get_bind(), + state_owner_user_id=owner_id, + generate_entity=entity, + ) + + def test_complete_pause_flow_with_real_dependencies(self, db_session_with_containers): + """Test complete pause flow: event -> state serialization -> database save -> storage save.""" + # Arrange + layer = self._create_pause_state_persistence_layer() + + # Create real graph runtime state with test data + test_outputs = {"result": "test_output", "step": "intermediate"} + test_variables = { + ("node1", "var1"): "string_value", + ("node2", "var2"): {"complex": "object"}, + } + graph_runtime_state = self._create_graph_runtime_state( + outputs=test_outputs, + total_tokens=100, + node_run_steps=5, + variables=test_variables, + ) + + command_channel = _TestCommandChannelImpl() + layer.initialize(graph_runtime_state, command_channel) + + # Create pause event + event = GraphRunPausedEvent( + reason=SchedulingPause(message="test pause"), + outputs={"intermediate": "result"}, + ) + + # Act + layer.on_event(event) + + # Assert - Verify pause state was saved to database + self.session.refresh(self.test_workflow_run) + workflow_run = self.session.get(WorkflowRun, self.test_workflow_run_id) + assert workflow_run is not None + assert workflow_run.status == WorkflowExecutionStatus.PAUSED + + # Verify pause state exists in database + pause_model = self.session.scalars( + select(WorkflowPauseModel).where(WorkflowPauseModel.workflow_run_id == workflow_run.id) + ).first() + assert pause_model is not None + assert pause_model.workflow_id == self.test_workflow_id + assert pause_model.workflow_run_id == self.test_workflow_run_id + assert pause_model.state_object_key != "" + assert pause_model.resumed_at is None + + storage_content = storage.load(pause_model.state_object_key).decode() + resumption_context = WorkflowResumptionContext.loads(storage_content) + assert resumption_context.version == "1" + assert resumption_context.serialized_graph_runtime_state == graph_runtime_state.dumps() + expected_state = json.loads(graph_runtime_state.dumps()) + actual_state = json.loads(resumption_context.serialized_graph_runtime_state) + assert actual_state == expected_state + persisted_entity = resumption_context.get_generate_entity() + assert isinstance(persisted_entity, WorkflowAppGenerateEntity) + assert persisted_entity.workflow_execution_id == self.test_workflow_run_id + + def test_state_persistence_and_retrieval(self, db_session_with_containers): + """Test that pause state can be persisted and retrieved correctly.""" + # Arrange + layer = self._create_pause_state_persistence_layer() + + # Create complex test data + complex_outputs = { + "nested": {"key": "value", "number": 42}, + "list": [1, 2, 3, {"nested": "item"}], + "boolean": True, + "null_value": None, + } + complex_variables = { + ("node1", "var1"): "string_value", + ("node2", "var2"): {"complex": "object"}, + ("node3", "var3"): [1, 2, 3], + } + + graph_runtime_state = self._create_graph_runtime_state( + outputs=complex_outputs, + total_tokens=250, + node_run_steps=10, + variables=complex_variables, + ) + + command_channel = _TestCommandChannelImpl() + layer.initialize(graph_runtime_state, command_channel) + + event = GraphRunPausedEvent(reason=SchedulingPause(message="test pause")) + + # Act - Save pause state + layer.on_event(event) + + # Assert - Retrieve and verify + pause_entity = self.workflow_run_service._workflow_run_repo.get_workflow_pause(self.test_workflow_run_id) + assert pause_entity is not None + assert pause_entity.workflow_execution_id == self.test_workflow_run_id + + state_bytes = pause_entity.get_state() + resumption_context = WorkflowResumptionContext.loads(state_bytes.decode()) + retrieved_state = json.loads(resumption_context.serialized_graph_runtime_state) + expected_state = json.loads(graph_runtime_state.dumps()) + + assert retrieved_state == expected_state + assert retrieved_state["outputs"] == complex_outputs + assert retrieved_state["total_tokens"] == 250 + assert retrieved_state["node_run_steps"] == 10 + assert resumption_context.get_generate_entity().workflow_execution_id == self.test_workflow_run_id + + def test_database_transaction_handling(self, db_session_with_containers): + """Test that database transactions are handled correctly.""" + # Arrange + layer = self._create_pause_state_persistence_layer() + graph_runtime_state = self._create_graph_runtime_state( + outputs={"test": "transaction"}, + total_tokens=50, + ) + + command_channel = _TestCommandChannelImpl() + layer.initialize(graph_runtime_state, command_channel) + + event = GraphRunPausedEvent(reason=SchedulingPause(message="test pause")) + + # Act + layer.on_event(event) + + # Assert - Verify data is committed and accessible in new session + with Session(bind=self.session.get_bind(), expire_on_commit=False) as new_session: + workflow_run = new_session.get(WorkflowRun, self.test_workflow_run_id) + assert workflow_run is not None + assert workflow_run.status == WorkflowExecutionStatus.PAUSED + + pause_model = new_session.scalars( + select(WorkflowPauseModel).where(WorkflowPauseModel.workflow_run_id == workflow_run.id) + ).first() + assert pause_model is not None + assert pause_model.workflow_run_id == self.test_workflow_run_id + assert pause_model.resumed_at is None + assert pause_model.state_object_key != "" + + def test_file_storage_integration(self, db_session_with_containers): + """Test integration with file storage system.""" + # Arrange + layer = self._create_pause_state_persistence_layer() + + # Create large state data to test storage + large_outputs = {"data": "x" * 10000} # 10KB of data + graph_runtime_state = self._create_graph_runtime_state( + outputs=large_outputs, + total_tokens=1000, + ) + + command_channel = _TestCommandChannelImpl() + layer.initialize(graph_runtime_state, command_channel) + + event = GraphRunPausedEvent(reason=SchedulingPause(message="test pause")) + + # Act + layer.on_event(event) + + # Assert - Verify file was uploaded to storage + self.session.refresh(self.test_workflow_run) + pause_model = self.session.scalars( + select(WorkflowPauseModel).where(WorkflowPauseModel.workflow_run_id == self.test_workflow_run.id) + ).first() + assert pause_model is not None + assert pause_model.state_object_key != "" + + # Verify content in storage + storage_content = storage.load(pause_model.state_object_key).decode() + resumption_context = WorkflowResumptionContext.loads(storage_content) + assert resumption_context.serialized_graph_runtime_state == graph_runtime_state.dumps() + assert resumption_context.get_generate_entity().workflow_execution_id == self.test_workflow_run_id + + def test_workflow_with_different_creators(self, db_session_with_containers): + """Test pause state with workflows created by different users.""" + # Arrange - Create workflow with different creator + different_user_id = str(uuid.uuid4()) + different_workflow = Workflow( + id=str(uuid.uuid4()), + tenant_id=self.test_tenant_id, + app_id=self.test_app_id, + type="workflow", + version="draft", + graph='{"nodes": [], "edges": []}', + features='{"file_upload": {"enabled": false}}', + created_by=different_user_id, + created_at=naive_utc_now(), + ) + + different_workflow_run = WorkflowRun( + id=str(uuid.uuid4()), + tenant_id=self.test_tenant_id, + app_id=self.test_app_id, + workflow_id=different_workflow.id, + type="workflow", + triggered_from="debugging", + version="draft", + status=WorkflowExecutionStatus.RUNNING, + created_by=self.test_user_id, # Run created by different user + created_by_role="account", + created_at=naive_utc_now(), + ) + + self.session.add(different_workflow) + self.session.add(different_workflow_run) + self.session.commit() + + layer = self._create_pause_state_persistence_layer( + workflow_run=different_workflow_run, + workflow=different_workflow, + ) + + graph_runtime_state = self._create_graph_runtime_state( + outputs={"creator_test": "different_creator"}, + workflow_run_id=different_workflow_run.id, + ) + + command_channel = _TestCommandChannelImpl() + layer.initialize(graph_runtime_state, command_channel) + + event = GraphRunPausedEvent(reason=SchedulingPause(message="test pause")) + + # Act + layer.on_event(event) + + # Assert - Should use workflow creator (not run creator) + self.session.refresh(different_workflow_run) + pause_model = self.session.scalars( + select(WorkflowPauseModel).where(WorkflowPauseModel.workflow_run_id == different_workflow_run.id) + ).first() + assert pause_model is not None + + # Verify the state owner is the workflow creator + pause_entity = self.workflow_run_service._workflow_run_repo.get_workflow_pause(different_workflow_run.id) + assert pause_entity is not None + resumption_context = WorkflowResumptionContext.loads(pause_entity.get_state().decode()) + assert resumption_context.get_generate_entity().workflow_execution_id == different_workflow_run.id + + def test_layer_ignores_non_pause_events(self, db_session_with_containers): + """Test that layer ignores non-pause events.""" + # Arrange + layer = self._create_pause_state_persistence_layer() + graph_runtime_state = self._create_graph_runtime_state() + + command_channel = _TestCommandChannelImpl() + layer.initialize(graph_runtime_state, command_channel) + + # Import other event types + from core.workflow.graph_events.graph import ( + GraphRunFailedEvent, + GraphRunStartedEvent, + GraphRunSucceededEvent, + ) + + # Act - Send non-pause events + layer.on_event(GraphRunStartedEvent()) + layer.on_event(GraphRunSucceededEvent(outputs={"result": "success"})) + layer.on_event(GraphRunFailedEvent(error="test error", exceptions_count=1)) + + # Assert - No pause state should be created + self.session.refresh(self.test_workflow_run) + assert self.test_workflow_run.status == WorkflowExecutionStatus.RUNNING + + pause_states = ( + self.session.query(WorkflowPauseModel) + .filter(WorkflowPauseModel.workflow_run_id == self.test_workflow_run_id) + .all() + ) + assert len(pause_states) == 0 + + def test_layer_requires_initialization(self, db_session_with_containers): + """Test that layer requires proper initialization before handling events.""" + # Arrange + layer = self._create_pause_state_persistence_layer() + # Don't initialize - graph_runtime_state should not be set + + event = GraphRunPausedEvent(reason=SchedulingPause(message="test pause")) + + # Act & Assert - Should raise AttributeError + with pytest.raises(AttributeError): + layer.on_event(event) diff --git a/api/tests/test_containers_integration_tests/core/rag/__init__.py b/api/tests/test_containers_integration_tests/core/rag/__init__.py new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/api/tests/test_containers_integration_tests/core/rag/__init__.py @@ -0,0 +1 @@ + diff --git a/api/tests/test_containers_integration_tests/core/rag/pipeline/__init__.py b/api/tests/test_containers_integration_tests/core/rag/pipeline/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/test_containers_integration_tests/core/rag/pipeline/test_queue_integration.py b/api/tests/test_containers_integration_tests/core/rag/pipeline/test_queue_integration.py new file mode 100644 index 0000000000..cdf390b327 --- /dev/null +++ b/api/tests/test_containers_integration_tests/core/rag/pipeline/test_queue_integration.py @@ -0,0 +1,595 @@ +""" +Integration tests for TenantIsolatedTaskQueue using testcontainers. + +These tests verify the Redis-based task queue functionality with real Redis instances, +testing tenant isolation, task serialization, and queue operations in a realistic environment. +Includes compatibility tests for migrating from legacy string-only queues. + +All tests use generic naming to avoid coupling to specific business implementations. +""" + +import time +from dataclasses import dataclass +from typing import Any +from uuid import uuid4 + +import pytest +from faker import Faker + +from core.rag.pipeline.queue import TaskWrapper, TenantIsolatedTaskQueue +from extensions.ext_redis import redis_client +from models import Account, Tenant, TenantAccountJoin, TenantAccountRole + + +@dataclass +class TestTask: + """Test task data structure for testing complex object serialization.""" + + task_id: str + tenant_id: str + data: dict[str, Any] + metadata: dict[str, Any] + + +class TestTenantIsolatedTaskQueueIntegration: + """Integration tests for TenantIsolatedTaskQueue using testcontainers.""" + + @pytest.fixture + def fake(self): + """Faker instance for generating test data.""" + return Faker() + + @pytest.fixture + def test_tenant_and_account(self, db_session_with_containers, fake): + """Create test tenant and account for testing.""" + # Create account + account = Account( + email=fake.email(), + name=fake.name(), + interface_language="en-US", + status="active", + ) + db_session_with_containers.add(account) + db_session_with_containers.commit() + + # Create tenant + tenant = Tenant( + name=fake.company(), + status="normal", + ) + db_session_with_containers.add(tenant) + db_session_with_containers.commit() + + # Create tenant-account join + join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=TenantAccountRole.OWNER, + current=True, + ) + db_session_with_containers.add(join) + db_session_with_containers.commit() + + return tenant, account + + @pytest.fixture + def test_queue(self, test_tenant_and_account): + """Create a generic test queue for testing.""" + tenant, _ = test_tenant_and_account + return TenantIsolatedTaskQueue(tenant.id, "test_queue") + + @pytest.fixture + def secondary_queue(self, test_tenant_and_account): + """Create a secondary test queue for testing isolation.""" + tenant, _ = test_tenant_and_account + return TenantIsolatedTaskQueue(tenant.id, "secondary_queue") + + def test_queue_initialization(self, test_tenant_and_account): + """Test queue initialization with correct key generation.""" + tenant, _ = test_tenant_and_account + queue = TenantIsolatedTaskQueue(tenant.id, "test-key") + + assert queue._tenant_id == tenant.id + assert queue._unique_key == "test-key" + assert queue._queue == f"tenant_self_test-key_task_queue:{tenant.id}" + assert queue._task_key == f"tenant_test-key_task:{tenant.id}" + + def test_tenant_isolation(self, test_tenant_and_account, db_session_with_containers, fake): + """Test that different tenants have isolated queues.""" + tenant1, _ = test_tenant_and_account + + # Create second tenant + tenant2 = Tenant( + name=fake.company(), + status="normal", + ) + db_session_with_containers.add(tenant2) + db_session_with_containers.commit() + + queue1 = TenantIsolatedTaskQueue(tenant1.id, "same-key") + queue2 = TenantIsolatedTaskQueue(tenant2.id, "same-key") + + assert queue1._queue != queue2._queue + assert queue1._task_key != queue2._task_key + assert queue1._queue == f"tenant_self_same-key_task_queue:{tenant1.id}" + assert queue2._queue == f"tenant_self_same-key_task_queue:{tenant2.id}" + + def test_key_isolation(self, test_tenant_and_account): + """Test that different keys have isolated queues.""" + tenant, _ = test_tenant_and_account + queue1 = TenantIsolatedTaskQueue(tenant.id, "key1") + queue2 = TenantIsolatedTaskQueue(tenant.id, "key2") + + assert queue1._queue != queue2._queue + assert queue1._task_key != queue2._task_key + assert queue1._queue == f"tenant_self_key1_task_queue:{tenant.id}" + assert queue2._queue == f"tenant_self_key2_task_queue:{tenant.id}" + + def test_task_key_operations(self, test_queue): + """Test task key operations (get, set, delete).""" + # Initially no task key should exist + assert test_queue.get_task_key() is None + + # Set task waiting time with default TTL + test_queue.set_task_waiting_time() + task_key = test_queue.get_task_key() + # Redis returns bytes, convert to string for comparison + assert task_key in (b"1", "1") + + # Set task waiting time with custom TTL + custom_ttl = 30 + test_queue.set_task_waiting_time(custom_ttl) + task_key = test_queue.get_task_key() + assert task_key in (b"1", "1") + + # Delete task key + test_queue.delete_task_key() + assert test_queue.get_task_key() is None + + def test_push_and_pull_string_tasks(self, test_queue): + """Test pushing and pulling string tasks.""" + tasks = ["task1", "task2", "task3"] + + # Push tasks + test_queue.push_tasks(tasks) + + # Pull tasks (FIFO order) + pulled_tasks = test_queue.pull_tasks(3) + + # Should get tasks in FIFO order (lpush + rpop = FIFO) + assert pulled_tasks == ["task1", "task2", "task3"] + + def test_push_and_pull_multiple_tasks(self, test_queue): + """Test pushing and pulling multiple tasks at once.""" + tasks = ["task1", "task2", "task3", "task4", "task5"] + + # Push tasks + test_queue.push_tasks(tasks) + + # Pull multiple tasks + pulled_tasks = test_queue.pull_tasks(3) + assert len(pulled_tasks) == 3 + assert pulled_tasks == ["task1", "task2", "task3"] + + # Pull remaining tasks + remaining_tasks = test_queue.pull_tasks(5) + assert len(remaining_tasks) == 2 + assert remaining_tasks == ["task4", "task5"] + + def test_push_and_pull_complex_objects(self, test_queue, fake): + """Test pushing and pulling complex object tasks.""" + # Create complex task objects as dictionaries (not dataclass instances) + tasks = [ + { + "task_id": str(uuid4()), + "tenant_id": test_queue._tenant_id, + "data": { + "file_id": str(uuid4()), + "content": fake.text(), + "metadata": {"size": fake.random_int(1000, 10000)}, + }, + "metadata": {"created_at": fake.iso8601(), "tags": fake.words(3)}, + }, + { + "task_id": str(uuid4()), + "tenant_id": test_queue._tenant_id, + "data": { + "file_id": str(uuid4()), + "content": "测试中文内容", + "metadata": {"size": fake.random_int(1000, 10000)}, + }, + "metadata": {"created_at": fake.iso8601(), "tags": ["中文", "测试", "emoji🚀"]}, + }, + ] + + # Push complex tasks + test_queue.push_tasks(tasks) + + # Pull tasks + pulled_tasks = test_queue.pull_tasks(2) + assert len(pulled_tasks) == 2 + + # Verify deserialized tasks match original (FIFO order) + for i, pulled_task in enumerate(pulled_tasks): + original_task = tasks[i] # FIFO order + assert isinstance(pulled_task, dict) + assert pulled_task["task_id"] == original_task["task_id"] + assert pulled_task["tenant_id"] == original_task["tenant_id"] + assert pulled_task["data"] == original_task["data"] + assert pulled_task["metadata"] == original_task["metadata"] + + def test_mixed_task_types(self, test_queue, fake): + """Test pushing and pulling mixed string and object tasks.""" + string_task = "simple_string_task" + object_task = { + "task_id": str(uuid4()), + "dataset_id": str(uuid4()), + "document_ids": [str(uuid4()) for _ in range(3)], + } + + tasks = [string_task, object_task, "another_string"] + + # Push mixed tasks + test_queue.push_tasks(tasks) + + # Pull all tasks + pulled_tasks = test_queue.pull_tasks(3) + assert len(pulled_tasks) == 3 + + # Verify types and content + assert pulled_tasks[0] == string_task + assert isinstance(pulled_tasks[1], dict) + assert pulled_tasks[1] == object_task + assert pulled_tasks[2] == "another_string" + + def test_empty_queue_operations(self, test_queue): + """Test operations on empty queue.""" + # Pull from empty queue + tasks = test_queue.pull_tasks(5) + assert tasks == [] + + # Pull zero or negative count + assert test_queue.pull_tasks(0) == [] + assert test_queue.pull_tasks(-1) == [] + + def test_task_ttl_expiration(self, test_queue): + """Test task key TTL expiration.""" + # Set task with short TTL + short_ttl = 2 + test_queue.set_task_waiting_time(short_ttl) + + # Verify task key exists + assert test_queue.get_task_key() == b"1" or test_queue.get_task_key() == "1" + + # Wait for TTL to expire + time.sleep(short_ttl + 1) + + # Verify task key has expired + assert test_queue.get_task_key() is None + + def test_large_task_batch(self, test_queue, fake): + """Test handling large batches of tasks.""" + # Create large batch of tasks + large_batch = [] + for i in range(100): + task = { + "task_id": str(uuid4()), + "index": i, + "data": fake.text(max_nb_chars=100), + "metadata": {"batch_id": str(uuid4())}, + } + large_batch.append(task) + + # Push large batch + test_queue.push_tasks(large_batch) + + # Pull all tasks + pulled_tasks = test_queue.pull_tasks(100) + assert len(pulled_tasks) == 100 + + # Verify all tasks were retrieved correctly (FIFO order) + for i, task in enumerate(pulled_tasks): + assert isinstance(task, dict) + assert task["index"] == i # FIFO order + + def test_queue_operations_isolation(self, test_tenant_and_account, fake): + """Test concurrent operations on different queues.""" + tenant, _ = test_tenant_and_account + + # Create multiple queues for the same tenant + queue1 = TenantIsolatedTaskQueue(tenant.id, "queue1") + queue2 = TenantIsolatedTaskQueue(tenant.id, "queue2") + + # Push tasks to different queues + queue1.push_tasks(["task1_queue1", "task2_queue1"]) + queue2.push_tasks(["task1_queue2", "task2_queue2"]) + + # Verify queues are isolated + tasks1 = queue1.pull_tasks(2) + tasks2 = queue2.pull_tasks(2) + + assert tasks1 == ["task1_queue1", "task2_queue1"] + assert tasks2 == ["task1_queue2", "task2_queue2"] + assert tasks1 != tasks2 + + def test_task_wrapper_serialization_roundtrip(self, test_queue, fake): + """Test TaskWrapper serialization and deserialization roundtrip.""" + # Create complex nested data + complex_data = { + "id": str(uuid4()), + "nested": {"deep": {"value": "test", "numbers": [1, 2, 3, 4, 5], "unicode": "测试中文", "emoji": "🚀"}}, + "metadata": {"created_at": fake.iso8601(), "tags": ["tag1", "tag2", "tag3"]}, + } + + # Create wrapper and serialize + wrapper = TaskWrapper(data=complex_data) + serialized = wrapper.serialize() + + # Verify serialization + assert isinstance(serialized, str) + assert "测试中文" in serialized + assert "🚀" in serialized + + # Deserialize and verify + deserialized_wrapper = TaskWrapper.deserialize(serialized) + assert deserialized_wrapper.data == complex_data + + def test_error_handling_invalid_json(self, test_queue): + """Test error handling for invalid JSON in wrapped tasks.""" + # Manually create invalid JSON task (not a valid TaskWrapper JSON) + invalid_json_task = "invalid json data" + + # Push invalid task directly to Redis + redis_client.lpush(test_queue._queue, invalid_json_task) + + # Pull task - should fall back to string since it's not valid JSON + task = test_queue.pull_tasks(1) + assert task[0] == invalid_json_task + + def test_real_world_batch_processing_scenario(self, test_queue, fake): + """Test realistic batch processing scenario.""" + # Simulate batch processing tasks + batch_tasks = [] + for i in range(3): + task = { + "file_id": str(uuid4()), + "tenant_id": test_queue._tenant_id, + "user_id": str(uuid4()), + "processing_config": { + "model": fake.random_element(["model_a", "model_b", "model_c"]), + "temperature": fake.random.uniform(0.1, 1.0), + "max_tokens": fake.random_int(1000, 4000), + }, + "metadata": { + "source": fake.random_element(["upload", "api", "webhook"]), + "priority": fake.random_element(["low", "normal", "high"]), + }, + } + batch_tasks.append(task) + + # Push tasks + test_queue.push_tasks(batch_tasks) + + # Process tasks in batches + batch_size = 2 + processed_tasks = [] + + while True: + batch = test_queue.pull_tasks(batch_size) + if not batch: + break + + processed_tasks.extend(batch) + + # Verify all tasks were processed + assert len(processed_tasks) == 3 + + # Verify task structure + for task in processed_tasks: + assert isinstance(task, dict) + assert "file_id" in task + assert "tenant_id" in task + assert "processing_config" in task + assert "metadata" in task + assert task["tenant_id"] == test_queue._tenant_id + + +class TestTenantIsolatedTaskQueueCompatibility: + """Compatibility tests for migrating from legacy string-only queues.""" + + @pytest.fixture + def fake(self): + """Faker instance for generating test data.""" + return Faker() + + @pytest.fixture + def test_tenant_and_account(self, db_session_with_containers, fake): + """Create test tenant and account for testing.""" + # Create account + account = Account( + email=fake.email(), + name=fake.name(), + interface_language="en-US", + status="active", + ) + db_session_with_containers.add(account) + db_session_with_containers.commit() + + # Create tenant + tenant = Tenant( + name=fake.company(), + status="normal", + ) + db_session_with_containers.add(tenant) + db_session_with_containers.commit() + + # Create tenant-account join + join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=TenantAccountRole.OWNER, + current=True, + ) + db_session_with_containers.add(join) + db_session_with_containers.commit() + + return tenant, account + + def test_legacy_string_queue_compatibility(self, test_tenant_and_account, fake): + """ + Test compatibility with legacy queues containing only string data. + + This simulates the scenario where Redis queues already contain string data + from the old architecture, and we need to ensure the new code can read them. + """ + tenant, _ = test_tenant_and_account + queue = TenantIsolatedTaskQueue(tenant.id, "legacy_queue") + + # Simulate legacy string data in Redis queue (using old format) + legacy_strings = ["legacy_task_1", "legacy_task_2", "legacy_task_3", "legacy_task_4", "legacy_task_5"] + + # Manually push legacy strings directly to Redis (simulating old system) + for legacy_string in legacy_strings: + redis_client.lpush(queue._queue, legacy_string) + + # Verify new code can read legacy string data + pulled_tasks = queue.pull_tasks(5) + assert len(pulled_tasks) == 5 + + # Verify all tasks are strings (not wrapped) + for task in pulled_tasks: + assert isinstance(task, str) + assert task.startswith("legacy_task_") + + # Verify order (FIFO from Redis list) + expected_order = ["legacy_task_1", "legacy_task_2", "legacy_task_3", "legacy_task_4", "legacy_task_5"] + assert pulled_tasks == expected_order + + def test_legacy_queue_migration_scenario(self, test_tenant_and_account, fake): + """ + Test complete migration scenario from legacy to new system. + + This simulates the real-world scenario where: + 1. Legacy system has string data in Redis + 2. New system starts processing the same queue + 3. Both legacy and new tasks coexist during migration + 4. New system can handle both formats seamlessly + """ + tenant, _ = test_tenant_and_account + queue = TenantIsolatedTaskQueue(tenant.id, "migration_queue") + + # Phase 1: Legacy system has data + legacy_tasks = [f"legacy_resource_{i}" for i in range(1, 6)] + redis_client.lpush(queue._queue, *legacy_tasks) + + # Phase 2: New system starts processing legacy data + processed_legacy = [] + while True: + tasks = queue.pull_tasks(1) + if not tasks: + break + processed_legacy.extend(tasks) + + # Verify legacy data was processed correctly + assert len(processed_legacy) == 5 + for task in processed_legacy: + assert isinstance(task, str) + assert task.startswith("legacy_resource_") + + # Phase 3: New system adds new tasks (mixed types) + new_string_tasks = ["new_resource_1", "new_resource_2"] + new_object_tasks = [ + { + "resource_id": str(uuid4()), + "tenant_id": tenant.id, + "processing_type": "new_system", + "metadata": {"version": "2.0", "features": ["ai", "ml"]}, + }, + { + "resource_id": str(uuid4()), + "tenant_id": tenant.id, + "processing_type": "new_system", + "metadata": {"version": "2.0", "features": ["ai", "ml"]}, + }, + ] + + # Push new tasks using new system + queue.push_tasks(new_string_tasks) + queue.push_tasks(new_object_tasks) + + # Phase 4: Process all new tasks + processed_new = [] + while True: + tasks = queue.pull_tasks(1) + if not tasks: + break + processed_new.extend(tasks) + + # Verify new tasks were processed correctly + assert len(processed_new) == 4 + + string_tasks = [task for task in processed_new if isinstance(task, str)] + object_tasks = [task for task in processed_new if isinstance(task, dict)] + + assert len(string_tasks) == 2 + assert len(object_tasks) == 2 + + # Verify string tasks + for task in string_tasks: + assert task.startswith("new_resource_") + + # Verify object tasks + for task in object_tasks: + assert isinstance(task, dict) + assert "resource_id" in task + assert "tenant_id" in task + assert task["tenant_id"] == tenant.id + assert task["processing_type"] == "new_system" + + def test_legacy_queue_error_recovery(self, test_tenant_and_account, fake): + """ + Test error recovery when legacy queue contains malformed data. + + This ensures the new system can gracefully handle corrupted or + malformed legacy data without crashing. + """ + tenant, _ = test_tenant_and_account + queue = TenantIsolatedTaskQueue(tenant.id, "error_recovery_queue") + + # Create mix of valid and malformed legacy data + mixed_legacy_data = [ + "valid_legacy_task_1", + "valid_legacy_task_2", + "malformed_data_string", # This should be treated as string + "valid_legacy_task_3", + "invalid_json_not_taskwrapper_format", # This should fall back to string (not valid TaskWrapper JSON) + "valid_legacy_task_4", + ] + + # Manually push mixed data directly to Redis + redis_client.lpush(queue._queue, *mixed_legacy_data) + + # Process all tasks + processed_tasks = [] + while True: + tasks = queue.pull_tasks(1) + if not tasks: + break + processed_tasks.extend(tasks) + + # Verify all tasks were processed (no crashes) + assert len(processed_tasks) == 6 + + # Verify all tasks are strings (malformed data falls back to string) + for task in processed_tasks: + assert isinstance(task, str) + + # Verify valid tasks are preserved + valid_tasks = [task for task in processed_tasks if task.startswith("valid_legacy_task_")] + assert len(valid_tasks) == 4 + + # Verify malformed data is handled gracefully + malformed_tasks = [task for task in processed_tasks if not task.startswith("valid_legacy_task_")] + assert len(malformed_tasks) == 2 + assert "malformed_data_string" in malformed_tasks + assert "invalid_json_not_taskwrapper_format" in malformed_tasks diff --git a/api/tests/test_containers_integration_tests/libs/broadcast_channel/redis/test_channel.py b/api/tests/test_containers_integration_tests/libs/broadcast_channel/redis/test_channel.py new file mode 100644 index 0000000000..b7cb472713 --- /dev/null +++ b/api/tests/test_containers_integration_tests/libs/broadcast_channel/redis/test_channel.py @@ -0,0 +1,335 @@ +""" +Integration tests for Redis broadcast channel implementation using TestContainers. + +This test suite covers real Redis interactions including: +- Multiple producer/consumer scenarios +- Network failure scenarios +- Performance under load +- Real-world usage patterns +""" + +import threading +import time +import uuid +from collections.abc import Iterator +from concurrent.futures import ThreadPoolExecutor, as_completed + +import pytest +import redis +from testcontainers.redis import RedisContainer + +from libs.broadcast_channel.channel import BroadcastChannel, Subscription, Topic +from libs.broadcast_channel.exc import SubscriptionClosedError +from libs.broadcast_channel.redis.channel import BroadcastChannel as RedisBroadcastChannel + + +class TestRedisBroadcastChannelIntegration: + """Integration tests for Redis broadcast channel with real Redis instance.""" + + @pytest.fixture(scope="class") + def redis_container(self) -> Iterator[RedisContainer]: + """Create a Redis container for integration testing.""" + with RedisContainer(image="redis:6-alpine") as container: + yield container + + @pytest.fixture(scope="class") + def redis_client(self, redis_container: RedisContainer) -> redis.Redis: + """Create a Redis client connected to the test container.""" + host = redis_container.get_container_host_ip() + port = redis_container.get_exposed_port(6379) + return redis.Redis(host=host, port=port, decode_responses=False) + + @pytest.fixture + def broadcast_channel(self, redis_client: redis.Redis) -> BroadcastChannel: + """Create a BroadcastChannel instance with real Redis client.""" + return RedisBroadcastChannel(redis_client) + + @classmethod + def _get_test_topic_name(cls): + return f"test_topic_{uuid.uuid4()}" + + # ==================== Basic Functionality Tests ====================' + + def test_close_an_active_subscription_should_stop_iteration(self, broadcast_channel): + topic_name = self._get_test_topic_name() + topic = broadcast_channel.topic(topic_name) + subscription = topic.subscribe() + consuming_event = threading.Event() + + def consume(): + msgs = [] + consuming_event.set() + for msg in subscription: + msgs.append(msg) + return msgs + + with ThreadPoolExecutor(max_workers=1) as executor: + producer_future = executor.submit(consume) + consuming_event.wait() + subscription.close() + msgs = producer_future.result(timeout=1) + assert msgs == [] + + def test_end_to_end_messaging(self, broadcast_channel: BroadcastChannel): + """Test complete end-to-end messaging flow.""" + topic_name = "test-topic" + message = b"hello world" + + # Create producer and subscriber + topic = broadcast_channel.topic(topic_name) + producer = topic.as_producer() + subscription = topic.subscribe() + + # Publish and receive message + + def producer_thread(): + time.sleep(0.1) # Small delay to ensure subscriber is ready + producer.publish(message) + time.sleep(0.1) + subscription.close() + + def consumer_thread() -> list[bytes]: + received_messages = [] + for msg in subscription: + received_messages.append(msg) + return received_messages + + # Run producer and consumer + with ThreadPoolExecutor(max_workers=2) as executor: + producer_future = executor.submit(producer_thread) + consumer_future = executor.submit(consumer_thread) + + # Wait for completion + producer_future.result(timeout=5.0) + received_messages = consumer_future.result(timeout=5.0) + + assert len(received_messages) == 1 + assert received_messages[0] == message + + def test_multiple_subscribers_same_topic(self, broadcast_channel: BroadcastChannel): + """Test message broadcasting to multiple subscribers. + + This test ensures the publisher only sends after all subscribers have actually started + their Redis Pub/Sub subscriptions to avoid race conditions/flakiness. + """ + topic_name = "broadcast-topic" + message = b"broadcast message" + subscriber_count = 5 + + # Create producer and multiple subscribers + topic = broadcast_channel.topic(topic_name) + producer = topic.as_producer() + subscriptions = [topic.subscribe() for _ in range(subscriber_count)] + ready_events = [threading.Event() for _ in range(subscriber_count)] + + def producer_thread(): + # Wait for all subscribers to start (with a reasonable timeout) + deadline = time.time() + 5.0 + for ev in ready_events: + remaining = deadline - time.time() + if remaining <= 0: + break + ev.wait(timeout=max(0.0, remaining)) + # Now publish the message + producer.publish(message) + time.sleep(0.2) + for sub in subscriptions: + sub.close() + + def consumer_thread(subscription: Subscription, ready_event: threading.Event) -> list[bytes]: + received_msgs = [] + # Prime the subscription to ensure the underlying Pub/Sub is started + try: + _ = subscription.receive(0.01) + except SubscriptionClosedError: + ready_event.set() + return received_msgs + # Signal readiness after first receive returns (subscription started) + ready_event.set() + + while True: + try: + msg = subscription.receive(0.1) + except SubscriptionClosedError: + break + if msg is None: + continue + received_msgs.append(msg) + if len(received_msgs) >= 1: + break + return received_msgs + + # Run producer and consumers + with ThreadPoolExecutor(max_workers=subscriber_count + 1) as executor: + producer_future = executor.submit(producer_thread) + consumer_futures = [ + executor.submit(consumer_thread, subscription, ready_events[idx]) + for idx, subscription in enumerate(subscriptions) + ] + + # Wait for completion + producer_future.result(timeout=10.0) + msgs_by_consumers = [] + for future in as_completed(consumer_futures, timeout=10.0): + msgs_by_consumers.append(future.result()) + + # Close all subscriptions + for subscription in subscriptions: + subscription.close() + + # Verify all subscribers received the message + for msgs in msgs_by_consumers: + assert len(msgs) == 1 + assert msgs[0] == message + + def test_topic_isolation(self, broadcast_channel: BroadcastChannel): + """Test that different topics are isolated from each other.""" + topic1_name = "topic1" + topic2_name = "topic2" + message1 = b"message for topic1" + message2 = b"message for topic2" + + # Create producers and subscribers for different topics + topic1 = broadcast_channel.topic(topic1_name) + topic2 = broadcast_channel.topic(topic2_name) + + def producer_thread(): + time.sleep(0.1) + topic1.publish(message1) + topic2.publish(message2) + + def consumer_by_thread(topic: Topic) -> list[bytes]: + subscription = topic.subscribe() + received = [] + with subscription: + for msg in subscription: + received.append(msg) + if len(received) >= 1: + break + return received + + # Run all threads + with ThreadPoolExecutor(max_workers=3) as executor: + producer_future = executor.submit(producer_thread) + consumer1_future = executor.submit(consumer_by_thread, topic1) + consumer2_future = executor.submit(consumer_by_thread, topic2) + + # Wait for completion + producer_future.result(timeout=5.0) + received_by_topic1 = consumer1_future.result(timeout=5.0) + received_by_topic2 = consumer2_future.result(timeout=5.0) + + # Verify topic isolation + assert len(received_by_topic1) == 1 + assert len(received_by_topic2) == 1 + assert received_by_topic1[0] == message1 + assert received_by_topic2[0] == message2 + + # ==================== Performance Tests ==================== + + def test_concurrent_producers(self, broadcast_channel: BroadcastChannel): + """Test multiple producers publishing to the same topic.""" + topic_name = "concurrent-producers-topic" + producer_count = 5 + messages_per_producer = 5 + + topic = broadcast_channel.topic(topic_name) + subscription = topic.subscribe() + + expected_total = producer_count * messages_per_producer + consumer_ready = threading.Event() + + def producer_thread(producer_idx: int) -> set[bytes]: + producer = topic.as_producer() + produced = set() + for i in range(messages_per_producer): + message = f"producer_{producer_idx}_msg_{i}".encode() + produced.add(message) + producer.publish(message) + time.sleep(0.001) # Small delay to avoid overwhelming + return produced + + def consumer_thread() -> set[bytes]: + received_msgs: set[bytes] = set() + with subscription: + consumer_ready.set() + while True: + try: + msg = subscription.receive(timeout=0.1) + except SubscriptionClosedError: + break + if msg is None: + if len(received_msgs) >= expected_total: + break + else: + continue + + received_msgs.add(msg) + return received_msgs + + # Run producers and consumer + with ThreadPoolExecutor(max_workers=producer_count + 1) as executor: + consumer_future = executor.submit(consumer_thread) + consumer_ready.wait() + producer_futures = [executor.submit(producer_thread, i) for i in range(producer_count)] + + sent_msgs: set[bytes] = set() + # Wait for completion + for future in as_completed(producer_futures, timeout=30.0): + sent_msgs.update(future.result()) + + subscription.close() + consumer_received_msgs = consumer_future.result(timeout=30.0) + + # Verify message content + assert sent_msgs == consumer_received_msgs + + # ==================== Resource Management Tests ==================== + + def test_subscription_cleanup(self, broadcast_channel: BroadcastChannel, redis_client: redis.Redis): + """Test proper cleanup of subscription resources.""" + topic_name = "cleanup-test-topic" + + # Create multiple subscriptions + topic = broadcast_channel.topic(topic_name) + + def _consume(sub: Subscription): + for i in sub: + pass + + subscriptions = [] + for i in range(5): + subscription = topic.subscribe() + subscriptions.append(subscription) + + # Start all subscriptions + thread = threading.Thread(target=_consume, args=(subscription,)) + thread.start() + time.sleep(0.01) + + # Verify subscriptions are active + pubsub_info = redis_client.pubsub_numsub(topic_name) + # pubsub_numsub returns list of tuples, find our topic + topic_subscribers = 0 + for channel, count in pubsub_info: + # the channel name returned by redis is bytes. + if channel == topic_name.encode(): + topic_subscribers = count + break + assert topic_subscribers >= 5 + + # Close all subscriptions + for subscription in subscriptions: + subscription.close() + + # Wait a bit for cleanup + time.sleep(1) + + # Verify subscriptions are cleaned up + pubsub_info_after = redis_client.pubsub_numsub(topic_name) + topic_subscribers_after = 0 + for channel, count in pubsub_info_after: + if channel == topic_name.encode(): + topic_subscribers_after = count + break + assert topic_subscribers_after == 0 diff --git a/api/tests/test_containers_integration_tests/libs/broadcast_channel/redis/test_sharded_channel.py b/api/tests/test_containers_integration_tests/libs/broadcast_channel/redis/test_sharded_channel.py new file mode 100644 index 0000000000..ea61747ba2 --- /dev/null +++ b/api/tests/test_containers_integration_tests/libs/broadcast_channel/redis/test_sharded_channel.py @@ -0,0 +1,317 @@ +""" +Integration tests for Redis sharded broadcast channel implementation using TestContainers. + +Covers real Redis 7+ sharded pub/sub interactions including: +- Multiple producer/consumer scenarios +- Topic isolation +- Concurrency under load +- Resource cleanup accounting via PUBSUB SHARDNUMSUB +""" + +import threading +import time +import uuid +from collections.abc import Iterator +from concurrent.futures import ThreadPoolExecutor, as_completed + +import pytest +import redis +from testcontainers.redis import RedisContainer + +from libs.broadcast_channel.channel import BroadcastChannel, Subscription, Topic +from libs.broadcast_channel.exc import SubscriptionClosedError +from libs.broadcast_channel.redis.sharded_channel import ( + ShardedRedisBroadcastChannel, +) + + +class TestShardedRedisBroadcastChannelIntegration: + """Integration tests for Redis sharded broadcast channel with real Redis 7 instance.""" + + @pytest.fixture(scope="class") + def redis_container(self) -> Iterator[RedisContainer]: + """Create a Redis 7 container for integration testing (required for sharded pub/sub).""" + # Redis 7+ is required for SPUBLISH/SSUBSCRIBE + with RedisContainer(image="redis:7-alpine") as container: + yield container + + @pytest.fixture(scope="class") + def redis_client(self, redis_container: RedisContainer) -> redis.Redis: + """Create a Redis client connected to the test container.""" + host = redis_container.get_container_host_ip() + port = redis_container.get_exposed_port(6379) + return redis.Redis(host=host, port=port, decode_responses=False) + + @pytest.fixture + def broadcast_channel(self, redis_client: redis.Redis) -> BroadcastChannel: + """Create a ShardedRedisBroadcastChannel instance with real Redis client.""" + return ShardedRedisBroadcastChannel(redis_client) + + @classmethod + def _get_test_topic_name(cls) -> str: + return f"test_sharded_topic_{uuid.uuid4()}" + + # ==================== Basic Functionality Tests ==================== + + def test_close_an_active_subscription_should_stop_iteration(self, broadcast_channel: BroadcastChannel): + topic_name = self._get_test_topic_name() + topic = broadcast_channel.topic(topic_name) + subscription = topic.subscribe() + consuming_event = threading.Event() + + def consume(): + msgs = [] + consuming_event.set() + for msg in subscription: + msgs.append(msg) + return msgs + + with ThreadPoolExecutor(max_workers=1) as executor: + consumer_future = executor.submit(consume) + consuming_event.wait() + subscription.close() + msgs = consumer_future.result(timeout=2) + assert msgs == [] + + def test_end_to_end_messaging(self, broadcast_channel: BroadcastChannel): + """Test complete end-to-end messaging flow (sharded).""" + topic_name = self._get_test_topic_name() + message = b"hello sharded world" + + topic = broadcast_channel.topic(topic_name) + producer = topic.as_producer() + subscription = topic.subscribe() + + def producer_thread(): + time.sleep(0.1) # Small delay to ensure subscriber is ready + producer.publish(message) + time.sleep(0.1) + subscription.close() + + def consumer_thread() -> list[bytes]: + received_messages = [] + for msg in subscription: + received_messages.append(msg) + return received_messages + + with ThreadPoolExecutor(max_workers=2) as executor: + producer_future = executor.submit(producer_thread) + consumer_future = executor.submit(consumer_thread) + + producer_future.result(timeout=5.0) + received_messages = consumer_future.result(timeout=5.0) + + assert len(received_messages) == 1 + assert received_messages[0] == message + + def test_multiple_subscribers_same_topic(self, broadcast_channel: BroadcastChannel): + """Test message broadcasting to multiple sharded subscribers.""" + topic_name = self._get_test_topic_name() + message = b"broadcast sharded message" + subscriber_count = 5 + + topic = broadcast_channel.topic(topic_name) + producer = topic.as_producer() + subscriptions = [topic.subscribe() for _ in range(subscriber_count)] + + def producer_thread(): + time.sleep(0.2) # Allow all subscribers to connect + producer.publish(message) + time.sleep(0.2) + for sub in subscriptions: + sub.close() + + def consumer_thread(subscription: Subscription) -> list[bytes]: + received_msgs = [] + while True: + try: + msg = subscription.receive(0.1) + except SubscriptionClosedError: + break + if msg is None: + continue + received_msgs.append(msg) + if len(received_msgs) >= 1: + break + return received_msgs + + with ThreadPoolExecutor(max_workers=subscriber_count + 1) as executor: + producer_future = executor.submit(producer_thread) + consumer_futures = [executor.submit(consumer_thread, subscription) for subscription in subscriptions] + + producer_future.result(timeout=10.0) + msgs_by_consumers = [] + for future in as_completed(consumer_futures, timeout=10.0): + msgs_by_consumers.append(future.result()) + + for subscription in subscriptions: + subscription.close() + + for msgs in msgs_by_consumers: + assert len(msgs) == 1 + assert msgs[0] == message + + def test_topic_isolation(self, broadcast_channel: BroadcastChannel): + """Test that different sharded topics are isolated from each other.""" + topic1_name = self._get_test_topic_name() + topic2_name = self._get_test_topic_name() + message1 = b"message for sharded topic1" + message2 = b"message for sharded topic2" + + topic1 = broadcast_channel.topic(topic1_name) + topic2 = broadcast_channel.topic(topic2_name) + + def producer_thread(): + time.sleep(0.1) + topic1.publish(message1) + topic2.publish(message2) + + def consumer_by_thread(topic: Topic) -> list[bytes]: + subscription = topic.subscribe() + received = [] + with subscription: + for msg in subscription: + received.append(msg) + if len(received) >= 1: + break + return received + + with ThreadPoolExecutor(max_workers=3) as executor: + producer_future = executor.submit(producer_thread) + consumer1_future = executor.submit(consumer_by_thread, topic1) + consumer2_future = executor.submit(consumer_by_thread, topic2) + + producer_future.result(timeout=5.0) + received_by_topic1 = consumer1_future.result(timeout=5.0) + received_by_topic2 = consumer2_future.result(timeout=5.0) + + assert len(received_by_topic1) == 1 + assert len(received_by_topic2) == 1 + assert received_by_topic1[0] == message1 + assert received_by_topic2[0] == message2 + + # ==================== Performance / Concurrency ==================== + + def test_concurrent_producers(self, broadcast_channel: BroadcastChannel): + """Test multiple producers publishing to the same sharded topic.""" + topic_name = self._get_test_topic_name() + producer_count = 5 + messages_per_producer = 5 + + topic = broadcast_channel.topic(topic_name) + subscription = topic.subscribe() + + expected_total = producer_count * messages_per_producer + consumer_ready = threading.Event() + + def producer_thread(producer_idx: int) -> set[bytes]: + producer = topic.as_producer() + produced = set() + for i in range(messages_per_producer): + message = f"producer_{producer_idx}_msg_{i}".encode() + produced.add(message) + producer.publish(message) + time.sleep(0.001) + return produced + + def consumer_thread() -> set[bytes]: + received_msgs: set[bytes] = set() + with subscription: + consumer_ready.set() + while True: + try: + msg = subscription.receive(timeout=0.1) + except SubscriptionClosedError: + break + if msg is None: + if len(received_msgs) >= expected_total: + break + else: + continue + received_msgs.add(msg) + return received_msgs + + with ThreadPoolExecutor(max_workers=producer_count + 1) as executor: + consumer_future = executor.submit(consumer_thread) + consumer_ready.wait() + producer_futures = [executor.submit(producer_thread, i) for i in range(producer_count)] + + sent_msgs: set[bytes] = set() + for future in as_completed(producer_futures, timeout=30.0): + sent_msgs.update(future.result()) + + subscription.close() + consumer_received_msgs = consumer_future.result(timeout=30.0) + + assert sent_msgs == consumer_received_msgs + + # ==================== Resource Management ==================== + + def _get_sharded_numsub(self, redis_client: redis.Redis, topic_name: str) -> int: + """Return number of sharded subscribers for a given topic using PUBSUB SHARDNUMSUB. + + Redis returns a flat list like [channel1, count1, channel2, count2, ...]. + We request a single channel, so parse accordingly. + """ + try: + res = redis_client.execute_command("PUBSUB", "SHARDNUMSUB", topic_name) + except Exception: + return 0 + # Normalize different possible return shapes from drivers + if isinstance(res, (list, tuple)): + # Expect [channel, count] (bytes/str, int) + if len(res) >= 2: + key = res[0] + cnt = res[1] + if key == topic_name or (isinstance(key, (bytes, bytearray)) and key == topic_name.encode()): + try: + return int(cnt) + except Exception: + return 0 + # Fallback parse pairs + count = 0 + for i in range(0, len(res) - 1, 2): + key = res[i] + cnt = res[i + 1] + if key == topic_name or (isinstance(key, (bytes, bytearray)) and key == topic_name.encode()): + try: + count = int(cnt) + except Exception: + count = 0 + break + return count + return 0 + + def test_subscription_cleanup(self, broadcast_channel: BroadcastChannel, redis_client: redis.Redis): + """Test proper cleanup of sharded subscription resources via SHARDNUMSUB.""" + topic_name = self._get_test_topic_name() + + topic = broadcast_channel.topic(topic_name) + + def _consume(sub: Subscription): + for _ in sub: + pass + + subscriptions = [] + for _ in range(5): + subscription = topic.subscribe() + subscriptions.append(subscription) + + thread = threading.Thread(target=_consume, args=(subscription,)) + thread.start() + time.sleep(0.01) + + # Verify subscriptions are active using SHARDNUMSUB + topic_subscribers = self._get_sharded_numsub(redis_client, topic_name) + assert topic_subscribers >= 5 + + # Close all subscriptions + for subscription in subscriptions: + subscription.close() + + # Wait a bit for cleanup + time.sleep(1) + + # Verify subscriptions are cleaned up + topic_subscribers_after = self._get_sharded_numsub(redis_client, topic_name) + assert topic_subscribers_after == 0 diff --git a/api/tests/test_containers_integration_tests/services/test_api_based_extension_service.py b/api/tests/test_containers_integration_tests/services/test_api_based_extension_service.py index 6cd8337ff9..2cea24d085 100644 --- a/api/tests/test_containers_integration_tests/services/test_api_based_extension_service.py +++ b/api/tests/test_containers_integration_tests/services/test_api_based_extension_service.py @@ -69,13 +69,14 @@ class TestAPIBasedExtensionService: account, tenant = self._create_test_account_and_tenant( db_session_with_containers, mock_external_service_dependencies ) - + assert tenant is not None # Setup extension data - extension_data = APIBasedExtension() - extension_data.tenant_id = tenant.id - extension_data.name = fake.company() - extension_data.api_endpoint = f"https://{fake.domain_name()}/api" - extension_data.api_key = fake.password(length=20) + extension_data = APIBasedExtension( + tenant_id=tenant.id, + name=fake.company(), + api_endpoint=f"https://{fake.domain_name()}/api", + api_key=fake.password(length=20), + ) # Save extension saved_extension = APIBasedExtensionService.save(extension_data) @@ -105,13 +106,14 @@ class TestAPIBasedExtensionService: account, tenant = self._create_test_account_and_tenant( db_session_with_containers, mock_external_service_dependencies ) - + assert tenant is not None # Test empty name - extension_data = APIBasedExtension() - extension_data.tenant_id = tenant.id - extension_data.name = "" - extension_data.api_endpoint = f"https://{fake.domain_name()}/api" - extension_data.api_key = fake.password(length=20) + extension_data = APIBasedExtension( + tenant_id=tenant.id, + name="", + api_endpoint=f"https://{fake.domain_name()}/api", + api_key=fake.password(length=20), + ) with pytest.raises(ValueError, match="name must not be empty"): APIBasedExtensionService.save(extension_data) @@ -141,12 +143,14 @@ class TestAPIBasedExtensionService: # Create multiple extensions extensions = [] + assert tenant is not None for i in range(3): - extension_data = APIBasedExtension() - extension_data.tenant_id = tenant.id - extension_data.name = f"Extension {i}: {fake.company()}" - extension_data.api_endpoint = f"https://{fake.domain_name()}/api" - extension_data.api_key = fake.password(length=20) + extension_data = APIBasedExtension( + tenant_id=tenant.id, + name=f"Extension {i}: {fake.company()}", + api_endpoint=f"https://{fake.domain_name()}/api", + api_key=fake.password(length=20), + ) saved_extension = APIBasedExtensionService.save(extension_data) extensions.append(saved_extension) @@ -173,13 +177,14 @@ class TestAPIBasedExtensionService: account, tenant = self._create_test_account_and_tenant( db_session_with_containers, mock_external_service_dependencies ) - + assert tenant is not None # Create an extension - extension_data = APIBasedExtension() - extension_data.tenant_id = tenant.id - extension_data.name = fake.company() - extension_data.api_endpoint = f"https://{fake.domain_name()}/api" - extension_data.api_key = fake.password(length=20) + extension_data = APIBasedExtension( + tenant_id=tenant.id, + name=fake.company(), + api_endpoint=f"https://{fake.domain_name()}/api", + api_key=fake.password(length=20), + ) created_extension = APIBasedExtensionService.save(extension_data) @@ -217,13 +222,14 @@ class TestAPIBasedExtensionService: account, tenant = self._create_test_account_and_tenant( db_session_with_containers, mock_external_service_dependencies ) - + assert tenant is not None # Create an extension first - extension_data = APIBasedExtension() - extension_data.tenant_id = tenant.id - extension_data.name = fake.company() - extension_data.api_endpoint = f"https://{fake.domain_name()}/api" - extension_data.api_key = fake.password(length=20) + extension_data = APIBasedExtension( + tenant_id=tenant.id, + name=fake.company(), + api_endpoint=f"https://{fake.domain_name()}/api", + api_key=fake.password(length=20), + ) created_extension = APIBasedExtensionService.save(extension_data) extension_id = created_extension.id @@ -245,22 +251,23 @@ class TestAPIBasedExtensionService: account, tenant = self._create_test_account_and_tenant( db_session_with_containers, mock_external_service_dependencies ) - + assert tenant is not None # Create first extension - extension_data1 = APIBasedExtension() - extension_data1.tenant_id = tenant.id - extension_data1.name = "Test Extension" - extension_data1.api_endpoint = f"https://{fake.domain_name()}/api" - extension_data1.api_key = fake.password(length=20) + extension_data1 = APIBasedExtension( + tenant_id=tenant.id, + name="Test Extension", + api_endpoint=f"https://{fake.domain_name()}/api", + api_key=fake.password(length=20), + ) APIBasedExtensionService.save(extension_data1) - # Try to create second extension with same name - extension_data2 = APIBasedExtension() - extension_data2.tenant_id = tenant.id - extension_data2.name = "Test Extension" # Same name - extension_data2.api_endpoint = f"https://{fake.domain_name()}/api" - extension_data2.api_key = fake.password(length=20) + extension_data2 = APIBasedExtension( + tenant_id=tenant.id, + name="Test Extension", # Same name + api_endpoint=f"https://{fake.domain_name()}/api", + api_key=fake.password(length=20), + ) with pytest.raises(ValueError, match="name must be unique, it is already existed"): APIBasedExtensionService.save(extension_data2) @@ -273,13 +280,14 @@ class TestAPIBasedExtensionService: account, tenant = self._create_test_account_and_tenant( db_session_with_containers, mock_external_service_dependencies ) - + assert tenant is not None # Create initial extension - extension_data = APIBasedExtension() - extension_data.tenant_id = tenant.id - extension_data.name = fake.company() - extension_data.api_endpoint = f"https://{fake.domain_name()}/api" - extension_data.api_key = fake.password(length=20) + extension_data = APIBasedExtension( + tenant_id=tenant.id, + name=fake.company(), + api_endpoint=f"https://{fake.domain_name()}/api", + api_key=fake.password(length=20), + ) created_extension = APIBasedExtensionService.save(extension_data) @@ -330,13 +338,14 @@ class TestAPIBasedExtensionService: mock_external_service_dependencies["requestor_instance"].request.side_effect = ValueError( "connection error: request timeout" ) - + assert tenant is not None # Setup extension data - extension_data = APIBasedExtension() - extension_data.tenant_id = tenant.id - extension_data.name = fake.company() - extension_data.api_endpoint = "https://invalid-endpoint.com/api" - extension_data.api_key = fake.password(length=20) + extension_data = APIBasedExtension( + tenant_id=tenant.id, + name=fake.company(), + api_endpoint="https://invalid-endpoint.com/api", + api_key=fake.password(length=20), + ) # Try to save extension with connection error with pytest.raises(ValueError, match="connection error: request timeout"): @@ -352,13 +361,14 @@ class TestAPIBasedExtensionService: account, tenant = self._create_test_account_and_tenant( db_session_with_containers, mock_external_service_dependencies ) - + assert tenant is not None # Setup extension data with short API key - extension_data = APIBasedExtension() - extension_data.tenant_id = tenant.id - extension_data.name = fake.company() - extension_data.api_endpoint = f"https://{fake.domain_name()}/api" - extension_data.api_key = "1234" # Less than 5 characters + extension_data = APIBasedExtension( + tenant_id=tenant.id, + name=fake.company(), + api_endpoint=f"https://{fake.domain_name()}/api", + api_key="1234", # Less than 5 characters + ) # Try to save extension with short API key with pytest.raises(ValueError, match="api_key must be at least 5 characters"): @@ -372,13 +382,14 @@ class TestAPIBasedExtensionService: account, tenant = self._create_test_account_and_tenant( db_session_with_containers, mock_external_service_dependencies ) - + assert tenant is not None # Test with None values - extension_data = APIBasedExtension() - extension_data.tenant_id = tenant.id - extension_data.name = None - extension_data.api_endpoint = f"https://{fake.domain_name()}/api" - extension_data.api_key = fake.password(length=20) + extension_data = APIBasedExtension( + tenant_id=tenant.id, + name=None, # type: ignore # why str become None here??? + api_endpoint=f"https://{fake.domain_name()}/api", + api_key=fake.password(length=20), + ) with pytest.raises(ValueError, match="name must not be empty"): APIBasedExtensionService.save(extension_data) @@ -424,13 +435,14 @@ class TestAPIBasedExtensionService: # Mock invalid ping response mock_external_service_dependencies["requestor_instance"].request.return_value = {"result": "invalid"} - + assert tenant is not None # Setup extension data - extension_data = APIBasedExtension() - extension_data.tenant_id = tenant.id - extension_data.name = fake.company() - extension_data.api_endpoint = f"https://{fake.domain_name()}/api" - extension_data.api_key = fake.password(length=20) + extension_data = APIBasedExtension( + tenant_id=tenant.id, + name=fake.company(), + api_endpoint=f"https://{fake.domain_name()}/api", + api_key=fake.password(length=20), + ) # Try to save extension with invalid ping response with pytest.raises(ValueError, match="{'result': 'invalid'}"): @@ -447,13 +459,14 @@ class TestAPIBasedExtensionService: # Mock ping response without result field mock_external_service_dependencies["requestor_instance"].request.return_value = {"status": "ok"} - + assert tenant is not None # Setup extension data - extension_data = APIBasedExtension() - extension_data.tenant_id = tenant.id - extension_data.name = fake.company() - extension_data.api_endpoint = f"https://{fake.domain_name()}/api" - extension_data.api_key = fake.password(length=20) + extension_data = APIBasedExtension( + tenant_id=tenant.id, + name=fake.company(), + api_endpoint=f"https://{fake.domain_name()}/api", + api_key=fake.password(length=20), + ) # Try to save extension with missing ping result with pytest.raises(ValueError, match="{'status': 'ok'}"): @@ -472,13 +485,14 @@ class TestAPIBasedExtensionService: account2, tenant2 = self._create_test_account_and_tenant( db_session_with_containers, mock_external_service_dependencies ) - + assert tenant1 is not None # Create extension in first tenant - extension_data = APIBasedExtension() - extension_data.tenant_id = tenant1.id - extension_data.name = fake.company() - extension_data.api_endpoint = f"https://{fake.domain_name()}/api" - extension_data.api_key = fake.password(length=20) + extension_data = APIBasedExtension( + tenant_id=tenant1.id, + name=fake.company(), + api_endpoint=f"https://{fake.domain_name()}/api", + api_key=fake.password(length=20), + ) created_extension = APIBasedExtensionService.save(extension_data) diff --git a/api/tests/test_containers_integration_tests/services/test_app_generate_service.py b/api/tests/test_containers_integration_tests/services/test_app_generate_service.py index 9386687a04..0f9ed94017 100644 --- a/api/tests/test_containers_integration_tests/services/test_app_generate_service.py +++ b/api/tests/test_containers_integration_tests/services/test_app_generate_service.py @@ -9,7 +9,6 @@ from models.model import EndUser from models.workflow import Workflow from services.app_generate_service import AppGenerateService from services.errors.app import WorkflowIdFormatError, WorkflowNotFoundError -from services.errors.llm import InvokeRateLimitError class TestAppGenerateService: @@ -19,10 +18,9 @@ class TestAppGenerateService: def mock_external_service_dependencies(self): """Mock setup for external service dependencies.""" with ( - patch("services.app_generate_service.BillingService") as mock_billing_service, + patch("services.billing_service.BillingService") as mock_billing_service, patch("services.app_generate_service.WorkflowService") as mock_workflow_service, patch("services.app_generate_service.RateLimit") as mock_rate_limit, - patch("services.app_generate_service.RateLimiter") as mock_rate_limiter, patch("services.app_generate_service.CompletionAppGenerator") as mock_completion_generator, patch("services.app_generate_service.ChatAppGenerator") as mock_chat_generator, patch("services.app_generate_service.AgentChatAppGenerator") as mock_agent_chat_generator, @@ -30,9 +28,13 @@ class TestAppGenerateService: patch("services.app_generate_service.WorkflowAppGenerator") as mock_workflow_generator, patch("services.account_service.FeatureService") as mock_account_feature_service, patch("services.app_generate_service.dify_config") as mock_dify_config, + patch("configs.dify_config") as mock_global_dify_config, ): # Setup default mock returns for billing service - mock_billing_service.get_info.return_value = {"subscription": {"plan": "sandbox"}} + mock_billing_service.update_tenant_feature_plan_usage.return_value = { + "result": "success", + "history_id": "test_history_id", + } # Setup default mock returns for workflow service mock_workflow_service_instance = mock_workflow_service.return_value @@ -46,10 +48,6 @@ class TestAppGenerateService: mock_rate_limit_instance.generate.return_value = ["test_response"] mock_rate_limit_instance.exit.return_value = None - mock_rate_limiter_instance = mock_rate_limiter.return_value - mock_rate_limiter_instance.is_rate_limited.return_value = False - mock_rate_limiter_instance.increment_rate_limit.return_value = None - # Setup default mock returns for app generators mock_completion_generator_instance = mock_completion_generator.return_value mock_completion_generator_instance.generate.return_value = ["completion_response"] @@ -86,11 +84,14 @@ class TestAppGenerateService: mock_dify_config.APP_MAX_ACTIVE_REQUESTS = 100 mock_dify_config.APP_DAILY_RATE_LIMIT = 1000 + mock_global_dify_config.BILLING_ENABLED = False + mock_global_dify_config.APP_MAX_ACTIVE_REQUESTS = 100 + mock_global_dify_config.APP_DAILY_RATE_LIMIT = 1000 + yield { "billing_service": mock_billing_service, "workflow_service": mock_workflow_service, "rate_limit": mock_rate_limit, - "rate_limiter": mock_rate_limiter, "completion_generator": mock_completion_generator, "chat_generator": mock_chat_generator, "agent_chat_generator": mock_agent_chat_generator, @@ -98,6 +99,7 @@ class TestAppGenerateService: "workflow_generator": mock_workflow_generator, "account_feature_service": mock_account_feature_service, "dify_config": mock_dify_config, + "global_dify_config": mock_global_dify_config, } def _create_test_app_and_account(self, db_session_with_containers, mock_external_service_dependencies, mode="chat"): @@ -428,13 +430,9 @@ class TestAppGenerateService: db_session_with_containers, mock_external_service_dependencies, mode="completion" ) - # Setup billing service mock for sandbox plan - mock_external_service_dependencies["billing_service"].get_info.return_value = { - "subscription": {"plan": "sandbox"} - } - # Set BILLING_ENABLED to True for this test mock_external_service_dependencies["dify_config"].BILLING_ENABLED = True + mock_external_service_dependencies["global_dify_config"].BILLING_ENABLED = True # Setup test arguments args = {"inputs": {"query": fake.text(max_nb_chars=50)}, "response_mode": "streaming"} @@ -447,41 +445,8 @@ class TestAppGenerateService: # Verify the result assert result == ["test_response"] - # Verify billing service was called - mock_external_service_dependencies["billing_service"].get_info.assert_called_once_with(app.tenant_id) - - def test_generate_with_rate_limit_exceeded(self, db_session_with_containers, mock_external_service_dependencies): - """ - Test generation when rate limit is exceeded. - """ - fake = Faker() - app, account = self._create_test_app_and_account( - db_session_with_containers, mock_external_service_dependencies, mode="completion" - ) - - # Setup billing service mock for sandbox plan - mock_external_service_dependencies["billing_service"].get_info.return_value = { - "subscription": {"plan": "sandbox"} - } - - # Set BILLING_ENABLED to True for this test - mock_external_service_dependencies["dify_config"].BILLING_ENABLED = True - - # Setup system rate limiter to return rate limited - with patch("services.app_generate_service.AppGenerateService.system_rate_limiter") as mock_system_rate_limiter: - mock_system_rate_limiter.is_rate_limited.return_value = True - - # Setup test arguments - args = {"inputs": {"query": fake.text(max_nb_chars=50)}, "response_mode": "streaming"} - - # Execute the method under test and expect rate limit error - with pytest.raises(InvokeRateLimitError) as exc_info: - AppGenerateService.generate( - app_model=app, user=account, args=args, invoke_from=InvokeFrom.SERVICE_API, streaming=True - ) - - # Verify error message - assert "Rate limit exceeded" in str(exc_info.value) + # Verify billing service was called to consume quota + mock_external_service_dependencies["billing_service"].update_tenant_feature_plan_usage.assert_called_once() def test_generate_with_invalid_app_mode(self, db_session_with_containers, mock_external_service_dependencies): """ diff --git a/api/tests/test_containers_integration_tests/services/test_feature_service.py b/api/tests/test_containers_integration_tests/services/test_feature_service.py index 8bd5440411..40380b09d2 100644 --- a/api/tests/test_containers_integration_tests/services/test_feature_service.py +++ b/api/tests/test_containers_integration_tests/services/test_feature_service.py @@ -3,6 +3,7 @@ from unittest.mock import patch import pytest from faker import Faker +from enums.cloud_plan import CloudPlan from services.feature_service import FeatureModel, FeatureService, KnowledgeRateLimitModel, SystemFeatureModel @@ -173,7 +174,7 @@ class TestFeatureService: # Set mock return value inside the patch context mock_external_service_dependencies["billing_service"].get_info.return_value = { "enabled": True, - "subscription": {"plan": "sandbox", "interval": "monthly", "education": False}, + "subscription": {"plan": CloudPlan.SANDBOX, "interval": "monthly", "education": False}, "members": {"size": 1, "limit": 3}, "apps": {"size": 1, "limit": 5}, "vector_space": {"size": 1, "limit": 2}, @@ -189,7 +190,7 @@ class TestFeatureService: result = FeatureService.get_features(tenant_id) # Assert: Verify sandbox-specific limitations - assert result.billing.subscription.plan == "sandbox" + assert result.billing.subscription.plan == CloudPlan.SANDBOX assert result.education.activated is False # Verify sandbox limitations diff --git a/api/tests/test_containers_integration_tests/services/test_file_service.py b/api/tests/test_containers_integration_tests/services/test_file_service.py index 4c94e42f3e..93516a0030 100644 --- a/api/tests/test_containers_integration_tests/services/test_file_service.py +++ b/api/tests/test_containers_integration_tests/services/test_file_service.py @@ -11,7 +11,7 @@ from configs import dify_config from models import Account, Tenant from models.enums import CreatorUserRole from models.model import EndUser, UploadFile -from services.errors.file import FileTooLargeError, UnsupportedFileTypeError +from services.errors.file import BlockedFileExtensionError, FileTooLargeError, UnsupportedFileTypeError from services.file_service import FileService @@ -943,3 +943,150 @@ class TestFileService: # Should have the signed URL when source_url is empty assert upload_file2.source_url == "https://example.com/signed-url" + + # Test file extension blacklist + def test_upload_file_blocked_extension( + self, db_session_with_containers, engine, mock_external_service_dependencies + ): + """ + Test file upload with blocked extension. + """ + fake = Faker() + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock blacklist configuration by patching the inner field + with patch.object(dify_config, "inner_UPLOAD_FILE_EXTENSION_BLACKLIST", "exe,bat,sh"): + filename = "malware.exe" + content = b"test content" + mimetype = "application/x-msdownload" + + with pytest.raises(BlockedFileExtensionError): + FileService(engine).upload_file( + filename=filename, + content=content, + mimetype=mimetype, + user=account, + ) + + def test_upload_file_blocked_extension_case_insensitive( + self, db_session_with_containers, engine, mock_external_service_dependencies + ): + """ + Test file upload with blocked extension (case insensitive). + """ + fake = Faker() + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock blacklist configuration by patching the inner field + with patch.object(dify_config, "inner_UPLOAD_FILE_EXTENSION_BLACKLIST", "exe,bat"): + # Test with uppercase extension + filename = "malware.EXE" + content = b"test content" + mimetype = "application/x-msdownload" + + with pytest.raises(BlockedFileExtensionError): + FileService(engine).upload_file( + filename=filename, + content=content, + mimetype=mimetype, + user=account, + ) + + def test_upload_file_not_in_blacklist(self, db_session_with_containers, engine, mock_external_service_dependencies): + """ + Test file upload with extension not in blacklist. + """ + fake = Faker() + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock blacklist configuration by patching the inner field + with patch.object(dify_config, "inner_UPLOAD_FILE_EXTENSION_BLACKLIST", "exe,bat,sh"): + filename = "document.pdf" + content = b"test content" + mimetype = "application/pdf" + + upload_file = FileService(engine).upload_file( + filename=filename, + content=content, + mimetype=mimetype, + user=account, + ) + + assert upload_file is not None + assert upload_file.name == filename + assert upload_file.extension == "pdf" + + def test_upload_file_empty_blacklist(self, db_session_with_containers, engine, mock_external_service_dependencies): + """ + Test file upload with empty blacklist (default behavior). + """ + fake = Faker() + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock empty blacklist configuration by patching the inner field + with patch.object(dify_config, "inner_UPLOAD_FILE_EXTENSION_BLACKLIST", ""): + # Should allow all file types when blacklist is empty + filename = "script.sh" + content = b"#!/bin/bash\necho test" + mimetype = "application/x-sh" + + upload_file = FileService(engine).upload_file( + filename=filename, + content=content, + mimetype=mimetype, + user=account, + ) + + assert upload_file is not None + assert upload_file.extension == "sh" + + def test_upload_file_multiple_blocked_extensions( + self, db_session_with_containers, engine, mock_external_service_dependencies + ): + """ + Test file upload with multiple blocked extensions. + """ + fake = Faker() + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock blacklist with multiple extensions by patching the inner field + blacklist_str = "exe,bat,cmd,com,scr,vbs,ps1,msi,dll" + with patch.object(dify_config, "inner_UPLOAD_FILE_EXTENSION_BLACKLIST", blacklist_str): + for ext in blacklist_str.split(","): + filename = f"malware.{ext}" + content = b"test content" + mimetype = "application/octet-stream" + + with pytest.raises(BlockedFileExtensionError): + FileService(engine).upload_file( + filename=filename, + content=content, + mimetype=mimetype, + user=account, + ) + + def test_upload_file_no_extension_with_blacklist( + self, db_session_with_containers, engine, mock_external_service_dependencies + ): + """ + Test file upload with no extension when blacklist is configured. + """ + fake = Faker() + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + # Mock blacklist configuration by patching the inner field + with patch.object(dify_config, "inner_UPLOAD_FILE_EXTENSION_BLACKLIST", "exe,bat"): + # Files with no extension should not be blocked + filename = "README" + content = b"test content" + mimetype = "text/plain" + + upload_file = FileService(engine).upload_file( + filename=filename, + content=content, + mimetype=mimetype, + user=account, + ) + + assert upload_file is not None + assert upload_file.extension == "" diff --git a/api/tests/test_containers_integration_tests/services/test_webapp_auth_service.py b/api/tests/test_containers_integration_tests/services/test_webapp_auth_service.py index 73e622b061..72b119b4ff 100644 --- a/api/tests/test_containers_integration_tests/services/test_webapp_auth_service.py +++ b/api/tests/test_containers_integration_tests/services/test_webapp_auth_service.py @@ -35,9 +35,7 @@ class TestWebAppAuthService: mock_enterprise_service.WebAppAuth.get_app_access_mode_by_id.return_value = type( "MockWebAppAuth", (), {"access_mode": "private"} )() - mock_enterprise_service.WebAppAuth.get_app_access_mode_by_code.return_value = type( - "MockWebAppAuth", (), {"access_mode": "private"} - )() + # Note: get_app_access_mode_by_code method was removed in refactoring yield { "passport_service": mock_passport_service, diff --git a/api/tests/test_containers_integration_tests/services/test_webhook_service.py b/api/tests/test_containers_integration_tests/services/test_webhook_service.py index e1056718e6..8328db950c 100644 --- a/api/tests/test_containers_integration_tests/services/test_webhook_service.py +++ b/api/tests/test_containers_integration_tests/services/test_webhook_service.py @@ -7,8 +7,10 @@ from faker import Faker from flask import Flask from werkzeug.datastructures import FileStorage +from models.enums import AppTriggerStatus, AppTriggerType from models.model import App -from models.workflow import Workflow, WorkflowWebhookTrigger +from models.trigger import AppTrigger, WorkflowWebhookTrigger +from models.workflow import Workflow from services.account_service import AccountService, TenantService from services.trigger.webhook_service import WebhookService @@ -20,9 +22,9 @@ class TestWebhookService: def mock_external_dependencies(self): """Mock external service dependencies.""" with ( - patch("services.webhook_service.AsyncWorkflowService") as mock_async_service, - patch("services.webhook_service.ToolFileManager") as mock_tool_file_manager, - patch("services.webhook_service.file_factory") as mock_file_factory, + patch("services.trigger.webhook_service.AsyncWorkflowService") as mock_async_service, + patch("services.trigger.webhook_service.ToolFileManager") as mock_tool_file_manager, + patch("services.trigger.webhook_service.file_factory") as mock_file_factory, patch("services.account_service.FeatureService") as mock_feature_service, ): # Mock ToolFileManager @@ -65,6 +67,7 @@ class TestWebhookService: ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant + assert tenant is not None # Create app app = App( @@ -129,10 +132,23 @@ class TestWebhookService: app_id=app.id, node_id="webhook_node", tenant_id=tenant.id, - webhook_id=webhook_id, + webhook_id=str(webhook_id), created_by=account.id, ) db_session_with_containers.add(webhook_trigger) + db_session_with_containers.flush() + + # Create app trigger (required for non-debug mode) + app_trigger = AppTrigger( + tenant_id=tenant.id, + app_id=app.id, + node_id="webhook_node", + trigger_type=AppTriggerType.TRIGGER_WEBHOOK, + provider_name="webhook", + title="Test Webhook", + status=AppTriggerStatus.ENABLED, + ) + db_session_with_containers.add(app_trigger) db_session_with_containers.commit() return { @@ -142,6 +158,7 @@ class TestWebhookService: "workflow": workflow, "webhook_trigger": webhook_trigger, "webhook_id": webhook_id, + "app_trigger": app_trigger, } def test_get_webhook_trigger_and_workflow_success(self, test_data, flask_app_with_containers): @@ -244,106 +261,166 @@ class TestWebhookService: assert webhook_data["method"] == "POST" assert webhook_data["body"]["raw"] == "raw text content" - def test_validate_webhook_request_success(self): - """Test successful webhook request validation.""" - webhook_data = { - "method": "POST", - "headers": {"Authorization": "Bearer token", "Content-Type": "application/json"}, - "query_params": {"version": "1"}, - "body": {"message": "hello"}, - "files": {}, - } + def test_extract_and_validate_webhook_request_success(self): + """Test successful webhook request validation and type conversion.""" + app = Flask(__name__) - node_config = { - "data": { - "method": "post", - "headers": [{"name": "Authorization", "required": True}, {"name": "Content-Type", "required": False}], - "params": [{"name": "version", "required": True}], - "body": [{"name": "message", "type": "string", "required": True}], + with app.test_request_context( + "/webhook", + method="POST", + headers={"Content-Type": "application/json", "Authorization": "Bearer token"}, + query_string="version=1", + json={"message": "hello"}, + ): + webhook_trigger = MagicMock() + node_config = { + "data": { + "method": "post", + "content_type": "application/json", + "headers": [ + {"name": "Authorization", "required": True}, + {"name": "Content-Type", "required": False}, + ], + "params": [{"name": "version", "required": True}], + "body": [{"name": "message", "type": "string", "required": True}], + } } - } - result = WebhookService.validate_webhook_request(webhook_data, node_config) + result = WebhookService.extract_and_validate_webhook_data(webhook_trigger, node_config) - assert result["valid"] is True + assert result["headers"]["Authorization"] == "Bearer token" + assert result["query_params"]["version"] == "1" + assert result["body"]["message"] == "hello" - def test_validate_webhook_request_method_mismatch(self): + def test_extract_and_validate_webhook_request_method_mismatch(self): """Test webhook validation with HTTP method mismatch.""" - webhook_data = {"method": "GET", "headers": {}, "query_params": {}, "body": {}, "files": {}} + app = Flask(__name__) - node_config = {"data": {"method": "post"}} + with app.test_request_context( + "/webhook", + method="GET", + headers={"Content-Type": "application/json"}, + ): + webhook_trigger = MagicMock() + node_config = {"data": {"method": "post", "content_type": "application/json"}} - result = WebhookService.validate_webhook_request(webhook_data, node_config) + with pytest.raises(ValueError, match="HTTP method mismatch"): + WebhookService.extract_and_validate_webhook_data(webhook_trigger, node_config) - assert result["valid"] is False - assert "HTTP method mismatch" in result["error"] - - def test_validate_webhook_request_missing_required_header(self): + def test_extract_and_validate_webhook_request_missing_required_header(self): """Test webhook validation with missing required header.""" - webhook_data = {"method": "POST", "headers": {}, "query_params": {}, "body": {}, "files": {}} + app = Flask(__name__) - node_config = {"data": {"method": "post", "headers": [{"name": "Authorization", "required": True}]}} - - result = WebhookService.validate_webhook_request(webhook_data, node_config) - - assert result["valid"] is False - assert "Required header missing: Authorization" in result["error"] - - def test_validate_webhook_request_case_insensitive_headers(self): - """Test webhook validation with case-insensitive header matching.""" - webhook_data = { - "method": "POST", - "headers": {"authorization": "Bearer token"}, # lowercase - "query_params": {}, - "body": {}, - "files": {}, - } - - node_config = { - "data": { - "method": "post", - "headers": [ - {"name": "Authorization", "required": True} # Pascal case - ], + with app.test_request_context( + "/webhook", + method="POST", + headers={"Content-Type": "application/json"}, + ): + webhook_trigger = MagicMock() + node_config = { + "data": { + "method": "post", + "content_type": "application/json", + "headers": [{"name": "Authorization", "required": True}], + } } - } - result = WebhookService.validate_webhook_request(webhook_data, node_config) + with pytest.raises(ValueError, match="Required header missing: Authorization"): + WebhookService.extract_and_validate_webhook_data(webhook_trigger, node_config) - assert result["valid"] is True + def test_extract_and_validate_webhook_request_case_insensitive_headers(self): + """Test webhook validation with case-insensitive header matching.""" + app = Flask(__name__) - def test_validate_webhook_request_missing_required_param(self): + with app.test_request_context( + "/webhook", + method="POST", + headers={"Content-Type": "application/json", "authorization": "Bearer token"}, + json={"message": "hello"}, + ): + webhook_trigger = MagicMock() + node_config = { + "data": { + "method": "post", + "content_type": "application/json", + "headers": [{"name": "Authorization", "required": True}], + "body": [{"name": "message", "type": "string", "required": True}], + } + } + + result = WebhookService.extract_and_validate_webhook_data(webhook_trigger, node_config) + + assert result["headers"].get("Authorization") == "Bearer token" + + def test_extract_and_validate_webhook_request_missing_required_param(self): """Test webhook validation with missing required query parameter.""" - webhook_data = {"method": "POST", "headers": {}, "query_params": {}, "body": {}, "files": {}} + app = Flask(__name__) - node_config = {"data": {"method": "post", "params": [{"name": "version", "required": True}]}} + with app.test_request_context( + "/webhook", + method="POST", + headers={"Content-Type": "application/json"}, + json={"message": "hello"}, + ): + webhook_trigger = MagicMock() + node_config = { + "data": { + "method": "post", + "content_type": "application/json", + "params": [{"name": "version", "required": True}], + "body": [{"name": "message", "type": "string", "required": True}], + } + } - result = WebhookService.validate_webhook_request(webhook_data, node_config) + with pytest.raises(ValueError, match="Required parameter missing: version"): + WebhookService.extract_and_validate_webhook_data(webhook_trigger, node_config) - assert result["valid"] is False - assert "Required query parameter missing: version" in result["error"] - - def test_validate_webhook_request_missing_required_body_param(self): + def test_extract_and_validate_webhook_request_missing_required_body_param(self): """Test webhook validation with missing required body parameter.""" - webhook_data = {"method": "POST", "headers": {}, "query_params": {}, "body": {}, "files": {}} + app = Flask(__name__) - node_config = {"data": {"method": "post", "body": [{"name": "message", "type": "string", "required": True}]}} + with app.test_request_context( + "/webhook", + method="POST", + headers={"Content-Type": "application/json"}, + json={}, + ): + webhook_trigger = MagicMock() + node_config = { + "data": { + "method": "post", + "content_type": "application/json", + "body": [{"name": "message", "type": "string", "required": True}], + } + } - result = WebhookService.validate_webhook_request(webhook_data, node_config) + with pytest.raises(ValueError, match="Required body parameter missing: message"): + WebhookService.extract_and_validate_webhook_data(webhook_trigger, node_config) - assert result["valid"] is False - assert "Required body parameter missing: message" in result["error"] + def test_extract_and_validate_webhook_request_missing_required_file(self): + """Test webhook validation when required file is missing from multipart request.""" + app = Flask(__name__) - def test_validate_webhook_request_missing_required_file(self): - """Test webhook validation with missing required file parameter.""" - webhook_data = {"method": "POST", "headers": {}, "query_params": {}, "body": {}, "files": {}} + with app.test_request_context( + "/webhook", + method="POST", + data={"note": "test"}, + content_type="multipart/form-data", + ): + webhook_trigger = MagicMock() + webhook_trigger.tenant_id = "tenant" + webhook_trigger.created_by = "user" + node_config = { + "data": { + "method": "post", + "content_type": "multipart/form-data", + "body": [{"name": "upload", "type": "file", "required": True}], + } + } - node_config = {"data": {"method": "post", "body": [{"name": "upload", "type": "file", "required": True}]}} + result = WebhookService.extract_and_validate_webhook_data(webhook_trigger, node_config) - result = WebhookService.validate_webhook_request(webhook_data, node_config) - - assert result["valid"] is False - assert "Required file parameter missing: upload" in result["error"] + assert result["files"] == {} def test_trigger_workflow_execution_success(self, test_data, mock_external_dependencies, flask_app_with_containers): """Test successful workflow execution trigger.""" @@ -357,12 +434,12 @@ class TestWebhookService: with flask_app_with_containers.app_context(): # Mock tenant owner lookup to return the test account - with patch("services.webhook_service.select") as mock_select: + with patch("services.trigger.webhook_service.select") as mock_select: mock_query = MagicMock() mock_select.return_value.join.return_value.where.return_value = mock_query # Mock the session to return our test account - with patch("services.webhook_service.Session") as mock_session: + with patch("services.trigger.webhook_service.Session") as mock_session: mock_session_instance = MagicMock() mock_session.return_value.__enter__.return_value = mock_session_instance mock_session_instance.scalar.return_value = test_data["account"] @@ -375,23 +452,20 @@ class TestWebhookService: # Verify AsyncWorkflowService was called mock_external_dependencies["async_service"].trigger_workflow_async.assert_called_once() - def test_trigger_workflow_execution_no_tenant_owner( + def test_trigger_workflow_execution_end_user_service_failure( self, test_data, mock_external_dependencies, flask_app_with_containers ): - """Test workflow execution trigger when tenant owner not found.""" + """Test workflow execution trigger when EndUserService fails.""" webhook_data = {"method": "POST", "headers": {}, "query_params": {}, "body": {}, "files": {}} with flask_app_with_containers.app_context(): - # Mock tenant owner lookup to return None - with ( - patch("services.webhook_service.select") as mock_select, - patch("services.webhook_service.Session") as mock_session, - ): - mock_session_instance = MagicMock() - mock_session.return_value.__enter__.return_value = mock_session_instance - mock_session_instance.scalar.return_value = None + # Mock EndUserService to raise an exception + with patch( + "services.trigger.webhook_service.EndUserService.get_or_create_end_user_by_type" + ) as mock_end_user: + mock_end_user.side_effect = ValueError("Failed to create end user") - with pytest.raises(ValueError, match="Tenant owner not found"): + with pytest.raises(ValueError, match="Failed to create end user"): WebhookService.trigger_workflow_execution( test_data["webhook_trigger"], webhook_data, test_data["workflow"] ) diff --git a/api/tests/test_containers_integration_tests/services/test_workflow_app_service.py b/api/tests/test_containers_integration_tests/services/test_workflow_app_service.py index 66bd4d3cd9..7b95944bbe 100644 --- a/api/tests/test_containers_integration_tests/services/test_workflow_app_service.py +++ b/api/tests/test_containers_integration_tests/services/test_workflow_app_service.py @@ -209,7 +209,6 @@ class TestWorkflowAppService: # Create workflow app log workflow_app_log = WorkflowAppLog( - id=str(uuid.uuid4()), tenant_id=app.tenant_id, app_id=app.id, workflow_id=workflow.id, @@ -217,8 +216,9 @@ class TestWorkflowAppService: created_from="service-api", created_by_role=CreatorUserRole.ACCOUNT, created_by=account.id, - created_at=datetime.now(UTC), ) + workflow_app_log.id = str(uuid.uuid4()) + workflow_app_log.created_at = datetime.now(UTC) db.session.add(workflow_app_log) db.session.commit() @@ -365,7 +365,6 @@ class TestWorkflowAppService: db.session.commit() workflow_app_log = WorkflowAppLog( - id=str(uuid.uuid4()), tenant_id=app.tenant_id, app_id=app.id, workflow_id=workflow.id, @@ -373,8 +372,9 @@ class TestWorkflowAppService: created_from="service-api", created_by_role=CreatorUserRole.ACCOUNT, created_by=account.id, - created_at=datetime.now(UTC) + timedelta(minutes=i), ) + workflow_app_log.id = str(uuid.uuid4()) + workflow_app_log.created_at = datetime.now(UTC) + timedelta(minutes=i) db.session.add(workflow_app_log) db.session.commit() @@ -473,7 +473,6 @@ class TestWorkflowAppService: db.session.commit() workflow_app_log = WorkflowAppLog( - id=str(uuid.uuid4()), tenant_id=app.tenant_id, app_id=app.id, workflow_id=workflow.id, @@ -481,8 +480,9 @@ class TestWorkflowAppService: created_from="service-api", created_by_role=CreatorUserRole.ACCOUNT, created_by=account.id, - created_at=timestamp, ) + workflow_app_log.id = str(uuid.uuid4()) + workflow_app_log.created_at = timestamp db.session.add(workflow_app_log) db.session.commit() @@ -580,7 +580,6 @@ class TestWorkflowAppService: db.session.commit() workflow_app_log = WorkflowAppLog( - id=str(uuid.uuid4()), tenant_id=app.tenant_id, app_id=app.id, workflow_id=workflow.id, @@ -588,8 +587,9 @@ class TestWorkflowAppService: created_from="service-api", created_by_role=CreatorUserRole.ACCOUNT, created_by=account.id, - created_at=datetime.now(UTC) + timedelta(minutes=i), ) + workflow_app_log.id = str(uuid.uuid4()) + workflow_app_log.created_at = datetime.now(UTC) + timedelta(minutes=i) db.session.add(workflow_app_log) db.session.commit() @@ -710,7 +710,6 @@ class TestWorkflowAppService: db.session.commit() workflow_app_log = WorkflowAppLog( - id=str(uuid.uuid4()), tenant_id=app.tenant_id, app_id=app.id, workflow_id=workflow.id, @@ -718,8 +717,9 @@ class TestWorkflowAppService: created_from="service-api", created_by_role=CreatorUserRole.ACCOUNT, created_by=account.id, - created_at=datetime.now(UTC) + timedelta(minutes=i), ) + workflow_app_log.id = str(uuid.uuid4()) + workflow_app_log.created_at = datetime.now(UTC) + timedelta(minutes=i) db.session.add(workflow_app_log) db.session.commit() @@ -752,7 +752,6 @@ class TestWorkflowAppService: db.session.commit() workflow_app_log = WorkflowAppLog( - id=str(uuid.uuid4()), tenant_id=app.tenant_id, app_id=app.id, workflow_id=workflow.id, @@ -760,8 +759,9 @@ class TestWorkflowAppService: created_from="web-app", created_by_role=CreatorUserRole.END_USER, created_by=end_user.id, - created_at=datetime.now(UTC) + timedelta(minutes=i + 10), ) + workflow_app_log.id = str(uuid.uuid4()) + workflow_app_log.created_at = datetime.now(UTC) + timedelta(minutes=i + 10) db.session.add(workflow_app_log) db.session.commit() @@ -889,7 +889,6 @@ class TestWorkflowAppService: # Create workflow app log workflow_app_log = WorkflowAppLog( - id=str(uuid.uuid4()), tenant_id=app.tenant_id, app_id=app.id, workflow_id=workflow.id, @@ -897,8 +896,9 @@ class TestWorkflowAppService: created_from="service-api", created_by_role=CreatorUserRole.ACCOUNT, created_by=account.id, - created_at=datetime.now(UTC), ) + workflow_app_log.id = str(uuid.uuid4()) + workflow_app_log.created_at = datetime.now(UTC) db.session.add(workflow_app_log) db.session.commit() @@ -979,7 +979,6 @@ class TestWorkflowAppService: # Create workflow app log workflow_app_log = WorkflowAppLog( - id=str(uuid.uuid4()), tenant_id=app.tenant_id, app_id=app.id, workflow_id=workflow.id, @@ -987,8 +986,9 @@ class TestWorkflowAppService: created_from="service-api", created_by_role=CreatorUserRole.ACCOUNT, created_by=account.id, - created_at=datetime.now(UTC), ) + workflow_app_log.id = str(uuid.uuid4()) + workflow_app_log.created_at = datetime.now(UTC) db.session.add(workflow_app_log) db.session.commit() @@ -1133,7 +1133,6 @@ class TestWorkflowAppService: db_session_with_containers.flush() log = WorkflowAppLog( - id=str(uuid.uuid4()), tenant_id=app.tenant_id, app_id=app.id, workflow_id=workflow.id, @@ -1141,8 +1140,9 @@ class TestWorkflowAppService: created_from="service-api", created_by_role=CreatorUserRole.ACCOUNT, created_by=account.id, - created_at=datetime.now(UTC) + timedelta(minutes=i), ) + log.id = str(uuid.uuid4()) + log.created_at = datetime.now(UTC) + timedelta(minutes=i) db_session_with_containers.add(log) logs_data.append((log, workflow_run)) @@ -1233,7 +1233,6 @@ class TestWorkflowAppService: db_session_with_containers.flush() log = WorkflowAppLog( - id=str(uuid.uuid4()), tenant_id=app.tenant_id, app_id=app.id, workflow_id=workflow.id, @@ -1241,8 +1240,9 @@ class TestWorkflowAppService: created_from="service-api", created_by_role=CreatorUserRole.ACCOUNT, created_by=account.id, - created_at=datetime.now(UTC) + timedelta(minutes=i), ) + log.id = str(uuid.uuid4()) + log.created_at = datetime.now(UTC) + timedelta(minutes=i) db_session_with_containers.add(log) logs_data.append((log, workflow_run)) @@ -1335,7 +1335,6 @@ class TestWorkflowAppService: db_session_with_containers.flush() log = WorkflowAppLog( - id=str(uuid.uuid4()), tenant_id=app.tenant_id, app_id=app.id, workflow_id=workflow.id, @@ -1343,8 +1342,9 @@ class TestWorkflowAppService: created_from="service-api", created_by_role=CreatorUserRole.ACCOUNT, created_by=account.id, - created_at=datetime.now(UTC) + timedelta(minutes=i * 10 + j), ) + log.id = str(uuid.uuid4()) + log.created_at = datetime.now(UTC) + timedelta(minutes=i * 10 + j) db_session_with_containers.add(log) db_session_with_containers.commit() diff --git a/api/tests/test_containers_integration_tests/services/test_workflow_service.py b/api/tests/test_containers_integration_tests/services/test_workflow_service.py index 4741eba1f5..88c6313f64 100644 --- a/api/tests/test_containers_integration_tests/services/test_workflow_service.py +++ b/api/tests/test_containers_integration_tests/services/test_workflow_service.py @@ -584,7 +584,16 @@ class TestWorkflowService: account = self._create_test_account(db_session_with_containers, fake) app = self._create_test_app(db_session_with_containers, fake) - graph = {"nodes": [{"id": "start", "type": "start"}], "edges": []} + graph = { + "nodes": [ + { + "id": "start", + "type": "start", + "data": {"type": "start", "title": "Start"}, + } + ], + "edges": [], + } features = {"features": ["feature1", "feature2"]} # Don't pre-calculate hash, let the service generate it unique_hash = None @@ -632,7 +641,25 @@ class TestWorkflowService: # Get the actual hash that was generated original_hash = existing_workflow.unique_hash - new_graph = {"nodes": [{"id": "start", "type": "start"}, {"id": "end", "type": "end"}], "edges": []} + new_graph = { + "nodes": [ + { + "id": "start", + "type": "start", + "data": {"type": "start", "title": "Start"}, + }, + { + "id": "end", + "type": "end", + "data": { + "type": "end", + "title": "End", + "outputs": [{"variable": "output", "value_selector": ["start", "text"]}], + }, + }, + ], + "edges": [], + } new_features = {"features": ["feature1", "feature2", "feature3"]} environment_variables = [] @@ -679,7 +706,16 @@ class TestWorkflowService: # Get the actual hash that was generated original_hash = existing_workflow.unique_hash - new_graph = {"nodes": [{"id": "start", "type": "start"}], "edges": []} + new_graph = { + "nodes": [ + { + "id": "start", + "type": "start", + "data": {"type": "start", "title": "Start"}, + } + ], + "edges": [], + } new_features = {"features": ["feature1"]} # Use a different hash to trigger the error mismatched_hash = "different_hash_12345" diff --git a/api/tests/test_containers_integration_tests/services/tools/test_mcp_tools_manage_service.py b/api/tests/test_containers_integration_tests/services/tools/test_mcp_tools_manage_service.py index 71d55c3ade..8c190762cf 100644 --- a/api/tests/test_containers_integration_tests/services/tools/test_mcp_tools_manage_service.py +++ b/api/tests/test_containers_integration_tests/services/tools/test_mcp_tools_manage_service.py @@ -20,12 +20,21 @@ class TestMCPToolManageService: patch("services.tools.mcp_tools_manage_service.ToolTransformService") as mock_tool_transform_service, ): # Setup default mock returns + from core.tools.entities.api_entities import ToolProviderApiEntity + from core.tools.entities.common_entities import I18nObject + mock_encrypter.encrypt_token.return_value = "encrypted_server_url" - mock_tool_transform_service.mcp_provider_to_user_provider.return_value = { - "id": "test_id", - "name": "test_name", - "type": ToolProviderType.MCP, - } + mock_tool_transform_service.mcp_provider_to_user_provider.return_value = ToolProviderApiEntity( + id="test_id", + author="test_author", + name="test_name", + type=ToolProviderType.MCP, + description=I18nObject(en_US="Test Description", zh_Hans="测试描述"), + icon={"type": "emoji", "content": "🤖"}, + label=I18nObject(en_US="Test Label", zh_Hans="测试标签"), + labels=[], + tools=[], + ) yield { "encrypter": mock_encrypter, @@ -104,9 +113,9 @@ class TestMCPToolManageService: mcp_provider = MCPToolProvider( tenant_id=tenant_id, name=fake.company(), - server_identifier=fake.uuid4(), + server_identifier=str(fake.uuid4()), server_url="encrypted_server_url", - server_url_hash=fake.sha256(), + server_url_hash=str(fake.sha256()), user_id=user_id, authed=False, tools="[]", @@ -144,7 +153,10 @@ class TestMCPToolManageService: ) # Act: Execute the method under test - result = MCPToolManageService.get_mcp_provider_by_provider_id(mcp_provider.id, tenant.id) + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) + result = service.get_provider(provider_id=mcp_provider.id, tenant_id=tenant.id) # Assert: Verify the expected outcomes assert result is not None @@ -154,8 +166,6 @@ class TestMCPToolManageService: assert result.user_id == account.id # Verify database state - from extensions.ext_database import db - db.session.refresh(result) assert result.id is not None assert result.server_identifier == mcp_provider.server_identifier @@ -177,11 +187,14 @@ class TestMCPToolManageService: db_session_with_containers, mock_external_service_dependencies ) - non_existent_id = fake.uuid4() + non_existent_id = str(fake.uuid4()) # Act & Assert: Verify proper error handling + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) with pytest.raises(ValueError, match="MCP tool not found"): - MCPToolManageService.get_mcp_provider_by_provider_id(non_existent_id, tenant.id) + service.get_provider(provider_id=non_existent_id, tenant_id=tenant.id) def test_get_mcp_provider_by_provider_id_tenant_isolation( self, db_session_with_containers, mock_external_service_dependencies @@ -210,8 +223,11 @@ class TestMCPToolManageService: ) # Act & Assert: Verify tenant isolation + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) with pytest.raises(ValueError, match="MCP tool not found"): - MCPToolManageService.get_mcp_provider_by_provider_id(mcp_provider1.id, tenant2.id) + service.get_provider(provider_id=mcp_provider1.id, tenant_id=tenant2.id) def test_get_mcp_provider_by_server_identifier_success( self, db_session_with_containers, mock_external_service_dependencies @@ -235,7 +251,10 @@ class TestMCPToolManageService: ) # Act: Execute the method under test - result = MCPToolManageService.get_mcp_provider_by_server_identifier(mcp_provider.server_identifier, tenant.id) + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) + result = service.get_provider(server_identifier=mcp_provider.server_identifier, tenant_id=tenant.id) # Assert: Verify the expected outcomes assert result is not None @@ -245,8 +264,6 @@ class TestMCPToolManageService: assert result.user_id == account.id # Verify database state - from extensions.ext_database import db - db.session.refresh(result) assert result.id is not None assert result.name == mcp_provider.name @@ -268,11 +285,14 @@ class TestMCPToolManageService: db_session_with_containers, mock_external_service_dependencies ) - non_existent_identifier = fake.uuid4() + non_existent_identifier = str(fake.uuid4()) # Act & Assert: Verify proper error handling + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) with pytest.raises(ValueError, match="MCP tool not found"): - MCPToolManageService.get_mcp_provider_by_server_identifier(non_existent_identifier, tenant.id) + service.get_provider(server_identifier=non_existent_identifier, tenant_id=tenant.id) def test_get_mcp_provider_by_server_identifier_tenant_isolation( self, db_session_with_containers, mock_external_service_dependencies @@ -301,8 +321,11 @@ class TestMCPToolManageService: ) # Act & Assert: Verify tenant isolation + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) with pytest.raises(ValueError, match="MCP tool not found"): - MCPToolManageService.get_mcp_provider_by_server_identifier(mcp_provider1.server_identifier, tenant2.id) + service.get_provider(server_identifier=mcp_provider1.server_identifier, tenant_id=tenant2.id) def test_create_mcp_provider_success(self, db_session_with_containers, mock_external_service_dependencies): """ @@ -322,15 +345,30 @@ class TestMCPToolManageService: ) # Setup mocks for provider creation + from core.tools.entities.api_entities import ToolProviderApiEntity + from core.tools.entities.common_entities import I18nObject + mock_external_service_dependencies["encrypter"].encrypt_token.return_value = "encrypted_server_url" - mock_external_service_dependencies["tool_transform_service"].mcp_provider_to_user_provider.return_value = { - "id": "new_provider_id", - "name": "Test MCP Provider", - "type": ToolProviderType.MCP, - } + mock_external_service_dependencies[ + "tool_transform_service" + ].mcp_provider_to_user_provider.return_value = ToolProviderApiEntity( + id="new_provider_id", + author=account.name, + name="Test MCP Provider", + type=ToolProviderType.MCP, + description=I18nObject(en_US="Test MCP Provider Description", zh_Hans="测试MCP提供者描述"), + icon={"type": "emoji", "content": "🤖"}, + label=I18nObject(en_US="Test MCP Provider", zh_Hans="测试MCP提供者"), + labels=[], + tools=[], + ) # Act: Execute the method under test - result = MCPToolManageService.create_mcp_provider( + from core.entities.mcp_provider import MCPConfiguration + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) + result = service.create_provider( tenant_id=tenant.id, name="Test MCP Provider", server_url="https://example.com/mcp", @@ -339,14 +377,16 @@ class TestMCPToolManageService: icon_type="emoji", icon_background="#FF6B6B", server_identifier="test_identifier_123", - timeout=30.0, - sse_read_timeout=300.0, + configuration=MCPConfiguration( + timeout=30.0, + sse_read_timeout=300.0, + ), ) # Assert: Verify the expected outcomes assert result is not None - assert result["name"] == "Test MCP Provider" - assert result["type"] == ToolProviderType.MCP + assert result.name == "Test MCP Provider" + assert result.type == ToolProviderType.MCP # Verify database state from extensions.ext_database import db @@ -386,7 +426,11 @@ class TestMCPToolManageService: ) # Create first provider - MCPToolManageService.create_mcp_provider( + from core.entities.mcp_provider import MCPConfiguration + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) + service.create_provider( tenant_id=tenant.id, name="Test MCP Provider", server_url="https://example1.com/mcp", @@ -395,13 +439,15 @@ class TestMCPToolManageService: icon_type="emoji", icon_background="#FF6B6B", server_identifier="test_identifier_1", - timeout=30.0, - sse_read_timeout=300.0, + configuration=MCPConfiguration( + timeout=30.0, + sse_read_timeout=300.0, + ), ) # Act & Assert: Verify proper error handling for duplicate name with pytest.raises(ValueError, match="MCP tool Test MCP Provider already exists"): - MCPToolManageService.create_mcp_provider( + service.create_provider( tenant_id=tenant.id, name="Test MCP Provider", # Duplicate name server_url="https://example2.com/mcp", @@ -410,8 +456,10 @@ class TestMCPToolManageService: icon_type="emoji", icon_background="#4ECDC4", server_identifier="test_identifier_2", - timeout=45.0, - sse_read_timeout=400.0, + configuration=MCPConfiguration( + timeout=45.0, + sse_read_timeout=400.0, + ), ) def test_create_mcp_provider_duplicate_server_url( @@ -432,7 +480,11 @@ class TestMCPToolManageService: ) # Create first provider - MCPToolManageService.create_mcp_provider( + from core.entities.mcp_provider import MCPConfiguration + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) + service.create_provider( tenant_id=tenant.id, name="Test MCP Provider 1", server_url="https://example.com/mcp", @@ -441,13 +493,15 @@ class TestMCPToolManageService: icon_type="emoji", icon_background="#FF6B6B", server_identifier="test_identifier_1", - timeout=30.0, - sse_read_timeout=300.0, + configuration=MCPConfiguration( + timeout=30.0, + sse_read_timeout=300.0, + ), ) # Act & Assert: Verify proper error handling for duplicate server URL - with pytest.raises(ValueError, match="MCP tool https://example.com/mcp already exists"): - MCPToolManageService.create_mcp_provider( + with pytest.raises(ValueError, match="MCP tool with this server URL already exists"): + service.create_provider( tenant_id=tenant.id, name="Test MCP Provider 2", server_url="https://example.com/mcp", # Duplicate URL @@ -456,8 +510,10 @@ class TestMCPToolManageService: icon_type="emoji", icon_background="#4ECDC4", server_identifier="test_identifier_2", - timeout=45.0, - sse_read_timeout=400.0, + configuration=MCPConfiguration( + timeout=45.0, + sse_read_timeout=400.0, + ), ) def test_create_mcp_provider_duplicate_server_identifier( @@ -478,7 +534,11 @@ class TestMCPToolManageService: ) # Create first provider - MCPToolManageService.create_mcp_provider( + from core.entities.mcp_provider import MCPConfiguration + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) + service.create_provider( tenant_id=tenant.id, name="Test MCP Provider 1", server_url="https://example1.com/mcp", @@ -487,13 +547,15 @@ class TestMCPToolManageService: icon_type="emoji", icon_background="#FF6B6B", server_identifier="test_identifier_123", - timeout=30.0, - sse_read_timeout=300.0, + configuration=MCPConfiguration( + timeout=30.0, + sse_read_timeout=300.0, + ), ) # Act & Assert: Verify proper error handling for duplicate server identifier with pytest.raises(ValueError, match="MCP tool test_identifier_123 already exists"): - MCPToolManageService.create_mcp_provider( + service.create_provider( tenant_id=tenant.id, name="Test MCP Provider 2", server_url="https://example2.com/mcp", @@ -502,8 +564,10 @@ class TestMCPToolManageService: icon_type="emoji", icon_background="#4ECDC4", server_identifier="test_identifier_123", # Duplicate identifier - timeout=45.0, - sse_read_timeout=400.0, + configuration=MCPConfiguration( + timeout=45.0, + sse_read_timeout=400.0, + ), ) def test_retrieve_mcp_tools_success(self, db_session_with_containers, mock_external_service_dependencies): @@ -543,23 +607,59 @@ class TestMCPToolManageService: db.session.commit() # Setup mock for transformation service + from core.tools.entities.api_entities import ToolProviderApiEntity + from core.tools.entities.common_entities import I18nObject + mock_external_service_dependencies["tool_transform_service"].mcp_provider_to_user_provider.side_effect = [ - {"id": provider1.id, "name": provider1.name, "type": ToolProviderType.MCP}, - {"id": provider2.id, "name": provider2.name, "type": ToolProviderType.MCP}, - {"id": provider3.id, "name": provider3.name, "type": ToolProviderType.MCP}, + ToolProviderApiEntity( + id=provider1.id, + author=account.name, + name=provider1.name, + type=ToolProviderType.MCP, + description=I18nObject(en_US="Alpha Provider Description", zh_Hans="Alpha提供者描述"), + icon={"type": "emoji", "content": "🅰️"}, + label=I18nObject(en_US=provider1.name, zh_Hans=provider1.name), + labels=[], + tools=[], + ), + ToolProviderApiEntity( + id=provider2.id, + author=account.name, + name=provider2.name, + type=ToolProviderType.MCP, + description=I18nObject(en_US="Beta Provider Description", zh_Hans="Beta提供者描述"), + icon={"type": "emoji", "content": "🅱️"}, + label=I18nObject(en_US=provider2.name, zh_Hans=provider2.name), + labels=[], + tools=[], + ), + ToolProviderApiEntity( + id=provider3.id, + author=account.name, + name=provider3.name, + type=ToolProviderType.MCP, + description=I18nObject(en_US="Gamma Provider Description", zh_Hans="Gamma提供者描述"), + icon={"type": "emoji", "content": "Γ"}, + label=I18nObject(en_US=provider3.name, zh_Hans=provider3.name), + labels=[], + tools=[], + ), ] # Act: Execute the method under test - result = MCPToolManageService.retrieve_mcp_tools(tenant.id, for_list=True) + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) + result = service.list_providers(tenant_id=tenant.id, for_list=True) # Assert: Verify the expected outcomes assert result is not None assert len(result) == 3 # Verify correct ordering by name - assert result[0]["name"] == "Alpha Provider" - assert result[1]["name"] == "Beta Provider" - assert result[2]["name"] == "Gamma Provider" + assert result[0].name == "Alpha Provider" + assert result[1].name == "Beta Provider" + assert result[2].name == "Gamma Provider" # Verify mock interactions assert ( @@ -584,7 +684,10 @@ class TestMCPToolManageService: # No MCP providers created for this tenant # Act: Execute the method under test - result = MCPToolManageService.retrieve_mcp_tools(tenant.id, for_list=False) + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) + result = service.list_providers(tenant_id=tenant.id, for_list=False) # Assert: Verify the expected outcomes assert result is not None @@ -624,20 +727,46 @@ class TestMCPToolManageService: ) # Setup mock for transformation service + from core.tools.entities.api_entities import ToolProviderApiEntity + from core.tools.entities.common_entities import I18nObject + mock_external_service_dependencies["tool_transform_service"].mcp_provider_to_user_provider.side_effect = [ - {"id": provider1.id, "name": provider1.name, "type": ToolProviderType.MCP}, - {"id": provider2.id, "name": provider2.name, "type": ToolProviderType.MCP}, + ToolProviderApiEntity( + id=provider1.id, + author=account1.name, + name=provider1.name, + type=ToolProviderType.MCP, + description=I18nObject(en_US="Provider 1 Description", zh_Hans="提供者1描述"), + icon={"type": "emoji", "content": "1️⃣"}, + label=I18nObject(en_US=provider1.name, zh_Hans=provider1.name), + labels=[], + tools=[], + ), + ToolProviderApiEntity( + id=provider2.id, + author=account2.name, + name=provider2.name, + type=ToolProviderType.MCP, + description=I18nObject(en_US="Provider 2 Description", zh_Hans="提供者2描述"), + icon={"type": "emoji", "content": "2️⃣"}, + label=I18nObject(en_US=provider2.name, zh_Hans=provider2.name), + labels=[], + tools=[], + ), ] # Act: Execute the method under test for both tenants - result1 = MCPToolManageService.retrieve_mcp_tools(tenant1.id, for_list=True) - result2 = MCPToolManageService.retrieve_mcp_tools(tenant2.id, for_list=True) + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) + result1 = service.list_providers(tenant_id=tenant1.id, for_list=True) + result2 = service.list_providers(tenant_id=tenant2.id, for_list=True) # Assert: Verify tenant isolation assert len(result1) == 1 assert len(result2) == 1 - assert result1[0]["id"] == provider1.id - assert result2[0]["id"] == provider2.id + assert result1[0].id == provider1.id + assert result2[0].id == provider2.id def test_list_mcp_tool_from_remote_server_success( self, db_session_with_containers, mock_external_service_dependencies @@ -661,17 +790,20 @@ class TestMCPToolManageService: mcp_provider = self._create_test_mcp_provider( db_session_with_containers, mock_external_service_dependencies, tenant.id, account.id ) - mcp_provider.server_url = "encrypted_server_url" - mcp_provider.authed = False + # Use a valid base64 encoded string to avoid decryption errors + import base64 + + mcp_provider.server_url = base64.b64encode(b"encrypted_server_url").decode() + mcp_provider.authed = True # Provider must be authenticated to list tools mcp_provider.tools = "[]" from extensions.ext_database import db db.session.commit() - # Mock the decrypted_server_url property to avoid encryption issues - with patch("models.tools.encrypter") as mock_encrypter: - mock_encrypter.decrypt_token.return_value = "https://example.com/mcp" + # Mock the decryption process at the rsa level to avoid key file issues + with patch("libs.rsa.decrypt") as mock_decrypt: + mock_decrypt.return_value = "https://example.com/mcp" # Mock MCPClient and its context manager mock_tools = [ @@ -683,13 +815,16 @@ class TestMCPToolManageService: )(), ] - with patch("services.tools.mcp_tools_manage_service.MCPClient") as mock_mcp_client: + with patch("services.tools.mcp_tools_manage_service.MCPClientWithAuthRetry") as mock_mcp_client: # Setup mock client mock_client_instance = mock_mcp_client.return_value.__enter__.return_value mock_client_instance.list_tools.return_value = mock_tools # Act: Execute the method under test - result = MCPToolManageService.list_mcp_tool_from_remote_server(tenant.id, mcp_provider.id) + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) + result = service.list_provider_tools(tenant_id=tenant.id, provider_id=mcp_provider.id) # Assert: Verify the expected outcomes assert result is not None @@ -705,16 +840,8 @@ class TestMCPToolManageService: assert mcp_provider.updated_at is not None # Verify mock interactions - mock_mcp_client.assert_called_once_with( - "https://example.com/mcp", - mcp_provider.id, - tenant.id, - authed=False, - for_list=True, - headers={}, - timeout=30.0, - sse_read_timeout=300.0, - ) + # MCPClientWithAuthRetry is called with different parameters + mock_mcp_client.assert_called_once() def test_list_mcp_tool_from_remote_server_auth_error( self, db_session_with_containers, mock_external_service_dependencies @@ -737,7 +864,10 @@ class TestMCPToolManageService: mcp_provider = self._create_test_mcp_provider( db_session_with_containers, mock_external_service_dependencies, tenant.id, account.id ) - mcp_provider.server_url = "encrypted_server_url" + # Use a valid base64 encoded string to avoid decryption errors + import base64 + + mcp_provider.server_url = base64.b64encode(b"encrypted_server_url").decode() mcp_provider.authed = False mcp_provider.tools = "[]" @@ -745,20 +875,23 @@ class TestMCPToolManageService: db.session.commit() - # Mock the decrypted_server_url property to avoid encryption issues - with patch("models.tools.encrypter") as mock_encrypter: - mock_encrypter.decrypt_token.return_value = "https://example.com/mcp" + # Mock the decryption process at the rsa level to avoid key file issues + with patch("libs.rsa.decrypt") as mock_decrypt: + mock_decrypt.return_value = "https://example.com/mcp" # Mock MCPClient to raise authentication error - with patch("services.tools.mcp_tools_manage_service.MCPClient") as mock_mcp_client: + with patch("services.tools.mcp_tools_manage_service.MCPClientWithAuthRetry") as mock_mcp_client: from core.mcp.error import MCPAuthError mock_client_instance = mock_mcp_client.return_value.__enter__.return_value mock_client_instance.list_tools.side_effect = MCPAuthError("Authentication required") # Act & Assert: Verify proper error handling + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) with pytest.raises(ValueError, match="Please auth the tool first"): - MCPToolManageService.list_mcp_tool_from_remote_server(tenant.id, mcp_provider.id) + service.list_provider_tools(tenant_id=tenant.id, provider_id=mcp_provider.id) # Verify database state was not changed db.session.refresh(mcp_provider) @@ -786,32 +919,38 @@ class TestMCPToolManageService: mcp_provider = self._create_test_mcp_provider( db_session_with_containers, mock_external_service_dependencies, tenant.id, account.id ) - mcp_provider.server_url = "encrypted_server_url" - mcp_provider.authed = False + # Use a valid base64 encoded string to avoid decryption errors + import base64 + + mcp_provider.server_url = base64.b64encode(b"encrypted_server_url").decode() + mcp_provider.authed = True # Provider must be authenticated to test connection errors mcp_provider.tools = "[]" from extensions.ext_database import db db.session.commit() - # Mock the decrypted_server_url property to avoid encryption issues - with patch("models.tools.encrypter") as mock_encrypter: - mock_encrypter.decrypt_token.return_value = "https://example.com/mcp" + # Mock the decryption process at the rsa level to avoid key file issues + with patch("libs.rsa.decrypt") as mock_decrypt: + mock_decrypt.return_value = "https://example.com/mcp" # Mock MCPClient to raise connection error - with patch("services.tools.mcp_tools_manage_service.MCPClient") as mock_mcp_client: + with patch("services.tools.mcp_tools_manage_service.MCPClientWithAuthRetry") as mock_mcp_client: from core.mcp.error import MCPError mock_client_instance = mock_mcp_client.return_value.__enter__.return_value mock_client_instance.list_tools.side_effect = MCPError("Connection failed") # Act & Assert: Verify proper error handling + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) with pytest.raises(ValueError, match="Failed to connect to MCP server: Connection failed"): - MCPToolManageService.list_mcp_tool_from_remote_server(tenant.id, mcp_provider.id) + service.list_provider_tools(tenant_id=tenant.id, provider_id=mcp_provider.id) # Verify database state was not changed db.session.refresh(mcp_provider) - assert mcp_provider.authed is False + assert mcp_provider.authed is True # Provider remains authenticated assert mcp_provider.tools == "[]" def test_delete_mcp_tool_success(self, db_session_with_containers, mock_external_service_dependencies): @@ -840,7 +979,8 @@ class TestMCPToolManageService: assert db.session.query(MCPToolProvider).filter_by(id=mcp_provider.id).first() is not None # Act: Execute the method under test - MCPToolManageService.delete_mcp_tool(tenant.id, mcp_provider.id) + service = MCPToolManageService(db.session()) + service.delete_provider(tenant_id=tenant.id, provider_id=mcp_provider.id) # Assert: Verify the expected outcomes # Provider should be deleted from database @@ -862,11 +1002,14 @@ class TestMCPToolManageService: db_session_with_containers, mock_external_service_dependencies ) - non_existent_id = fake.uuid4() + non_existent_id = str(fake.uuid4()) # Act & Assert: Verify proper error handling + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) with pytest.raises(ValueError, match="MCP tool not found"): - MCPToolManageService.delete_mcp_tool(tenant.id, non_existent_id) + service.delete_provider(tenant_id=tenant.id, provider_id=non_existent_id) def test_delete_mcp_tool_tenant_isolation(self, db_session_with_containers, mock_external_service_dependencies): """ @@ -893,8 +1036,11 @@ class TestMCPToolManageService: ) # Act & Assert: Verify tenant isolation + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) with pytest.raises(ValueError, match="MCP tool not found"): - MCPToolManageService.delete_mcp_tool(tenant2.id, mcp_provider1.id) + service.delete_provider(tenant_id=tenant2.id, provider_id=mcp_provider1.id) # Verify provider still exists in tenant1 from extensions.ext_database import db @@ -929,7 +1075,10 @@ class TestMCPToolManageService: db.session.commit() # Act: Execute the method under test - MCPToolManageService.update_mcp_provider( + from core.entities.mcp_provider import MCPConfiguration + + service = MCPToolManageService(db.session()) + service.update_provider( tenant_id=tenant.id, provider_id=mcp_provider.id, name="Updated MCP Provider", @@ -938,8 +1087,10 @@ class TestMCPToolManageService: icon_type="emoji", icon_background="#4ECDC4", server_identifier="updated_identifier_123", - timeout=45.0, - sse_read_timeout=400.0, + configuration=MCPConfiguration( + timeout=45.0, + sse_read_timeout=400.0, + ), ) # Assert: Verify the expected outcomes @@ -953,70 +1104,10 @@ class TestMCPToolManageService: # Verify icon was updated import json - icon_data = json.loads(mcp_provider.icon) + icon_data = json.loads(mcp_provider.icon or "{}") assert icon_data["content"] == "🚀" assert icon_data["background"] == "#4ECDC4" - def test_update_mcp_provider_with_server_url_change( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test successful update of MCP provider with server URL change. - - This test verifies: - - Proper handling of server URL changes - - Correct reconnection logic - - Database state updates - - External service integration - """ - # Arrange: Create test data - fake = Faker() - account, tenant = self._create_test_account_and_tenant( - db_session_with_containers, mock_external_service_dependencies - ) - - # Create MCP provider - mcp_provider = self._create_test_mcp_provider( - db_session_with_containers, mock_external_service_dependencies, tenant.id, account.id - ) - - from extensions.ext_database import db - - db.session.commit() - - # Mock the reconnection method - with patch.object(MCPToolManageService, "_re_connect_mcp_provider") as mock_reconnect: - mock_reconnect.return_value = { - "authed": True, - "tools": '[{"name": "test_tool"}]', - "encrypted_credentials": "{}", - } - - # Act: Execute the method under test - MCPToolManageService.update_mcp_provider( - tenant_id=tenant.id, - provider_id=mcp_provider.id, - name="Updated MCP Provider", - server_url="https://new-example.com/mcp", - icon="🚀", - icon_type="emoji", - icon_background="#4ECDC4", - server_identifier="updated_identifier_123", - timeout=45.0, - sse_read_timeout=400.0, - ) - - # Assert: Verify the expected outcomes - db.session.refresh(mcp_provider) - assert mcp_provider.name == "Updated MCP Provider" - assert mcp_provider.server_identifier == "updated_identifier_123" - assert mcp_provider.timeout == 45.0 - assert mcp_provider.sse_read_timeout == 400.0 - assert mcp_provider.updated_at is not None - - # Verify reconnection was called - mock_reconnect.assert_called_once_with("https://new-example.com/mcp", mcp_provider.id, tenant.id) - def test_update_mcp_provider_duplicate_name(self, db_session_with_containers, mock_external_service_dependencies): """ Test error handling when updating MCP provider with duplicate name. @@ -1048,8 +1139,12 @@ class TestMCPToolManageService: db.session.commit() # Act & Assert: Verify proper error handling for duplicate name + from core.entities.mcp_provider import MCPConfiguration + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) with pytest.raises(ValueError, match="MCP tool First Provider already exists"): - MCPToolManageService.update_mcp_provider( + service.update_provider( tenant_id=tenant.id, provider_id=provider2.id, name="First Provider", # Duplicate name @@ -1058,8 +1153,10 @@ class TestMCPToolManageService: icon_type="emoji", icon_background="#4ECDC4", server_identifier="unique_identifier", - timeout=45.0, - sse_read_timeout=400.0, + configuration=MCPConfiguration( + timeout=45.0, + sse_read_timeout=400.0, + ), ) def test_update_mcp_provider_credentials_success( @@ -1094,19 +1191,25 @@ class TestMCPToolManageService: # Mock the provider controller and encryption with ( - patch("services.tools.mcp_tools_manage_service.MCPToolProviderController") as mock_controller, - patch("services.tools.mcp_tools_manage_service.ProviderConfigEncrypter") as mock_encrypter, + patch("core.tools.mcp_tool.provider.MCPToolProviderController") as mock_controller, + patch("core.tools.utils.encryption.ProviderConfigEncrypter") as mock_encrypter, ): # Setup mocks - mock_controller_instance = mock_controller._from_db.return_value + mock_controller_instance = mock_controller.from_db.return_value mock_controller_instance.get_credentials_schema.return_value = [] mock_encrypter_instance = mock_encrypter.return_value mock_encrypter_instance.encrypt.return_value = {"new_key": "encrypted_value"} # Act: Execute the method under test - MCPToolManageService.update_mcp_provider_credentials( - mcp_provider=mcp_provider, credentials={"new_key": "new_value"}, authed=True + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) + service.update_provider_credentials( + provider_id=mcp_provider.id, + tenant_id=tenant.id, + credentials={"new_key": "new_value"}, + authed=True, ) # Assert: Verify the expected outcomes @@ -1117,7 +1220,7 @@ class TestMCPToolManageService: # Verify credentials were encrypted and merged import json - credentials = json.loads(mcp_provider.encrypted_credentials) + credentials = json.loads(mcp_provider.encrypted_credentials or "{}") assert "existing_key" in credentials assert "new_key" in credentials @@ -1152,19 +1255,25 @@ class TestMCPToolManageService: # Mock the provider controller and encryption with ( - patch("services.tools.mcp_tools_manage_service.MCPToolProviderController") as mock_controller, - patch("services.tools.mcp_tools_manage_service.ProviderConfigEncrypter") as mock_encrypter, + patch("core.tools.mcp_tool.provider.MCPToolProviderController") as mock_controller, + patch("core.tools.utils.encryption.ProviderConfigEncrypter") as mock_encrypter, ): # Setup mocks - mock_controller_instance = mock_controller._from_db.return_value + mock_controller_instance = mock_controller.from_db.return_value mock_controller_instance.get_credentials_schema.return_value = [] mock_encrypter_instance = mock_encrypter.return_value mock_encrypter_instance.encrypt.return_value = {"new_key": "encrypted_value"} # Act: Execute the method under test - MCPToolManageService.update_mcp_provider_credentials( - mcp_provider=mcp_provider, credentials={"new_key": "new_value"}, authed=False + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) + service.update_provider_credentials( + provider_id=mcp_provider.id, + tenant_id=tenant.id, + credentials={"new_key": "new_value"}, + authed=False, ) # Assert: Verify the expected outcomes @@ -1199,41 +1308,37 @@ class TestMCPToolManageService: type("MockTool", (), {"model_dump": lambda self: {"name": "test_tool_2", "description": "Test tool 2"}})(), ] - with patch("services.tools.mcp_tools_manage_service.MCPClient") as mock_mcp_client: + with patch("services.tools.mcp_tools_manage_service.MCPClientWithAuthRetry") as mock_mcp_client: # Setup mock client mock_client_instance = mock_mcp_client.return_value.__enter__.return_value mock_client_instance.list_tools.return_value = mock_tools # Act: Execute the method under test - result = MCPToolManageService._re_connect_mcp_provider( - "https://example.com/mcp", mcp_provider.id, tenant.id + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) + result = service._reconnect_provider( + server_url="https://example.com/mcp", + provider=mcp_provider, ) # Assert: Verify the expected outcomes assert result is not None - assert result["authed"] is True - assert result["tools"] is not None - assert result["encrypted_credentials"] == "{}" + assert result.authed is True + assert result.tools is not None + assert result.encrypted_credentials == "{}" # Verify tools were properly serialized import json - tools_data = json.loads(result["tools"]) + tools_data = json.loads(result.tools) assert len(tools_data) == 2 assert tools_data[0]["name"] == "test_tool_1" assert tools_data[1]["name"] == "test_tool_2" # Verify mock interactions - mock_mcp_client.assert_called_once_with( - "https://example.com/mcp", - mcp_provider.id, - tenant.id, - authed=False, - for_list=True, - headers={}, - timeout=30.0, - sse_read_timeout=300.0, - ) + provider_entity = mcp_provider.to_entity() + mock_mcp_client.assert_called_once() def test_re_connect_mcp_provider_auth_error(self, db_session_with_containers, mock_external_service_dependencies): """ @@ -1256,22 +1361,26 @@ class TestMCPToolManageService: ) # Mock MCPClient to raise authentication error - with patch("services.tools.mcp_tools_manage_service.MCPClient") as mock_mcp_client: + with patch("services.tools.mcp_tools_manage_service.MCPClientWithAuthRetry") as mock_mcp_client: from core.mcp.error import MCPAuthError mock_client_instance = mock_mcp_client.return_value.__enter__.return_value mock_client_instance.list_tools.side_effect = MCPAuthError("Authentication required") # Act: Execute the method under test - result = MCPToolManageService._re_connect_mcp_provider( - "https://example.com/mcp", mcp_provider.id, tenant.id + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) + result = service._reconnect_provider( + server_url="https://example.com/mcp", + provider=mcp_provider, ) # Assert: Verify the expected outcomes assert result is not None - assert result["authed"] is False - assert result["tools"] == "[]" - assert result["encrypted_credentials"] == "{}" + assert result.authed is False + assert result.tools == "[]" + assert result.encrypted_credentials == "{}" def test_re_connect_mcp_provider_connection_error( self, db_session_with_containers, mock_external_service_dependencies @@ -1295,12 +1404,18 @@ class TestMCPToolManageService: ) # Mock MCPClient to raise connection error - with patch("services.tools.mcp_tools_manage_service.MCPClient") as mock_mcp_client: + with patch("services.tools.mcp_tools_manage_service.MCPClientWithAuthRetry") as mock_mcp_client: from core.mcp.error import MCPError mock_client_instance = mock_mcp_client.return_value.__enter__.return_value mock_client_instance.list_tools.side_effect = MCPError("Connection failed") # Act & Assert: Verify proper error handling + from extensions.ext_database import db + + service = MCPToolManageService(db.session()) with pytest.raises(ValueError, match="Failed to re-connect MCP server: Connection failed"): - MCPToolManageService._re_connect_mcp_provider("https://example.com/mcp", mcp_provider.id, tenant.id) + service._reconnect_provider( + server_url="https://example.com/mcp", + provider=mcp_provider, + ) diff --git a/api/tests/test_containers_integration_tests/services/tools/test_tools_transform_service.py b/api/tests/test_containers_integration_tests/services/tools/test_tools_transform_service.py index 08c7d07620..fa13790942 100644 --- a/api/tests/test_containers_integration_tests/services/tools/test_tools_transform_service.py +++ b/api/tests/test_containers_integration_tests/services/tools/test_tools_transform_service.py @@ -6,7 +6,6 @@ from faker import Faker from core.tools.entities.api_entities import ToolProviderApiEntity from core.tools.entities.common_entities import I18nObject from core.tools.entities.tool_entities import ToolProviderType -from libs.uuid_utils import uuidv7 from models.tools import ApiToolProvider, BuiltinToolProvider, MCPToolProvider, WorkflowToolProvider from services.plugin.plugin_service import PluginService from services.tools.tools_transform_service import ToolTransformService @@ -18,15 +17,14 @@ class TestToolTransformService: @pytest.fixture def mock_external_service_dependencies(self): """Mock setup for external service dependencies.""" - with ( - patch("services.tools.tools_transform_service.dify_config") as mock_dify_config, - ): - # Setup default mock returns - mock_dify_config.CONSOLE_API_URL = "https://console.example.com" + with patch("services.tools.tools_transform_service.dify_config") as mock_dify_config: + with patch("services.plugin.plugin_service.dify_config", new=mock_dify_config): + # Setup default mock returns + mock_dify_config.CONSOLE_API_URL = "https://console.example.com" - yield { - "dify_config": mock_dify_config, - } + yield { + "dify_config": mock_dify_config, + } def _create_test_tool_provider( self, db_session_with_containers, mock_external_service_dependencies, provider_type="api" @@ -68,7 +66,6 @@ class TestToolTransformService: ) elif provider_type == "workflow": provider = WorkflowToolProvider( - id=str(uuidv7()), name=fake.company(), description=fake.text(max_nb_chars=100), icon='{"background": "#FF6B6B", "content": "🔧"}', @@ -522,7 +519,7 @@ class TestToolTransformService: with patch("services.tools.tools_transform_service.create_provider_encrypter") as mock_encrypter: mock_encrypter_instance = Mock() mock_encrypter_instance.decrypt.return_value = {"api_key": "decrypted_key"} - mock_encrypter_instance.mask_tool_credentials.return_value = {"api_key": ""} + mock_encrypter_instance.mask_plugin_credentials.return_value = {"api_key": ""} mock_encrypter.return_value = (mock_encrypter_instance, None) # Act: Execute the method under test @@ -761,7 +758,6 @@ class TestToolTransformService: # Create workflow tool provider provider = WorkflowToolProvider( - id=str(uuidv7()), name=fake.company(), description=fake.text(max_nb_chars=100), icon='{"background": "#FF6B6B", "content": "🔧"}', diff --git a/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py index 68e485107c..f1530bcac6 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_add_document_to_index_task.py @@ -256,7 +256,7 @@ class TestAddDocumentToIndexTask: """ # Arrange: Use non-existent document ID fake = Faker() - non_existent_id = fake.uuid4() + non_existent_id = str(fake.uuid4()) # Act: Execute the task with non-existent document add_document_to_index_task(non_existent_id) @@ -282,7 +282,7 @@ class TestAddDocumentToIndexTask: - Redis cache key not affected """ # Arrange: Create test data with invalid indexing status - dataset, document = self._create_test_dataset_and_document( + _, document = self._create_test_dataset_and_document( db_session_with_containers, mock_external_service_dependencies ) @@ -417,15 +417,15 @@ class TestAddDocumentToIndexTask: # Verify redis cache was cleared assert redis_client.exists(indexing_cache_key) == 0 - def test_add_document_to_index_with_no_segments_to_process( + def test_add_document_to_index_with_already_enabled_segments( self, db_session_with_containers, mock_external_service_dependencies ): """ - Test document indexing when no segments need processing. + Test document indexing when segments are already enabled. This test verifies: - - Proper handling when all segments are already enabled - - Index processing still occurs but with empty documents list + - Segments with status="completed" are processed regardless of enabled status + - Index processing occurs with all completed segments - Auto disable log deletion still occurs - Redis cache is cleared """ @@ -465,15 +465,16 @@ class TestAddDocumentToIndexTask: # Act: Execute the task add_document_to_index_task(document.id) - # Assert: Verify index processing occurred but with empty documents list + # Assert: Verify index processing occurred with all completed segments mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX) mock_external_service_dependencies["index_processor"].load.assert_called_once() - # Verify the load method was called with empty documents list + # Verify the load method was called with all completed segments + # (implementation doesn't filter by enabled status, only by status="completed") call_args = mock_external_service_dependencies["index_processor"].load.call_args assert call_args is not None documents = call_args[0][1] # Second argument should be documents list - assert len(documents) == 0 # No segments to process + assert len(documents) == 3 # All completed segments are processed # Verify redis cache was cleared assert redis_client.exists(indexing_cache_key) == 0 @@ -499,7 +500,7 @@ class TestAddDocumentToIndexTask: # Create some auto disable log entries fake = Faker() auto_disable_logs = [] - for i in range(2): + for _ in range(2): log_entry = DatasetAutoDisableLog( id=fake.uuid4(), tenant_id=document.tenant_id, @@ -595,9 +596,11 @@ class TestAddDocumentToIndexTask: Test segment filtering with various edge cases. This test verifies: - - Only segments with enabled=False and status="completed" are processed + - Only segments with status="completed" are processed (regardless of enabled status) + - Segments with status!="completed" are NOT processed - Segments are ordered by position correctly - Mixed segment states are handled properly + - All segments are updated to enabled=True after processing - Redis cache key deletion """ # Arrange: Create test data @@ -628,7 +631,8 @@ class TestAddDocumentToIndexTask: db.session.add(segment1) segments.append(segment1) - # Segment 2: Should NOT be processed (enabled=True, status="completed") + # Segment 2: Should be processed (enabled=True, status="completed") + # Note: Implementation doesn't filter by enabled status, only by status="completed" segment2 = DocumentSegment( id=fake.uuid4(), tenant_id=document.tenant_id, @@ -640,7 +644,7 @@ class TestAddDocumentToIndexTask: tokens=len(fake.text(max_nb_chars=200).split()) * 2, index_node_id="node_1", index_node_hash="hash_1", - enabled=True, # Already enabled + enabled=True, # Already enabled, but will still be processed status="completed", created_by=document.created_by, ) @@ -702,11 +706,14 @@ class TestAddDocumentToIndexTask: call_args = mock_external_service_dependencies["index_processor"].load.call_args assert call_args is not None documents = call_args[0][1] # Second argument should be documents list - assert len(documents) == 2 # Only 2 segments should be processed + assert len(documents) == 3 # 3 segments with status="completed" should be processed # Verify correct segments were processed (by position order) - assert documents[0].metadata["doc_id"] == "node_0" # position 0 - assert documents[1].metadata["doc_id"] == "node_3" # position 3 + # Segments 1, 2, 4 should be processed (positions 0, 1, 3) + # Segment 3 is skipped (position 2, status="processing") + assert documents[0].metadata["doc_id"] == "node_0" # segment1, position 0 + assert documents[1].metadata["doc_id"] == "node_1" # segment2, position 1 + assert documents[2].metadata["doc_id"] == "node_3" # segment4, position 3 # Verify database state changes db.session.refresh(document) @@ -717,7 +724,7 @@ class TestAddDocumentToIndexTask: # All segments should be enabled because the task updates ALL segments for the document assert segment1.enabled is True - assert segment2.enabled is True # Was already enabled, now updated to True + assert segment2.enabled is True # Was already enabled, stays True assert segment3.enabled is True # Was not processed but still updated to True assert segment4.enabled is True diff --git a/api/tests/test_containers_integration_tests/tasks/test_document_indexing_task.py b/api/tests/test_containers_integration_tests/tasks/test_document_indexing_task.py index 448f6da5ec..c015d7ec9c 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_document_indexing_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_document_indexing_task.py @@ -1,16 +1,33 @@ +from dataclasses import asdict from unittest.mock import MagicMock, patch import pytest from faker import Faker +from core.entities.document_task import DocumentTask +from enums.cloud_plan import CloudPlan from extensions.ext_database import db from models import Account, Tenant, TenantAccountJoin, TenantAccountRole from models.dataset import Dataset, Document -from tasks.document_indexing_task import document_indexing_task +from tasks.document_indexing_task import ( + _document_indexing, # Core function + _document_indexing_with_tenant_queue, # Tenant queue wrapper function + document_indexing_task, # Deprecated old interface + normal_document_indexing_task, # New normal task + priority_document_indexing_task, # New priority task +) -class TestDocumentIndexingTask: - """Integration tests for document_indexing_task using testcontainers.""" +class TestDocumentIndexingTasks: + """Integration tests for document indexing tasks using testcontainers. + + This test class covers: + - Core _document_indexing function + - Deprecated document_indexing_task function + - New normal_document_indexing_task function + - New priority_document_indexing_task function + - Tenant queue wrapper _document_indexing_with_tenant_queue function + """ @pytest.fixture def mock_external_service_dependencies(self): @@ -197,7 +214,7 @@ class TestDocumentIndexingTask: # Configure billing features mock_external_service_dependencies["features"].billing.enabled = billing_enabled if billing_enabled: - mock_external_service_dependencies["features"].billing.subscription.plan = "sandbox" + mock_external_service_dependencies["features"].billing.subscription.plan = CloudPlan.SANDBOX mock_external_service_dependencies["features"].vector_space.limit = 100 mock_external_service_dependencies["features"].vector_space.size = 50 @@ -223,7 +240,7 @@ class TestDocumentIndexingTask: document_ids = [doc.id for doc in documents] # Act: Execute the task - document_indexing_task(dataset.id, document_ids) + _document_indexing(dataset.id, document_ids) # Assert: Verify the expected outcomes # Verify indexing runner was called correctly @@ -231,10 +248,11 @@ class TestDocumentIndexingTask: mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() # Verify documents were updated to parsing status - for document in documents: - db.session.refresh(document) - assert document.indexing_status == "parsing" - assert document.processing_started_at is not None + # Re-query documents from database since _document_indexing uses a different session + for doc_id in document_ids: + updated_document = db.session.query(Document).where(Document.id == doc_id).first() + assert updated_document.indexing_status == "parsing" + assert updated_document.processing_started_at is not None # Verify the run method was called with correct documents call_args = mock_external_service_dependencies["indexing_runner_instance"].run.call_args @@ -260,7 +278,7 @@ class TestDocumentIndexingTask: document_ids = [fake.uuid4() for _ in range(3)] # Act: Execute the task with non-existent dataset - document_indexing_task(non_existent_dataset_id, document_ids) + _document_indexing(non_existent_dataset_id, document_ids) # Assert: Verify no processing occurred mock_external_service_dependencies["indexing_runner"].assert_not_called() @@ -290,17 +308,18 @@ class TestDocumentIndexingTask: all_document_ids = existing_document_ids + non_existent_document_ids # Act: Execute the task with mixed document IDs - document_indexing_task(dataset.id, all_document_ids) + _document_indexing(dataset.id, all_document_ids) # Assert: Verify only existing documents were processed mock_external_service_dependencies["indexing_runner"].assert_called_once() mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() # Verify only existing documents were updated - for document in documents: - db.session.refresh(document) - assert document.indexing_status == "parsing" - assert document.processing_started_at is not None + # Re-query documents from database since _document_indexing uses a different session + for doc_id in existing_document_ids: + updated_document = db.session.query(Document).where(Document.id == doc_id).first() + assert updated_document.indexing_status == "parsing" + assert updated_document.processing_started_at is not None # Verify the run method was called with only existing documents call_args = mock_external_service_dependencies["indexing_runner_instance"].run.call_args @@ -332,7 +351,7 @@ class TestDocumentIndexingTask: ) # Act: Execute the task - document_indexing_task(dataset.id, document_ids) + _document_indexing(dataset.id, document_ids) # Assert: Verify exception was handled gracefully # The task should complete without raising exceptions @@ -340,10 +359,11 @@ class TestDocumentIndexingTask: mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() # Verify documents were still updated to parsing status before the exception - for document in documents: - db.session.refresh(document) - assert document.indexing_status == "parsing" - assert document.processing_started_at is not None + # Re-query documents from database since _document_indexing close the session + for doc_id in document_ids: + updated_document = db.session.query(Document).where(Document.id == doc_id).first() + assert updated_document.indexing_status == "parsing" + assert updated_document.processing_started_at is not None def test_document_indexing_task_mixed_document_states( self, db_session_with_containers, mock_external_service_dependencies @@ -406,17 +426,18 @@ class TestDocumentIndexingTask: document_ids = [doc.id for doc in all_documents] # Act: Execute the task with mixed document states - document_indexing_task(dataset.id, document_ids) + _document_indexing(dataset.id, document_ids) # Assert: Verify processing mock_external_service_dependencies["indexing_runner"].assert_called_once() mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() # Verify all documents were updated to parsing status - for document in all_documents: - db.session.refresh(document) - assert document.indexing_status == "parsing" - assert document.processing_started_at is not None + # Re-query documents from database since _document_indexing uses a different session + for doc_id in document_ids: + updated_document = db.session.query(Document).where(Document.id == doc_id).first() + assert updated_document.indexing_status == "parsing" + assert updated_document.processing_started_at is not None # Verify the run method was called with all documents call_args = mock_external_service_dependencies["indexing_runner_instance"].run.call_args @@ -442,7 +463,7 @@ class TestDocumentIndexingTask: ) # Configure sandbox plan with batch limit - mock_external_service_dependencies["features"].billing.subscription.plan = "sandbox" + mock_external_service_dependencies["features"].billing.subscription.plan = CloudPlan.SANDBOX # Create more documents than sandbox plan allows (limit is 1) fake = Faker() @@ -469,15 +490,16 @@ class TestDocumentIndexingTask: document_ids = [doc.id for doc in all_documents] # Act: Execute the task with too many documents for sandbox plan - document_indexing_task(dataset.id, document_ids) + _document_indexing(dataset.id, document_ids) # Assert: Verify error handling - for document in all_documents: - db.session.refresh(document) - assert document.indexing_status == "error" - assert document.error is not None - assert "batch upload" in document.error - assert document.stopped_at is not None + # Re-query documents from database since _document_indexing uses a different session + for doc_id in document_ids: + updated_document = db.session.query(Document).where(Document.id == doc_id).first() + assert updated_document.indexing_status == "error" + assert updated_document.error is not None + assert "batch upload" in updated_document.error + assert updated_document.stopped_at is not None # Verify no indexing runner was called mock_external_service_dependencies["indexing_runner"].assert_not_called() @@ -502,17 +524,18 @@ class TestDocumentIndexingTask: document_ids = [doc.id for doc in documents] # Act: Execute the task with billing disabled - document_indexing_task(dataset.id, document_ids) + _document_indexing(dataset.id, document_ids) # Assert: Verify successful processing mock_external_service_dependencies["indexing_runner"].assert_called_once() mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() # Verify documents were updated to parsing status - for document in documents: - db.session.refresh(document) - assert document.indexing_status == "parsing" - assert document.processing_started_at is not None + # Re-query documents from database since _document_indexing uses a different session + for doc_id in document_ids: + updated_document = db.session.query(Document).where(Document.id == doc_id).first() + assert updated_document.indexing_status == "parsing" + assert updated_document.processing_started_at is not None def test_document_indexing_task_document_is_paused_error( self, db_session_with_containers, mock_external_service_dependencies @@ -540,7 +563,7 @@ class TestDocumentIndexingTask: ) # Act: Execute the task - document_indexing_task(dataset.id, document_ids) + _document_indexing(dataset.id, document_ids) # Assert: Verify exception was handled gracefully # The task should complete without raising exceptions @@ -548,7 +571,317 @@ class TestDocumentIndexingTask: mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() # Verify documents were still updated to parsing status before the exception - for document in documents: - db.session.refresh(document) - assert document.indexing_status == "parsing" - assert document.processing_started_at is not None + # Re-query documents from database since _document_indexing uses a different session + for doc_id in document_ids: + updated_document = db.session.query(Document).where(Document.id == doc_id).first() + assert updated_document.indexing_status == "parsing" + assert updated_document.processing_started_at is not None + + # ==================== NEW TESTS FOR REFACTORED FUNCTIONS ==================== + def test_old_document_indexing_task_success(self, db_session_with_containers, mock_external_service_dependencies): + """ + Test document_indexing_task basic functionality. + + This test verifies: + - Task function calls the wrapper correctly + - Basic parameter passing works + """ + # Arrange: Create test data + dataset, documents = self._create_test_dataset_and_documents( + db_session_with_containers, mock_external_service_dependencies, document_count=1 + ) + document_ids = [doc.id for doc in documents] + + # Act: Execute the deprecated task (it only takes 2 parameters) + document_indexing_task(dataset.id, document_ids) + + # Assert: Verify processing occurred (core logic is tested in _document_indexing tests) + mock_external_service_dependencies["indexing_runner"].assert_called_once() + mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() + + def test_normal_document_indexing_task_success( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test normal_document_indexing_task basic functionality. + + This test verifies: + - Task function calls the wrapper correctly + - Basic parameter passing works + """ + # Arrange: Create test data + dataset, documents = self._create_test_dataset_and_documents( + db_session_with_containers, mock_external_service_dependencies, document_count=1 + ) + document_ids = [doc.id for doc in documents] + tenant_id = dataset.tenant_id + + # Act: Execute the new normal task + normal_document_indexing_task(tenant_id, dataset.id, document_ids) + + # Assert: Verify processing occurred (core logic is tested in _document_indexing tests) + mock_external_service_dependencies["indexing_runner"].assert_called_once() + mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() + + def test_priority_document_indexing_task_success( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test priority_document_indexing_task basic functionality. + + This test verifies: + - Task function calls the wrapper correctly + - Basic parameter passing works + """ + # Arrange: Create test data + dataset, documents = self._create_test_dataset_and_documents( + db_session_with_containers, mock_external_service_dependencies, document_count=1 + ) + document_ids = [doc.id for doc in documents] + tenant_id = dataset.tenant_id + + # Act: Execute the new priority task + priority_document_indexing_task(tenant_id, dataset.id, document_ids) + + # Assert: Verify processing occurred (core logic is tested in _document_indexing tests) + mock_external_service_dependencies["indexing_runner"].assert_called_once() + mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() + + def test_document_indexing_with_tenant_queue_success( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test _document_indexing_with_tenant_queue function with no waiting tasks. + + This test verifies: + - Core indexing logic execution (same as _document_indexing) + - Tenant queue cleanup when no waiting tasks + - Task function parameter passing + - Queue management after processing + """ + # Arrange: Create test data + dataset, documents = self._create_test_dataset_and_documents( + db_session_with_containers, mock_external_service_dependencies, document_count=2 + ) + document_ids = [doc.id for doc in documents] + tenant_id = dataset.tenant_id + + # Mock the task function + from unittest.mock import MagicMock + + mock_task_func = MagicMock() + + # Act: Execute the wrapper function + _document_indexing_with_tenant_queue(tenant_id, dataset.id, document_ids, mock_task_func) + + # Assert: Verify core processing occurred (same as _document_indexing) + mock_external_service_dependencies["indexing_runner"].assert_called_once() + mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() + + # Verify documents were updated (same as _document_indexing) + # Re-query documents from database since _document_indexing uses a different session + for doc_id in document_ids: + updated_document = db.session.query(Document).where(Document.id == doc_id).first() + assert updated_document.indexing_status == "parsing" + assert updated_document.processing_started_at is not None + + # Verify the run method was called with correct documents + call_args = mock_external_service_dependencies["indexing_runner_instance"].run.call_args + assert call_args is not None + processed_documents = call_args[0][0] + assert len(processed_documents) == 2 + + # Verify task function was not called (no waiting tasks) + mock_task_func.delay.assert_not_called() + + def test_document_indexing_with_tenant_queue_with_waiting_tasks( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test _document_indexing_with_tenant_queue function with waiting tasks in queue using real Redis. + + This test verifies: + - Core indexing logic execution + - Real Redis-based tenant queue processing of waiting tasks + - Task function calls for waiting tasks + - Queue management with multiple tasks using actual Redis operations + """ + # Arrange: Create test data + dataset, documents = self._create_test_dataset_and_documents( + db_session_with_containers, mock_external_service_dependencies, document_count=1 + ) + document_ids = [doc.id for doc in documents] + tenant_id = dataset.tenant_id + dataset_id = dataset.id + + # Mock the task function + from unittest.mock import MagicMock + + mock_task_func = MagicMock() + + # Use real Redis for TenantIsolatedTaskQueue + from core.rag.pipeline.queue import TenantIsolatedTaskQueue + + # Create real queue instance + queue = TenantIsolatedTaskQueue(tenant_id, "document_indexing") + + # Add waiting tasks to the real Redis queue + waiting_tasks = [ + DocumentTask(tenant_id=tenant_id, dataset_id=dataset.id, document_ids=["waiting-doc-1"]), + DocumentTask(tenant_id=tenant_id, dataset_id=dataset.id, document_ids=["waiting-doc-2"]), + ] + # Convert DocumentTask objects to dictionaries for serialization + waiting_task_dicts = [asdict(task) for task in waiting_tasks] + queue.push_tasks(waiting_task_dicts) + + # Act: Execute the wrapper function + _document_indexing_with_tenant_queue(tenant_id, dataset.id, document_ids, mock_task_func) + + # Assert: Verify core processing occurred + mock_external_service_dependencies["indexing_runner"].assert_called_once() + mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() + + # Verify task function was called for each waiting task + assert mock_task_func.delay.call_count == 1 + + # Verify correct parameters for each call + calls = mock_task_func.delay.call_args_list + assert calls[0][1] == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]} + + # Verify queue is empty after processing (tasks were pulled) + remaining_tasks = queue.pull_tasks(count=10) # Pull more than we added + assert len(remaining_tasks) == 1 + + def test_document_indexing_with_tenant_queue_error_handling( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test error handling in _document_indexing_with_tenant_queue using real Redis. + + This test verifies: + - Exception handling during core processing + - Tenant queue cleanup even on errors using real Redis + - Proper error logging + - Function completes without raising exceptions + - Queue management continues despite core processing errors + """ + # Arrange: Create test data + dataset, documents = self._create_test_dataset_and_documents( + db_session_with_containers, mock_external_service_dependencies, document_count=1 + ) + document_ids = [doc.id for doc in documents] + tenant_id = dataset.tenant_id + dataset_id = dataset.id + + # Mock IndexingRunner to raise an exception + mock_external_service_dependencies["indexing_runner_instance"].run.side_effect = Exception("Test error") + + # Mock the task function + from unittest.mock import MagicMock + + mock_task_func = MagicMock() + + # Use real Redis for TenantIsolatedTaskQueue + from core.rag.pipeline.queue import TenantIsolatedTaskQueue + + # Create real queue instance + queue = TenantIsolatedTaskQueue(tenant_id, "document_indexing") + + # Add waiting task to the real Redis queue + waiting_task = DocumentTask(tenant_id=tenant_id, dataset_id=dataset.id, document_ids=["waiting-doc-1"]) + queue.push_tasks([asdict(waiting_task)]) + + # Act: Execute the wrapper function + _document_indexing_with_tenant_queue(tenant_id, dataset.id, document_ids, mock_task_func) + + # Assert: Verify error was handled gracefully + # The function should not raise exceptions + mock_external_service_dependencies["indexing_runner"].assert_called_once() + mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() + + # Verify documents were still updated to parsing status before the exception + # Re-query documents from database since _document_indexing uses a different session + for doc_id in document_ids: + updated_document = db.session.query(Document).where(Document.id == doc_id).first() + assert updated_document.indexing_status == "parsing" + assert updated_document.processing_started_at is not None + + # Verify waiting task was still processed despite core processing error + mock_task_func.delay.assert_called_once() + + # Verify correct parameters for the call + call = mock_task_func.delay.call_args + assert call[1] == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]} + + # Verify queue is empty after processing (task was pulled) + remaining_tasks = queue.pull_tasks(count=10) + assert len(remaining_tasks) == 0 + + def test_document_indexing_with_tenant_queue_tenant_isolation( + self, db_session_with_containers, mock_external_service_dependencies + ): + """ + Test tenant isolation in _document_indexing_with_tenant_queue using real Redis. + + This test verifies: + - Different tenants have isolated queues + - Tasks from one tenant don't affect another tenant's queue + - Queue operations are properly scoped to tenant + """ + # Arrange: Create test data for two different tenants + dataset1, documents1 = self._create_test_dataset_and_documents( + db_session_with_containers, mock_external_service_dependencies, document_count=1 + ) + dataset2, documents2 = self._create_test_dataset_and_documents( + db_session_with_containers, mock_external_service_dependencies, document_count=1 + ) + + tenant1_id = dataset1.tenant_id + tenant2_id = dataset2.tenant_id + dataset1_id = dataset1.id + dataset2_id = dataset2.id + document_ids1 = [doc.id for doc in documents1] + document_ids2 = [doc.id for doc in documents2] + + # Mock the task function + from unittest.mock import MagicMock + + mock_task_func = MagicMock() + + # Use real Redis for TenantIsolatedTaskQueue + from core.rag.pipeline.queue import TenantIsolatedTaskQueue + + # Create queue instances for both tenants + queue1 = TenantIsolatedTaskQueue(tenant1_id, "document_indexing") + queue2 = TenantIsolatedTaskQueue(tenant2_id, "document_indexing") + + # Add waiting tasks to both queues + waiting_task1 = DocumentTask(tenant_id=tenant1_id, dataset_id=dataset1.id, document_ids=["tenant1-doc-1"]) + waiting_task2 = DocumentTask(tenant_id=tenant2_id, dataset_id=dataset2.id, document_ids=["tenant2-doc-1"]) + + queue1.push_tasks([asdict(waiting_task1)]) + queue2.push_tasks([asdict(waiting_task2)]) + + # Act: Execute the wrapper function for tenant1 only + _document_indexing_with_tenant_queue(tenant1_id, dataset1.id, document_ids1, mock_task_func) + + # Assert: Verify core processing occurred for tenant1 + mock_external_service_dependencies["indexing_runner"].assert_called_once() + mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() + + # Verify only tenant1's waiting task was processed + mock_task_func.delay.assert_called_once() + call = mock_task_func.delay.call_args + assert call[1] == {"tenant_id": tenant1_id, "dataset_id": dataset1_id, "document_ids": ["tenant1-doc-1"]} + + # Verify tenant1's queue is empty + remaining_tasks1 = queue1.pull_tasks(count=10) + assert len(remaining_tasks1) == 0 + + # Verify tenant2's queue still has its task (isolation) + remaining_tasks2 = queue2.pull_tasks(count=10) + assert len(remaining_tasks2) == 1 + + # Verify queue keys are different + assert queue1._queue != queue2._queue + assert queue1._task_key != queue2._task_key diff --git a/api/tests/test_containers_integration_tests/tasks/test_rag_pipeline_run_tasks.py b/api/tests/test_containers_integration_tests/tasks/test_rag_pipeline_run_tasks.py new file mode 100644 index 0000000000..c82162238c --- /dev/null +++ b/api/tests/test_containers_integration_tests/tasks/test_rag_pipeline_run_tasks.py @@ -0,0 +1,936 @@ +import json +import uuid +from unittest.mock import patch + +import pytest +from faker import Faker + +from core.app.entities.app_invoke_entities import InvokeFrom, RagPipelineGenerateEntity +from core.app.entities.rag_pipeline_invoke_entities import RagPipelineInvokeEntity +from core.rag.pipeline.queue import TenantIsolatedTaskQueue +from extensions.ext_database import db +from models import Account, Tenant, TenantAccountJoin, TenantAccountRole +from models.dataset import Pipeline +from models.workflow import Workflow +from tasks.rag_pipeline.priority_rag_pipeline_run_task import ( + priority_rag_pipeline_run_task, + run_single_rag_pipeline_task, +) +from tasks.rag_pipeline.rag_pipeline_run_task import rag_pipeline_run_task + + +class TestRagPipelineRunTasks: + """Integration tests for RAG pipeline run tasks using testcontainers. + + This test class covers: + - priority_rag_pipeline_run_task function + - rag_pipeline_run_task function + - run_single_rag_pipeline_task function + - Real Redis-based TenantIsolatedTaskQueue operations + - PipelineGenerator._generate method mocking and parameter validation + - File operations and cleanup + - Error handling and queue management + """ + + @pytest.fixture + def mock_pipeline_generator(self): + """Mock PipelineGenerator._generate method.""" + with patch("core.app.apps.pipeline.pipeline_generator.PipelineGenerator._generate") as mock_generate: + # Mock the _generate method to return a simple response + mock_generate.return_value = {"answer": "Test response", "metadata": {"test": "data"}} + yield mock_generate + + @pytest.fixture + def mock_file_service(self): + """Mock FileService for file operations.""" + with ( + patch("services.file_service.FileService.get_file_content") as mock_get_content, + patch("services.file_service.FileService.delete_file") as mock_delete_file, + ): + yield { + "get_content": mock_get_content, + "delete_file": mock_delete_file, + } + + def _create_test_pipeline_and_workflow(self, db_session_with_containers): + """ + Helper method to create test pipeline and workflow for testing. + + Args: + db_session_with_containers: Database session from testcontainers infrastructure + + Returns: + tuple: (account, tenant, pipeline, workflow) - Created entities + """ + fake = Faker() + + # Create account and tenant + account = Account( + email=fake.email(), + name=fake.name(), + interface_language="en-US", + status="active", + ) + db.session.add(account) + db.session.commit() + + tenant = Tenant( + name=fake.company(), + status="normal", + ) + db.session.add(tenant) + db.session.commit() + + # Create tenant-account join + join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=TenantAccountRole.OWNER, + current=True, + ) + db.session.add(join) + db.session.commit() + + # Create workflow + workflow = Workflow( + id=str(uuid.uuid4()), + tenant_id=tenant.id, + app_id=str(uuid.uuid4()), + type="workflow", + version="draft", + graph="{}", + features="{}", + marked_name=fake.company(), + marked_comment=fake.text(max_nb_chars=100), + created_by=account.id, + environment_variables=[], + conversation_variables=[], + rag_pipeline_variables=[], + ) + db.session.add(workflow) + db.session.commit() + + # Create pipeline + pipeline = Pipeline( + id=str(uuid.uuid4()), + tenant_id=tenant.id, + workflow_id=workflow.id, + name=fake.company(), + description=fake.text(max_nb_chars=100), + created_by=account.id, + ) + db.session.add(pipeline) + db.session.commit() + + # Refresh entities to ensure they're properly loaded + db.session.refresh(account) + db.session.refresh(tenant) + db.session.refresh(workflow) + db.session.refresh(pipeline) + + return account, tenant, pipeline, workflow + + def _create_rag_pipeline_invoke_entities(self, account, tenant, pipeline, workflow, count=2): + """ + Helper method to create RAG pipeline invoke entities for testing. + + Args: + account: Account instance + tenant: Tenant instance + pipeline: Pipeline instance + workflow: Workflow instance + count: Number of entities to create + + Returns: + list: List of RagPipelineInvokeEntity instances + """ + fake = Faker() + entities = [] + + for i in range(count): + # Create application generate entity + app_config = { + "app_id": str(uuid.uuid4()), + "app_name": fake.company(), + "mode": "workflow", + "workflow_id": workflow.id, + "tenant_id": tenant.id, + "app_mode": "workflow", + } + + application_generate_entity = { + "task_id": str(uuid.uuid4()), + "app_config": app_config, + "inputs": {"query": f"Test query {i}"}, + "files": [], + "user_id": account.id, + "stream": False, + "invoke_from": "published", + "workflow_execution_id": str(uuid.uuid4()), + "pipeline_config": { + "app_id": str(uuid.uuid4()), + "app_name": fake.company(), + "mode": "workflow", + "workflow_id": workflow.id, + "tenant_id": tenant.id, + "app_mode": "workflow", + }, + "datasource_type": "upload_file", + "datasource_info": {}, + "dataset_id": str(uuid.uuid4()), + "batch": "test_batch", + } + + entity = RagPipelineInvokeEntity( + pipeline_id=pipeline.id, + application_generate_entity=application_generate_entity, + user_id=account.id, + tenant_id=tenant.id, + workflow_id=workflow.id, + streaming=False, + workflow_execution_id=str(uuid.uuid4()), + workflow_thread_pool_id=str(uuid.uuid4()), + ) + entities.append(entity) + + return entities + + def _create_file_content_for_entities(self, entities): + """ + Helper method to create file content for RAG pipeline invoke entities. + + Args: + entities: List of RagPipelineInvokeEntity instances + + Returns: + str: JSON string containing serialized entities + """ + entities_data = [entity.model_dump() for entity in entities] + return json.dumps(entities_data) + + def test_priority_rag_pipeline_run_task_success( + self, db_session_with_containers, mock_pipeline_generator, mock_file_service + ): + """ + Test successful priority RAG pipeline run task execution. + + This test verifies: + - Task execution with multiple RAG pipeline invoke entities + - File content retrieval and parsing + - PipelineGenerator._generate method calls with correct parameters + - Thread pool execution + - File cleanup after execution + - Queue management with no waiting tasks + """ + # Arrange: Create test data + account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers) + entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=2) + file_content = self._create_file_content_for_entities(entities) + + # Mock file service + file_id = str(uuid.uuid4()) + mock_file_service["get_content"].return_value = file_content + + # Act: Execute the priority task + priority_rag_pipeline_run_task(file_id, tenant.id) + + # Assert: Verify expected outcomes + # Verify file operations + mock_file_service["get_content"].assert_called_once_with(file_id) + mock_file_service["delete_file"].assert_called_once_with(file_id) + + # Verify PipelineGenerator._generate was called for each entity + assert mock_pipeline_generator.call_count == 2 + + # Verify call parameters for each entity + calls = mock_pipeline_generator.call_args_list + for call in calls: + call_kwargs = call[1] # Get keyword arguments + assert call_kwargs["pipeline"].id == pipeline.id + assert call_kwargs["workflow_id"] == workflow.id + assert call_kwargs["user"].id == account.id + assert call_kwargs["invoke_from"] == InvokeFrom.PUBLISHED + assert call_kwargs["streaming"] == False + assert isinstance(call_kwargs["application_generate_entity"], RagPipelineGenerateEntity) + + def test_rag_pipeline_run_task_success( + self, db_session_with_containers, mock_pipeline_generator, mock_file_service + ): + """ + Test successful regular RAG pipeline run task execution. + + This test verifies: + - Task execution with multiple RAG pipeline invoke entities + - File content retrieval and parsing + - PipelineGenerator._generate method calls with correct parameters + - Thread pool execution + - File cleanup after execution + - Queue management with no waiting tasks + """ + # Arrange: Create test data + account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers) + entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=3) + file_content = self._create_file_content_for_entities(entities) + + # Mock file service + file_id = str(uuid.uuid4()) + mock_file_service["get_content"].return_value = file_content + + # Act: Execute the regular task + rag_pipeline_run_task(file_id, tenant.id) + + # Assert: Verify expected outcomes + # Verify file operations + mock_file_service["get_content"].assert_called_once_with(file_id) + mock_file_service["delete_file"].assert_called_once_with(file_id) + + # Verify PipelineGenerator._generate was called for each entity + assert mock_pipeline_generator.call_count == 3 + + # Verify call parameters for each entity + calls = mock_pipeline_generator.call_args_list + for call in calls: + call_kwargs = call[1] # Get keyword arguments + assert call_kwargs["pipeline"].id == pipeline.id + assert call_kwargs["workflow_id"] == workflow.id + assert call_kwargs["user"].id == account.id + assert call_kwargs["invoke_from"] == InvokeFrom.PUBLISHED + assert call_kwargs["streaming"] == False + assert isinstance(call_kwargs["application_generate_entity"], RagPipelineGenerateEntity) + + def test_priority_rag_pipeline_run_task_with_waiting_tasks( + self, db_session_with_containers, mock_pipeline_generator, mock_file_service + ): + """ + Test priority RAG pipeline run task with waiting tasks in queue using real Redis. + + This test verifies: + - Core task execution + - Real Redis-based tenant queue processing of waiting tasks + - Task function calls for waiting tasks + - Queue management with multiple tasks using actual Redis operations + """ + # Arrange: Create test data + account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers) + entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1) + file_content = self._create_file_content_for_entities(entities) + + # Mock file service + file_id = str(uuid.uuid4()) + mock_file_service["get_content"].return_value = file_content + + # Use real Redis for TenantIsolatedTaskQueue + queue = TenantIsolatedTaskQueue(tenant.id, "pipeline") + + # Add waiting tasks to the real Redis queue + waiting_file_ids = [str(uuid.uuid4()) for _ in range(2)] + queue.push_tasks(waiting_file_ids) + + # Mock the task function calls + with patch( + "tasks.rag_pipeline.priority_rag_pipeline_run_task.priority_rag_pipeline_run_task.delay" + ) as mock_delay: + # Act: Execute the priority task + priority_rag_pipeline_run_task(file_id, tenant.id) + + # Assert: Verify core processing occurred + mock_file_service["get_content"].assert_called_once_with(file_id) + mock_file_service["delete_file"].assert_called_once_with(file_id) + assert mock_pipeline_generator.call_count == 1 + + # Verify waiting tasks were processed, pull 1 task a time by default + assert mock_delay.call_count == 1 + + # Verify correct parameters for the call + call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {} + assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_ids[0] + assert call_kwargs.get("tenant_id") == tenant.id + + # Verify queue still has remaining tasks (only 1 was pulled) + remaining_tasks = queue.pull_tasks(count=10) + assert len(remaining_tasks) == 1 # 2 original - 1 pulled = 1 remaining + + def test_rag_pipeline_run_task_legacy_compatibility( + self, db_session_with_containers, mock_pipeline_generator, mock_file_service + ): + """ + Test regular RAG pipeline run task with legacy Redis queue format for backward compatibility. + + This test simulates the scenario where: + - Old code writes file IDs directly to Redis list using lpush + - New worker processes these legacy queue entries + - Ensures backward compatibility during deployment transition + + Legacy format: redis_client.lpush(tenant_self_pipeline_task_queue, upload_file.id) + New format: TenantIsolatedTaskQueue.push_tasks([file_id]) + """ + # Arrange: Create test data + account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers) + entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1) + file_content = self._create_file_content_for_entities(entities) + + # Mock file service + file_id = str(uuid.uuid4()) + mock_file_service["get_content"].return_value = file_content + + # Simulate legacy Redis queue format - direct file IDs in Redis list + from extensions.ext_redis import redis_client + + # Legacy queue key format (old code) + legacy_queue_key = f"tenant_self_pipeline_task_queue:{tenant.id}" + legacy_task_key = f"tenant_pipeline_task:{tenant.id}" + + # Add legacy format data to Redis (simulating old code behavior) + legacy_file_ids = [str(uuid.uuid4()) for _ in range(3)] + for file_id_legacy in legacy_file_ids: + redis_client.lpush(legacy_queue_key, file_id_legacy) + + # Set the task key to indicate there are waiting tasks (legacy behavior) + redis_client.set(legacy_task_key, 1, ex=60 * 60) + + # Mock the task function calls + with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay: + # Act: Execute the priority task with new code but legacy queue data + rag_pipeline_run_task(file_id, tenant.id) + + # Assert: Verify core processing occurred + mock_file_service["get_content"].assert_called_once_with(file_id) + mock_file_service["delete_file"].assert_called_once_with(file_id) + assert mock_pipeline_generator.call_count == 1 + + # Verify waiting tasks were processed, pull 1 task a time by default + assert mock_delay.call_count == 1 + + # Verify correct parameters for the call + call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {} + assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == legacy_file_ids[0] + assert call_kwargs.get("tenant_id") == tenant.id + + # Verify that new code can process legacy queue entries + # The new TenantIsolatedTaskQueue should be able to read from the legacy format + queue = TenantIsolatedTaskQueue(tenant.id, "pipeline") + + # Verify queue still has remaining tasks (only 1 was pulled) + remaining_tasks = queue.pull_tasks(count=10) + assert len(remaining_tasks) == 2 # 3 original - 1 pulled = 2 remaining + + # Cleanup: Remove legacy test data + redis_client.delete(legacy_queue_key) + redis_client.delete(legacy_task_key) + + def test_rag_pipeline_run_task_with_waiting_tasks( + self, db_session_with_containers, mock_pipeline_generator, mock_file_service + ): + """ + Test regular RAG pipeline run task with waiting tasks in queue using real Redis. + + This test verifies: + - Core task execution + - Real Redis-based tenant queue processing of waiting tasks + - Task function calls for waiting tasks + - Queue management with multiple tasks using actual Redis operations + """ + # Arrange: Create test data + account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers) + entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1) + file_content = self._create_file_content_for_entities(entities) + + # Mock file service + file_id = str(uuid.uuid4()) + mock_file_service["get_content"].return_value = file_content + + # Use real Redis for TenantIsolatedTaskQueue + queue = TenantIsolatedTaskQueue(tenant.id, "pipeline") + + # Add waiting tasks to the real Redis queue + waiting_file_ids = [str(uuid.uuid4()) for _ in range(3)] + queue.push_tasks(waiting_file_ids) + + # Mock the task function calls + with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay: + # Act: Execute the regular task + rag_pipeline_run_task(file_id, tenant.id) + + # Assert: Verify core processing occurred + mock_file_service["get_content"].assert_called_once_with(file_id) + mock_file_service["delete_file"].assert_called_once_with(file_id) + assert mock_pipeline_generator.call_count == 1 + + # Verify waiting tasks were processed, pull 1 task a time by default + assert mock_delay.call_count == 1 + + # Verify correct parameters for the call + call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {} + assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_ids[0] + assert call_kwargs.get("tenant_id") == tenant.id + + # Verify queue still has remaining tasks (only 1 was pulled) + remaining_tasks = queue.pull_tasks(count=10) + assert len(remaining_tasks) == 2 # 3 original - 1 pulled = 2 remaining + + def test_priority_rag_pipeline_run_task_error_handling( + self, db_session_with_containers, mock_pipeline_generator, mock_file_service + ): + """ + Test error handling in priority RAG pipeline run task using real Redis. + + This test verifies: + - Exception handling during core processing + - Tenant queue cleanup even on errors using real Redis + - Proper error logging + - Function completes without raising exceptions + - Queue management continues despite core processing errors + """ + # Arrange: Create test data + account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers) + entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1) + file_content = self._create_file_content_for_entities(entities) + + # Mock file service + file_id = str(uuid.uuid4()) + mock_file_service["get_content"].return_value = file_content + + # Mock PipelineGenerator to raise an exception + mock_pipeline_generator.side_effect = Exception("Pipeline generation failed") + + # Use real Redis for TenantIsolatedTaskQueue + queue = TenantIsolatedTaskQueue(tenant.id, "pipeline") + + # Add waiting task to the real Redis queue + waiting_file_id = str(uuid.uuid4()) + queue.push_tasks([waiting_file_id]) + + # Mock the task function calls + with patch( + "tasks.rag_pipeline.priority_rag_pipeline_run_task.priority_rag_pipeline_run_task.delay" + ) as mock_delay: + # Act: Execute the priority task (should not raise exception) + priority_rag_pipeline_run_task(file_id, tenant.id) + + # Assert: Verify error was handled gracefully + # The function should not raise exceptions + mock_file_service["get_content"].assert_called_once_with(file_id) + mock_file_service["delete_file"].assert_called_once_with(file_id) + assert mock_pipeline_generator.call_count == 1 + + # Verify waiting task was still processed despite core processing error + mock_delay.assert_called_once() + + # Verify correct parameters for the call + call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {} + assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id + assert call_kwargs.get("tenant_id") == tenant.id + + # Verify queue is empty after processing (task was pulled) + remaining_tasks = queue.pull_tasks(count=10) + assert len(remaining_tasks) == 0 + + def test_rag_pipeline_run_task_error_handling( + self, db_session_with_containers, mock_pipeline_generator, mock_file_service + ): + """ + Test error handling in regular RAG pipeline run task using real Redis. + + This test verifies: + - Exception handling during core processing + - Tenant queue cleanup even on errors using real Redis + - Proper error logging + - Function completes without raising exceptions + - Queue management continues despite core processing errors + """ + # Arrange: Create test data + account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers) + entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1) + file_content = self._create_file_content_for_entities(entities) + + # Mock file service + file_id = str(uuid.uuid4()) + mock_file_service["get_content"].return_value = file_content + + # Mock PipelineGenerator to raise an exception + mock_pipeline_generator.side_effect = Exception("Pipeline generation failed") + + # Use real Redis for TenantIsolatedTaskQueue + queue = TenantIsolatedTaskQueue(tenant.id, "pipeline") + + # Add waiting task to the real Redis queue + waiting_file_id = str(uuid.uuid4()) + queue.push_tasks([waiting_file_id]) + + # Mock the task function calls + with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay: + # Act: Execute the regular task (should not raise exception) + rag_pipeline_run_task(file_id, tenant.id) + + # Assert: Verify error was handled gracefully + # The function should not raise exceptions + mock_file_service["get_content"].assert_called_once_with(file_id) + mock_file_service["delete_file"].assert_called_once_with(file_id) + assert mock_pipeline_generator.call_count == 1 + + # Verify waiting task was still processed despite core processing error + mock_delay.assert_called_once() + + # Verify correct parameters for the call + call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {} + assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id + assert call_kwargs.get("tenant_id") == tenant.id + + # Verify queue is empty after processing (task was pulled) + remaining_tasks = queue.pull_tasks(count=10) + assert len(remaining_tasks) == 0 + + def test_priority_rag_pipeline_run_task_tenant_isolation( + self, db_session_with_containers, mock_pipeline_generator, mock_file_service + ): + """ + Test tenant isolation in priority RAG pipeline run task using real Redis. + + This test verifies: + - Different tenants have isolated queues + - Tasks from one tenant don't affect another tenant's queue + - Queue operations are properly scoped to tenant + """ + # Arrange: Create test data for two different tenants + account1, tenant1, pipeline1, workflow1 = self._create_test_pipeline_and_workflow(db_session_with_containers) + account2, tenant2, pipeline2, workflow2 = self._create_test_pipeline_and_workflow(db_session_with_containers) + + entities1 = self._create_rag_pipeline_invoke_entities(account1, tenant1, pipeline1, workflow1, count=1) + entities2 = self._create_rag_pipeline_invoke_entities(account2, tenant2, pipeline2, workflow2, count=1) + + file_content1 = self._create_file_content_for_entities(entities1) + file_content2 = self._create_file_content_for_entities(entities2) + + # Mock file service + file_id1 = str(uuid.uuid4()) + file_id2 = str(uuid.uuid4()) + mock_file_service["get_content"].side_effect = [file_content1, file_content2] + + # Use real Redis for TenantIsolatedTaskQueue + queue1 = TenantIsolatedTaskQueue(tenant1.id, "pipeline") + queue2 = TenantIsolatedTaskQueue(tenant2.id, "pipeline") + + # Add waiting tasks to both queues + waiting_file_id1 = str(uuid.uuid4()) + waiting_file_id2 = str(uuid.uuid4()) + + queue1.push_tasks([waiting_file_id1]) + queue2.push_tasks([waiting_file_id2]) + + # Mock the task function calls + with patch( + "tasks.rag_pipeline.priority_rag_pipeline_run_task.priority_rag_pipeline_run_task.delay" + ) as mock_delay: + # Act: Execute the priority task for tenant1 only + priority_rag_pipeline_run_task(file_id1, tenant1.id) + + # Assert: Verify core processing occurred for tenant1 + assert mock_file_service["get_content"].call_count == 1 + assert mock_file_service["delete_file"].call_count == 1 + assert mock_pipeline_generator.call_count == 1 + + # Verify only tenant1's waiting task was processed + mock_delay.assert_called_once() + call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {} + assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id1 + assert call_kwargs.get("tenant_id") == tenant1.id + + # Verify tenant1's queue is empty + remaining_tasks1 = queue1.pull_tasks(count=10) + assert len(remaining_tasks1) == 0 + + # Verify tenant2's queue still has its task (isolation) + remaining_tasks2 = queue2.pull_tasks(count=10) + assert len(remaining_tasks2) == 1 + + # Verify queue keys are different + assert queue1._queue != queue2._queue + assert queue1._task_key != queue2._task_key + + def test_rag_pipeline_run_task_tenant_isolation( + self, db_session_with_containers, mock_pipeline_generator, mock_file_service + ): + """ + Test tenant isolation in regular RAG pipeline run task using real Redis. + + This test verifies: + - Different tenants have isolated queues + - Tasks from one tenant don't affect another tenant's queue + - Queue operations are properly scoped to tenant + """ + # Arrange: Create test data for two different tenants + account1, tenant1, pipeline1, workflow1 = self._create_test_pipeline_and_workflow(db_session_with_containers) + account2, tenant2, pipeline2, workflow2 = self._create_test_pipeline_and_workflow(db_session_with_containers) + + entities1 = self._create_rag_pipeline_invoke_entities(account1, tenant1, pipeline1, workflow1, count=1) + entities2 = self._create_rag_pipeline_invoke_entities(account2, tenant2, pipeline2, workflow2, count=1) + + file_content1 = self._create_file_content_for_entities(entities1) + file_content2 = self._create_file_content_for_entities(entities2) + + # Mock file service + file_id1 = str(uuid.uuid4()) + file_id2 = str(uuid.uuid4()) + mock_file_service["get_content"].side_effect = [file_content1, file_content2] + + # Use real Redis for TenantIsolatedTaskQueue + queue1 = TenantIsolatedTaskQueue(tenant1.id, "pipeline") + queue2 = TenantIsolatedTaskQueue(tenant2.id, "pipeline") + + # Add waiting tasks to both queues + waiting_file_id1 = str(uuid.uuid4()) + waiting_file_id2 = str(uuid.uuid4()) + + queue1.push_tasks([waiting_file_id1]) + queue2.push_tasks([waiting_file_id2]) + + # Mock the task function calls + with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay: + # Act: Execute the regular task for tenant1 only + rag_pipeline_run_task(file_id1, tenant1.id) + + # Assert: Verify core processing occurred for tenant1 + assert mock_file_service["get_content"].call_count == 1 + assert mock_file_service["delete_file"].call_count == 1 + assert mock_pipeline_generator.call_count == 1 + + # Verify only tenant1's waiting task was processed + mock_delay.assert_called_once() + call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {} + assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id1 + assert call_kwargs.get("tenant_id") == tenant1.id + + # Verify tenant1's queue is empty + remaining_tasks1 = queue1.pull_tasks(count=10) + assert len(remaining_tasks1) == 0 + + # Verify tenant2's queue still has its task (isolation) + remaining_tasks2 = queue2.pull_tasks(count=10) + assert len(remaining_tasks2) == 1 + + # Verify queue keys are different + assert queue1._queue != queue2._queue + assert queue1._task_key != queue2._task_key + + def test_run_single_rag_pipeline_task_success( + self, db_session_with_containers, mock_pipeline_generator, flask_app_with_containers + ): + """ + Test successful run_single_rag_pipeline_task execution. + + This test verifies: + - Single RAG pipeline task execution within Flask app context + - Entity validation and database queries + - PipelineGenerator._generate method call with correct parameters + - Proper Flask context handling + """ + # Arrange: Create test data + account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers) + entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1) + entity_data = entities[0].model_dump() + + # Act: Execute the single task + with flask_app_with_containers.app_context(): + run_single_rag_pipeline_task(entity_data, flask_app_with_containers) + + # Assert: Verify expected outcomes + # Verify PipelineGenerator._generate was called + assert mock_pipeline_generator.call_count == 1 + + # Verify call parameters + call = mock_pipeline_generator.call_args + call_kwargs = call[1] # Get keyword arguments + assert call_kwargs["pipeline"].id == pipeline.id + assert call_kwargs["workflow_id"] == workflow.id + assert call_kwargs["user"].id == account.id + assert call_kwargs["invoke_from"] == InvokeFrom.PUBLISHED + assert call_kwargs["streaming"] == False + assert isinstance(call_kwargs["application_generate_entity"], RagPipelineGenerateEntity) + + def test_run_single_rag_pipeline_task_entity_validation_error( + self, db_session_with_containers, mock_pipeline_generator, flask_app_with_containers + ): + """ + Test run_single_rag_pipeline_task with invalid entity data. + + This test verifies: + - Proper error handling for invalid entity data + - Exception logging + - Function raises ValueError for missing entities + """ + # Arrange: Create entity data with valid UUIDs but non-existent entities + fake = Faker() + invalid_entity_data = { + "pipeline_id": str(uuid.uuid4()), + "application_generate_entity": { + "app_config": { + "app_id": str(uuid.uuid4()), + "app_name": "Test App", + "mode": "workflow", + "workflow_id": str(uuid.uuid4()), + }, + "inputs": {"query": "Test query"}, + "query": "Test query", + "response_mode": "blocking", + "user": str(uuid.uuid4()), + "files": [], + "conversation_id": str(uuid.uuid4()), + }, + "user_id": str(uuid.uuid4()), + "tenant_id": str(uuid.uuid4()), + "workflow_id": str(uuid.uuid4()), + "streaming": False, + "workflow_execution_id": str(uuid.uuid4()), + "workflow_thread_pool_id": str(uuid.uuid4()), + } + + # Act & Assert: Execute the single task with non-existent entities (should raise ValueError) + with flask_app_with_containers.app_context(): + with pytest.raises(ValueError, match="Account .* not found"): + run_single_rag_pipeline_task(invalid_entity_data, flask_app_with_containers) + + # Assert: Pipeline generator should not be called + mock_pipeline_generator.assert_not_called() + + def test_run_single_rag_pipeline_task_database_entity_not_found( + self, db_session_with_containers, mock_pipeline_generator, flask_app_with_containers + ): + """ + Test run_single_rag_pipeline_task with non-existent database entities. + + This test verifies: + - Proper error handling for missing database entities + - Exception logging + - Function raises ValueError for missing entities + """ + # Arrange: Create test data with non-existent IDs + fake = Faker() + entity_data = { + "pipeline_id": str(uuid.uuid4()), + "application_generate_entity": { + "app_config": { + "app_id": str(uuid.uuid4()), + "app_name": "Test App", + "mode": "workflow", + "workflow_id": str(uuid.uuid4()), + }, + "inputs": {"query": "Test query"}, + "query": "Test query", + "response_mode": "blocking", + "user": str(uuid.uuid4()), + "files": [], + "conversation_id": str(uuid.uuid4()), + }, + "user_id": str(uuid.uuid4()), + "tenant_id": str(uuid.uuid4()), + "workflow_id": str(uuid.uuid4()), + "streaming": False, + "workflow_execution_id": str(uuid.uuid4()), + "workflow_thread_pool_id": str(uuid.uuid4()), + } + + # Act & Assert: Execute the single task with non-existent entities (should raise ValueError) + with flask_app_with_containers.app_context(): + with pytest.raises(ValueError, match="Account .* not found"): + run_single_rag_pipeline_task(entity_data, flask_app_with_containers) + + # Assert: Pipeline generator should not be called + mock_pipeline_generator.assert_not_called() + + def test_priority_rag_pipeline_run_task_file_not_found( + self, db_session_with_containers, mock_pipeline_generator, mock_file_service + ): + """ + Test priority RAG pipeline run task with non-existent file. + + This test verifies: + - Proper error handling for missing files + - Exception logging + - Function raises Exception for file errors + - Queue management continues despite file errors + """ + # Arrange: Create test data + account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers) + + # Mock file service to raise exception + file_id = str(uuid.uuid4()) + mock_file_service["get_content"].side_effect = Exception("File not found") + + # Use real Redis for TenantIsolatedTaskQueue + queue = TenantIsolatedTaskQueue(tenant.id, "pipeline") + + # Add waiting task to the real Redis queue + waiting_file_id = str(uuid.uuid4()) + queue.push_tasks([waiting_file_id]) + + # Mock the task function calls + with patch( + "tasks.rag_pipeline.priority_rag_pipeline_run_task.priority_rag_pipeline_run_task.delay" + ) as mock_delay: + # Act & Assert: Execute the priority task (should raise Exception) + with pytest.raises(Exception, match="File not found"): + priority_rag_pipeline_run_task(file_id, tenant.id) + + # Assert: Verify error was handled gracefully + mock_file_service["get_content"].assert_called_once_with(file_id) + mock_pipeline_generator.assert_not_called() + + # Verify waiting task was still processed despite file error + mock_delay.assert_called_once() + + # Verify correct parameters for the call + call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {} + assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id + assert call_kwargs.get("tenant_id") == tenant.id + + # Verify queue is empty after processing (task was pulled) + remaining_tasks = queue.pull_tasks(count=10) + assert len(remaining_tasks) == 0 + + def test_rag_pipeline_run_task_file_not_found( + self, db_session_with_containers, mock_pipeline_generator, mock_file_service + ): + """ + Test regular RAG pipeline run task with non-existent file. + + This test verifies: + - Proper error handling for missing files + - Exception logging + - Function raises Exception for file errors + - Queue management continues despite file errors + """ + # Arrange: Create test data + account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers) + + # Mock file service to raise exception + file_id = str(uuid.uuid4()) + mock_file_service["get_content"].side_effect = Exception("File not found") + + # Use real Redis for TenantIsolatedTaskQueue + queue = TenantIsolatedTaskQueue(tenant.id, "pipeline") + + # Add waiting task to the real Redis queue + waiting_file_id = str(uuid.uuid4()) + queue.push_tasks([waiting_file_id]) + + # Mock the task function calls + with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay: + # Act & Assert: Execute the regular task (should raise Exception) + with pytest.raises(Exception, match="File not found"): + rag_pipeline_run_task(file_id, tenant.id) + + # Assert: Verify error was handled gracefully + mock_file_service["get_content"].assert_called_once_with(file_id) + mock_pipeline_generator.assert_not_called() + + # Verify waiting task was still processed despite file error + mock_delay.assert_called_once() + + # Verify correct parameters for the call + call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {} + assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id + assert call_kwargs.get("tenant_id") == tenant.id + + # Verify queue is empty after processing (task was pulled) + remaining_tasks = queue.pull_tasks(count=10) + assert len(remaining_tasks) == 0 diff --git a/api/tests/test_containers_integration_tests/test_workflow_pause_integration.py b/api/tests/test_containers_integration_tests/test_workflow_pause_integration.py new file mode 100644 index 0000000000..79da5d4d0e --- /dev/null +++ b/api/tests/test_containers_integration_tests/test_workflow_pause_integration.py @@ -0,0 +1,948 @@ +"""Comprehensive integration tests for workflow pause functionality. + +This test suite covers complete workflow pause functionality including: +- Real database interactions using containerized PostgreSQL +- Real storage operations using the test storage backend +- Complete workflow: create -> pause -> resume -> delete +- Testing with actual FileService (not mocked) +- Database transactions and rollback behavior +- Actual file upload and retrieval through storage +- Workflow status transitions in the database +- Error handling with real database constraints +- Concurrent access scenarios +- Multi-tenant isolation +- Prune functionality +- File storage integration + +These tests use TestContainers to spin up real services for integration testing, +providing more reliable and realistic test scenarios than mocks. +""" + +import json +import uuid +from dataclasses import dataclass +from datetime import timedelta + +import pytest +from sqlalchemy import delete, select +from sqlalchemy.orm import Session, selectinload, sessionmaker + +from core.workflow.entities import WorkflowExecution +from core.workflow.enums import WorkflowExecutionStatus +from extensions.ext_storage import storage +from libs.datetime_utils import naive_utc_now +from models import Account +from models import WorkflowPause as WorkflowPauseModel +from models.account import Tenant, TenantAccountJoin, TenantAccountRole +from models.model import UploadFile +from models.workflow import Workflow, WorkflowRun +from repositories.sqlalchemy_api_workflow_run_repository import ( + DifyAPISQLAlchemyWorkflowRunRepository, + _WorkflowRunError, +) + + +@dataclass +class PauseWorkflowSuccessCase: + """Test case for successful pause workflow operations.""" + + name: str + initial_status: WorkflowExecutionStatus + description: str = "" + + +@dataclass +class PauseWorkflowFailureCase: + """Test case for pause workflow failure scenarios.""" + + name: str + initial_status: WorkflowExecutionStatus + description: str = "" + + +@dataclass +class ResumeWorkflowSuccessCase: + """Test case for successful resume workflow operations.""" + + name: str + initial_status: WorkflowExecutionStatus + description: str = "" + + +@dataclass +class ResumeWorkflowFailureCase: + """Test case for resume workflow failure scenarios.""" + + name: str + initial_status: WorkflowExecutionStatus + pause_resumed: bool + set_running_status: bool = False + description: str = "" + + +@dataclass +class PrunePausesTestCase: + """Test case for prune pauses operations.""" + + name: str + pause_age: timedelta + resume_age: timedelta | None + expected_pruned_count: int + description: str = "" + + +def pause_workflow_failure_cases() -> list[PauseWorkflowFailureCase]: + """Create test cases for pause workflow failure scenarios.""" + return [ + PauseWorkflowFailureCase( + name="pause_already_paused_workflow", + initial_status=WorkflowExecutionStatus.PAUSED, + description="Should fail to pause an already paused workflow", + ), + PauseWorkflowFailureCase( + name="pause_completed_workflow", + initial_status=WorkflowExecutionStatus.SUCCEEDED, + description="Should fail to pause a completed workflow", + ), + PauseWorkflowFailureCase( + name="pause_failed_workflow", + initial_status=WorkflowExecutionStatus.FAILED, + description="Should fail to pause a failed workflow", + ), + ] + + +def resume_workflow_success_cases() -> list[ResumeWorkflowSuccessCase]: + """Create test cases for successful resume workflow operations.""" + return [ + ResumeWorkflowSuccessCase( + name="resume_paused_workflow", + initial_status=WorkflowExecutionStatus.PAUSED, + description="Should successfully resume a paused workflow", + ), + ] + + +def resume_workflow_failure_cases() -> list[ResumeWorkflowFailureCase]: + """Create test cases for resume workflow failure scenarios.""" + return [ + ResumeWorkflowFailureCase( + name="resume_already_resumed_workflow", + initial_status=WorkflowExecutionStatus.PAUSED, + pause_resumed=True, + description="Should fail to resume an already resumed workflow", + ), + ResumeWorkflowFailureCase( + name="resume_running_workflow", + initial_status=WorkflowExecutionStatus.RUNNING, + pause_resumed=False, + set_running_status=True, + description="Should fail to resume a running workflow", + ), + ] + + +def prune_pauses_test_cases() -> list[PrunePausesTestCase]: + """Create test cases for prune pauses operations.""" + return [ + PrunePausesTestCase( + name="prune_old_active_pauses", + pause_age=timedelta(days=7), + resume_age=None, + expected_pruned_count=1, + description="Should prune old active pauses", + ), + PrunePausesTestCase( + name="prune_old_resumed_pauses", + pause_age=timedelta(hours=12), # Created 12 hours ago (recent) + resume_age=timedelta(days=7), + expected_pruned_count=1, + description="Should prune old resumed pauses", + ), + PrunePausesTestCase( + name="keep_recent_active_pauses", + pause_age=timedelta(hours=1), + resume_age=None, + expected_pruned_count=0, + description="Should keep recent active pauses", + ), + PrunePausesTestCase( + name="keep_recent_resumed_pauses", + pause_age=timedelta(days=1), + resume_age=timedelta(hours=1), + expected_pruned_count=0, + description="Should keep recent resumed pauses", + ), + ] + + +class TestWorkflowPauseIntegration: + """Comprehensive integration tests for workflow pause functionality.""" + + @pytest.fixture(autouse=True) + def setup_test_data(self, db_session_with_containers): + """Set up test data for each test method using TestContainers.""" + # Create test tenant and account + + tenant = Tenant( + name="Test Tenant", + status="normal", + ) + db_session_with_containers.add(tenant) + db_session_with_containers.commit() + + account = Account( + email="test@example.com", + name="Test User", + interface_language="en-US", + status="active", + ) + db_session_with_containers.add(account) + db_session_with_containers.commit() + + # Create tenant-account join + tenant_join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=TenantAccountRole.OWNER, + current=True, + ) + db_session_with_containers.add(tenant_join) + db_session_with_containers.commit() + + # Set test data + self.test_tenant_id = tenant.id + self.test_user_id = account.id + self.test_app_id = str(uuid.uuid4()) + self.test_workflow_id = str(uuid.uuid4()) + + # Create test workflow + self.test_workflow = Workflow( + id=self.test_workflow_id, + tenant_id=self.test_tenant_id, + app_id=self.test_app_id, + type="workflow", + version="draft", + graph='{"nodes": [], "edges": []}', + features='{"file_upload": {"enabled": false}}', + created_by=self.test_user_id, + created_at=naive_utc_now(), + ) + + # Store session instance + self.session = db_session_with_containers + + # Save test data to database + self.session.add(self.test_workflow) + self.session.commit() + + yield + + # Cleanup + self._cleanup_test_data() + + def _cleanup_test_data(self): + """Clean up test data after each test method.""" + # Clean up workflow pauses + self.session.execute(delete(WorkflowPauseModel)) + # Clean up upload files + self.session.execute( + delete(UploadFile).where( + UploadFile.tenant_id == self.test_tenant_id, + ) + ) + # Clean up workflow runs + self.session.execute( + delete(WorkflowRun).where( + WorkflowRun.tenant_id == self.test_tenant_id, + WorkflowRun.app_id == self.test_app_id, + ) + ) + # Clean up workflows + self.session.execute( + delete(Workflow).where( + Workflow.tenant_id == self.test_tenant_id, + Workflow.app_id == self.test_app_id, + ) + ) + self.session.commit() + + def _create_test_workflow_run( + self, status: WorkflowExecutionStatus = WorkflowExecutionStatus.RUNNING + ) -> WorkflowRun: + """Create a test workflow run with specified status.""" + workflow_run = WorkflowRun( + id=str(uuid.uuid4()), + tenant_id=self.test_tenant_id, + app_id=self.test_app_id, + workflow_id=self.test_workflow_id, + type="workflow", + triggered_from="debugging", + version="draft", + status=status, + created_by=self.test_user_id, + created_by_role="account", + created_at=naive_utc_now(), + ) + self.session.add(workflow_run) + self.session.commit() + return workflow_run + + def _create_test_state(self) -> str: + """Create a test state string.""" + return json.dumps( + { + "node_id": "test-node", + "node_type": "llm", + "status": "paused", + "data": {"key": "value"}, + "timestamp": naive_utc_now().isoformat(), + } + ) + + def _get_workflow_run_repository(self): + """Get workflow run repository instance for testing.""" + # Create session factory from the test session + engine = self.session.get_bind() + session_factory = sessionmaker(bind=engine, expire_on_commit=False) + + # Create a test-specific repository that implements the missing save method + class TestWorkflowRunRepository(DifyAPISQLAlchemyWorkflowRunRepository): + """Test-specific repository that implements the missing save method.""" + + def save(self, execution: WorkflowExecution): + """Implement the missing save method for testing.""" + # For testing purposes, we don't need to implement this method + # as it's not used in the pause functionality tests + pass + + # Create and return repository instance + repository = TestWorkflowRunRepository(session_maker=session_factory) + return repository + + # ==================== Complete Pause Workflow Tests ==================== + + def test_complete_pause_resume_workflow(self): + """Test complete workflow: create -> pause -> resume -> delete.""" + # Arrange + workflow_run = self._create_test_workflow_run() + test_state = self._create_test_state() + repository = self._get_workflow_run_repository() + + # Act - Create pause state + pause_entity = repository.create_workflow_pause( + workflow_run_id=workflow_run.id, + state_owner_user_id=self.test_user_id, + state=test_state, + ) + + # Assert - Pause state created + assert pause_entity is not None + assert pause_entity.id is not None + assert pause_entity.workflow_execution_id == workflow_run.id + # Convert both to strings for comparison + retrieved_state = pause_entity.get_state() + if isinstance(retrieved_state, bytes): + retrieved_state = retrieved_state.decode() + assert retrieved_state == test_state + + # Verify database state + query = select(WorkflowPauseModel).where(WorkflowPauseModel.workflow_run_id == workflow_run.id) + pause_model = self.session.scalars(query).first() + assert pause_model is not None + assert pause_model.resumed_at is None + assert pause_model.id == pause_entity.id + + self.session.refresh(workflow_run) + assert workflow_run.status == WorkflowExecutionStatus.PAUSED + + # Act - Get pause state + retrieved_entity = repository.get_workflow_pause(workflow_run.id) + + # Assert - Pause state retrieved + assert retrieved_entity is not None + assert retrieved_entity.id == pause_entity.id + retrieved_state = retrieved_entity.get_state() + if isinstance(retrieved_state, bytes): + retrieved_state = retrieved_state.decode() + assert retrieved_state == test_state + + # Act - Resume workflow + resumed_entity = repository.resume_workflow_pause( + workflow_run_id=workflow_run.id, + pause_entity=pause_entity, + ) + + # Assert - Workflow resumed + assert resumed_entity is not None + assert resumed_entity.id == pause_entity.id + assert resumed_entity.resumed_at is not None + + # Verify database state + self.session.refresh(workflow_run) + assert workflow_run.status == WorkflowExecutionStatus.RUNNING + self.session.refresh(pause_model) + assert pause_model.resumed_at is not None + + # Act - Delete pause state + repository.delete_workflow_pause(pause_entity) + + # Assert - Pause state deleted + with Session(bind=self.session.get_bind()) as session: + deleted_pause = session.get(WorkflowPauseModel, pause_entity.id) + assert deleted_pause is None + + def test_pause_workflow_success(self): + """Test successful pause workflow scenarios.""" + workflow_run = self._create_test_workflow_run(status=WorkflowExecutionStatus.RUNNING) + test_state = self._create_test_state() + repository = self._get_workflow_run_repository() + + pause_entity = repository.create_workflow_pause( + workflow_run_id=workflow_run.id, + state_owner_user_id=self.test_user_id, + state=test_state, + ) + + assert pause_entity is not None + assert pause_entity.workflow_execution_id == workflow_run.id + + retrieved_state = pause_entity.get_state() + if isinstance(retrieved_state, bytes): + retrieved_state = retrieved_state.decode() + assert retrieved_state == test_state + + self.session.refresh(workflow_run) + assert workflow_run.status == WorkflowExecutionStatus.PAUSED + pause_query = select(WorkflowPauseModel).where(WorkflowPauseModel.workflow_run_id == workflow_run.id) + pause_model = self.session.scalars(pause_query).first() + assert pause_model is not None + assert pause_model.id == pause_entity.id + assert pause_model.resumed_at is None + + @pytest.mark.parametrize("test_case", pause_workflow_failure_cases(), ids=lambda tc: tc.name) + def test_pause_workflow_failure(self, test_case: PauseWorkflowFailureCase): + """Test pause workflow failure scenarios.""" + workflow_run = self._create_test_workflow_run(status=test_case.initial_status) + test_state = self._create_test_state() + repository = self._get_workflow_run_repository() + + with pytest.raises(_WorkflowRunError): + repository.create_workflow_pause( + workflow_run_id=workflow_run.id, + state_owner_user_id=self.test_user_id, + state=test_state, + ) + + @pytest.mark.parametrize("test_case", resume_workflow_success_cases(), ids=lambda tc: tc.name) + def test_resume_workflow_success(self, test_case: ResumeWorkflowSuccessCase): + """Test successful resume workflow scenarios.""" + workflow_run = self._create_test_workflow_run(status=test_case.initial_status) + test_state = self._create_test_state() + repository = self._get_workflow_run_repository() + + if workflow_run.status != WorkflowExecutionStatus.RUNNING: + workflow_run.status = WorkflowExecutionStatus.RUNNING + self.session.commit() + + pause_entity = repository.create_workflow_pause( + workflow_run_id=workflow_run.id, + state_owner_user_id=self.test_user_id, + state=test_state, + ) + + self.session.refresh(workflow_run) + assert workflow_run.status == WorkflowExecutionStatus.PAUSED + + resumed_entity = repository.resume_workflow_pause( + workflow_run_id=workflow_run.id, + pause_entity=pause_entity, + ) + assert resumed_entity is not None + assert resumed_entity.id == pause_entity.id + assert resumed_entity.resumed_at is not None + + self.session.refresh(workflow_run) + assert workflow_run.status == WorkflowExecutionStatus.RUNNING + pause_query = select(WorkflowPauseModel).where(WorkflowPauseModel.workflow_run_id == workflow_run.id) + pause_model = self.session.scalars(pause_query).first() + assert pause_model is not None + assert pause_model.id == pause_entity.id + assert pause_model.resumed_at is not None + + def test_resume_running_workflow(self): + """Test resume workflow failure scenarios.""" + workflow_run = self._create_test_workflow_run(status=WorkflowExecutionStatus.RUNNING) + test_state = self._create_test_state() + repository = self._get_workflow_run_repository() + + pause_entity = repository.create_workflow_pause( + workflow_run_id=workflow_run.id, + state_owner_user_id=self.test_user_id, + state=test_state, + ) + + self.session.refresh(workflow_run) + workflow_run.status = WorkflowExecutionStatus.RUNNING + self.session.add(workflow_run) + self.session.commit() + + with pytest.raises(_WorkflowRunError): + repository.resume_workflow_pause( + workflow_run_id=workflow_run.id, + pause_entity=pause_entity, + ) + + def test_resume_resumed_pause(self): + """Test resume workflow failure scenarios.""" + workflow_run = self._create_test_workflow_run(status=WorkflowExecutionStatus.RUNNING) + test_state = self._create_test_state() + repository = self._get_workflow_run_repository() + + pause_entity = repository.create_workflow_pause( + workflow_run_id=workflow_run.id, + state_owner_user_id=self.test_user_id, + state=test_state, + ) + pause_model = self.session.get(WorkflowPauseModel, pause_entity.id) + pause_model.resumed_at = naive_utc_now() + self.session.add(pause_model) + self.session.commit() + + with pytest.raises(_WorkflowRunError): + repository.resume_workflow_pause( + workflow_run_id=workflow_run.id, + pause_entity=pause_entity, + ) + + # ==================== Error Scenario Tests ==================== + + def test_pause_nonexistent_workflow_run(self): + """Test pausing a non-existent workflow run.""" + # Arrange + nonexistent_id = str(uuid.uuid4()) + test_state = self._create_test_state() + repository = self._get_workflow_run_repository() + + # Act & Assert + with pytest.raises(ValueError, match="WorkflowRun not found"): + repository.create_workflow_pause( + workflow_run_id=nonexistent_id, + state_owner_user_id=self.test_user_id, + state=test_state, + ) + + def test_resume_nonexistent_workflow_run(self): + """Test resuming a non-existent workflow run.""" + # Arrange + workflow_run = self._create_test_workflow_run() + test_state = self._create_test_state() + repository = self._get_workflow_run_repository() + + pause_entity = repository.create_workflow_pause( + workflow_run_id=workflow_run.id, + state_owner_user_id=self.test_user_id, + state=test_state, + ) + + nonexistent_id = str(uuid.uuid4()) + + # Act & Assert + with pytest.raises(ValueError, match="WorkflowRun not found"): + repository.resume_workflow_pause( + workflow_run_id=nonexistent_id, + pause_entity=pause_entity, + ) + + # ==================== Prune Functionality Tests ==================== + + @pytest.mark.parametrize("test_case", prune_pauses_test_cases(), ids=lambda tc: tc.name) + def test_prune_pauses_scenarios(self, test_case: PrunePausesTestCase): + """Test various prune pauses scenarios.""" + now = naive_utc_now() + + # Create pause state + workflow_run = self._create_test_workflow_run() + test_state = self._create_test_state() + repository = self._get_workflow_run_repository() + + pause_entity = repository.create_workflow_pause( + workflow_run_id=workflow_run.id, + state_owner_user_id=self.test_user_id, + state=test_state, + ) + + # Manually adjust timestamps for testing + pause_model = self.session.get(WorkflowPauseModel, pause_entity.id) + pause_model.created_at = now - test_case.pause_age + + if test_case.resume_age is not None: + # Resume pause and adjust resume time + repository.resume_workflow_pause( + workflow_run_id=workflow_run.id, + pause_entity=pause_entity, + ) + # Need to refresh to get the updated model + self.session.refresh(pause_model) + # Manually set the resumed_at to an older time for testing + pause_model.resumed_at = now - test_case.resume_age + self.session.commit() # Commit the resumed_at change + # Refresh again to ensure the change is persisted + self.session.refresh(pause_model) + + self.session.commit() + + # Act - Prune pauses + expiration_time = now - timedelta(days=1, seconds=1) # Expire pauses older than 1 day (plus 1 second) + resumption_time = now - timedelta( + days=7, seconds=1 + ) # Clean up pauses resumed more than 7 days ago (plus 1 second) + + # Debug: Check pause state before pruning + self.session.refresh(pause_model) + print(f"Pause created_at: {pause_model.created_at}") + print(f"Pause resumed_at: {pause_model.resumed_at}") + print(f"Expiration time: {expiration_time}") + print(f"Resumption time: {resumption_time}") + + # Force commit to ensure timestamps are saved + self.session.commit() + + # Determine if the pause should be pruned based on timestamps + should_be_pruned = False + if test_case.resume_age is not None: + # If resumed, check if resumed_at is older than resumption_time + should_be_pruned = pause_model.resumed_at < resumption_time + else: + # If not resumed, check if created_at is older than expiration_time + should_be_pruned = pause_model.created_at < expiration_time + + # Act - Prune pauses + pruned_ids = repository.prune_pauses( + expiration=expiration_time, + resumption_expiration=resumption_time, + ) + + # Assert - Check pruning results + if should_be_pruned: + assert len(pruned_ids) == test_case.expected_pruned_count + # Verify pause was actually deleted + # The pause should be in the pruned_ids list if it was pruned + assert pause_entity.id in pruned_ids + else: + assert len(pruned_ids) == 0 + + def test_prune_pauses_with_limit(self): + """Test prune pauses with limit parameter.""" + now = naive_utc_now() + + # Create multiple pause states + pause_entities = [] + repository = self._get_workflow_run_repository() + + for i in range(5): + workflow_run = self._create_test_workflow_run() + test_state = self._create_test_state() + + pause_entity = repository.create_workflow_pause( + workflow_run_id=workflow_run.id, + state_owner_user_id=self.test_user_id, + state=test_state, + ) + pause_entities.append(pause_entity) + + # Make all pauses old enough to be pruned + pause_model = self.session.get(WorkflowPauseModel, pause_entity.id) + pause_model.created_at = now - timedelta(days=7) + + self.session.commit() + + # Act - Prune with limit + expiration_time = now - timedelta(days=1) + resumption_time = now - timedelta(days=7) + + pruned_ids = repository.prune_pauses( + expiration=expiration_time, + resumption_expiration=resumption_time, + limit=3, + ) + + # Assert + assert len(pruned_ids) == 3 + + # Verify only 3 were deleted + remaining_count = ( + self.session.query(WorkflowPauseModel) + .filter(WorkflowPauseModel.id.in_([pe.id for pe in pause_entities])) + .count() + ) + assert remaining_count == 2 + + # ==================== Multi-tenant Isolation Tests ==================== + + def test_multi_tenant_pause_isolation(self): + """Test that pause states are properly isolated by tenant.""" + # Arrange - Create second tenant + + tenant2 = Tenant( + name="Test Tenant 2", + status="normal", + ) + self.session.add(tenant2) + self.session.commit() + + account2 = Account( + email="test2@example.com", + name="Test User 2", + interface_language="en-US", + status="active", + ) + self.session.add(account2) + self.session.commit() + + tenant2_join = TenantAccountJoin( + tenant_id=tenant2.id, + account_id=account2.id, + role=TenantAccountRole.OWNER, + current=True, + ) + self.session.add(tenant2_join) + self.session.commit() + + # Create workflow for tenant 2 + workflow2 = Workflow( + id=str(uuid.uuid4()), + tenant_id=tenant2.id, + app_id=str(uuid.uuid4()), + type="workflow", + version="draft", + graph='{"nodes": [], "edges": []}', + features='{"file_upload": {"enabled": false}}', + created_by=account2.id, + created_at=naive_utc_now(), + ) + self.session.add(workflow2) + self.session.commit() + + # Create workflow runs for both tenants + workflow_run1 = self._create_test_workflow_run() + workflow_run2 = WorkflowRun( + id=str(uuid.uuid4()), + tenant_id=tenant2.id, + app_id=workflow2.app_id, + workflow_id=workflow2.id, + type="workflow", + triggered_from="debugging", + version="draft", + status=WorkflowExecutionStatus.RUNNING, + created_by=account2.id, + created_by_role="account", + created_at=naive_utc_now(), + ) + self.session.add(workflow_run2) + self.session.commit() + + test_state = self._create_test_state() + repository = self._get_workflow_run_repository() + + # Act - Create pause for tenant 1 + pause_entity1 = repository.create_workflow_pause( + workflow_run_id=workflow_run1.id, + state_owner_user_id=self.test_user_id, + state=test_state, + ) + + # Try to access pause from tenant 2 using tenant 1's repository + # This should work because we're using the same repository + pause_entity2 = repository.get_workflow_pause(workflow_run2.id) + assert pause_entity2 is None # No pause for tenant 2 yet + + # Create pause for tenant 2 + pause_entity2 = repository.create_workflow_pause( + workflow_run_id=workflow_run2.id, + state_owner_user_id=account2.id, + state=test_state, + ) + + # Assert - Both pauses should exist and be separate + assert pause_entity1 is not None + assert pause_entity2 is not None + assert pause_entity1.id != pause_entity2.id + assert pause_entity1.workflow_execution_id != pause_entity2.workflow_execution_id + + def test_cross_tenant_access_restriction(self): + """Test that cross-tenant access is properly restricted.""" + # This test would require tenant-specific repositories + # For now, we test that pause entities are properly scoped by tenant_id + workflow_run = self._create_test_workflow_run() + test_state = self._create_test_state() + repository = self._get_workflow_run_repository() + + pause_entity = repository.create_workflow_pause( + workflow_run_id=workflow_run.id, + state_owner_user_id=self.test_user_id, + state=test_state, + ) + + # Verify pause is properly scoped + pause_model = self.session.get(WorkflowPauseModel, pause_entity.id) + assert pause_model.workflow_id == self.test_workflow_id + + # ==================== File Storage Integration Tests ==================== + + def test_file_storage_integration(self): + """Test that state files are properly stored and retrieved.""" + # Arrange + workflow_run = self._create_test_workflow_run() + test_state = self._create_test_state() + repository = self._get_workflow_run_repository() + + # Act - Create pause state + pause_entity = repository.create_workflow_pause( + workflow_run_id=workflow_run.id, + state_owner_user_id=self.test_user_id, + state=test_state, + ) + + # Assert - Verify file was uploaded to storage + pause_model = self.session.get(WorkflowPauseModel, pause_entity.id) + assert pause_model.state_object_key != "" + + # Verify file content in storage + + file_key = pause_model.state_object_key + storage_content = storage.load(file_key).decode() + assert storage_content == test_state + + # Verify retrieval through entity + retrieved_state = pause_entity.get_state() + if isinstance(retrieved_state, bytes): + retrieved_state = retrieved_state.decode() + assert retrieved_state == test_state + + def test_file_cleanup_on_pause_deletion(self): + """Test that files are properly handled on pause deletion.""" + # Arrange + workflow_run = self._create_test_workflow_run() + test_state = self._create_test_state() + repository = self._get_workflow_run_repository() + + pause_entity = repository.create_workflow_pause( + workflow_run_id=workflow_run.id, + state_owner_user_id=self.test_user_id, + state=test_state, + ) + + # Get file info before deletion + pause_model = self.session.get(WorkflowPauseModel, pause_entity.id) + file_key = pause_model.state_object_key + + # Act - Delete pause state + repository.delete_workflow_pause(pause_entity) + + # Assert - Pause record should be deleted + self.session.expire_all() # Clear session to ensure fresh query + deleted_pause = self.session.get(WorkflowPauseModel, pause_entity.id) + assert deleted_pause is None + + try: + content = storage.load(file_key).decode() + pytest.fail("File should be deleted from storage after pause deletion") + except FileNotFoundError: + # This is expected - file should be deleted from storage + pass + except Exception as e: + pytest.fail(f"Unexpected error when checking file deletion: {e}") + + def test_large_state_file_handling(self): + """Test handling of large state files.""" + # Arrange - Create a large state (1MB) + large_state = "x" * (1024 * 1024) # 1MB of data + large_state_json = json.dumps({"large_data": large_state}) + + workflow_run = self._create_test_workflow_run() + repository = self._get_workflow_run_repository() + + # Act + pause_entity = repository.create_workflow_pause( + workflow_run_id=workflow_run.id, + state_owner_user_id=self.test_user_id, + state=large_state_json, + ) + + # Assert + assert pause_entity is not None + retrieved_state = pause_entity.get_state() + if isinstance(retrieved_state, bytes): + retrieved_state = retrieved_state.decode() + assert retrieved_state == large_state_json + + # Verify file size in database + pause_model = self.session.get(WorkflowPauseModel, pause_entity.id) + assert pause_model.state_object_key != "" + loaded_state = storage.load(pause_model.state_object_key) + assert loaded_state.decode() == large_state_json + + def test_multiple_pause_resume_cycles(self): + """Test multiple pause/resume cycles on the same workflow run.""" + # Arrange + workflow_run = self._create_test_workflow_run() + repository = self._get_workflow_run_repository() + + # Act & Assert - Multiple cycles + for i in range(3): + state = json.dumps({"cycle": i, "data": f"state_{i}"}) + + # Reset workflow run status to RUNNING before each pause (after first cycle) + if i > 0: + self.session.refresh(workflow_run) # Refresh to get latest state from session + workflow_run.status = WorkflowExecutionStatus.RUNNING + self.session.commit() + self.session.refresh(workflow_run) # Refresh again after commit + + # Pause + pause_entity = repository.create_workflow_pause( + workflow_run_id=workflow_run.id, + state_owner_user_id=self.test_user_id, + state=state, + ) + assert pause_entity is not None + + # Verify pause + self.session.expire_all() # Clear session to ensure fresh query + self.session.refresh(workflow_run) + + # Use the test session directly to verify the pause + stmt = select(WorkflowRun).options(selectinload(WorkflowRun.pause)).where(WorkflowRun.id == workflow_run.id) + workflow_run_with_pause = self.session.scalar(stmt) + pause_model = workflow_run_with_pause.pause + + # Verify pause using test session directly + assert pause_model is not None + assert pause_model.id == pause_entity.id + assert pause_model.state_object_key != "" + + # Load file content using storage directly + file_content = storage.load(pause_model.state_object_key) + if isinstance(file_content, bytes): + file_content = file_content.decode() + assert file_content == state + + # Resume + resumed_entity = repository.resume_workflow_pause( + workflow_run_id=workflow_run.id, + pause_entity=pause_entity, + ) + assert resumed_entity is not None + assert resumed_entity.resumed_at is not None + + # Verify resume - check that pause is marked as resumed + self.session.expire_all() # Clear session to ensure fresh query + stmt = select(WorkflowPauseModel).where(WorkflowPauseModel.id == pause_entity.id) + resumed_pause_model = self.session.scalar(stmt) + assert resumed_pause_model is not None + assert resumed_pause_model.resumed_at is not None + + # Verify workflow run status + self.session.refresh(workflow_run) + assert workflow_run.status == WorkflowExecutionStatus.RUNNING diff --git a/api/tests/unit_tests/controllers/console/auth/test_account_activation.py b/api/tests/unit_tests/controllers/console/auth/test_account_activation.py new file mode 100644 index 0000000000..4192fb2ca7 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/auth/test_account_activation.py @@ -0,0 +1,456 @@ +""" +Test suite for account activation flows. + +This module tests the account activation mechanism including: +- Invitation token validation +- Account activation with user preferences +- Workspace member onboarding +- Initial login after activation +""" + +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask + +from controllers.console.auth.activate import ActivateApi, ActivateCheckApi +from controllers.console.error import AlreadyActivateError +from models.account import AccountStatus + + +class TestActivateCheckApi: + """Test cases for checking activation token validity.""" + + @pytest.fixture + def app(self): + """Create Flask test application.""" + app = Flask(__name__) + app.config["TESTING"] = True + return app + + @pytest.fixture + def mock_invitation(self): + """Create mock invitation object.""" + tenant = MagicMock() + tenant.id = "workspace-123" + tenant.name = "Test Workspace" + + return { + "data": {"email": "invitee@example.com"}, + "tenant": tenant, + } + + @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid") + def test_check_valid_invitation_token(self, mock_get_invitation, app, mock_invitation): + """ + Test checking valid invitation token. + + Verifies that: + - Valid token returns invitation data + - Workspace information is included + - Invitee email is returned + """ + # Arrange + mock_get_invitation.return_value = mock_invitation + + # Act + with app.test_request_context( + "/activate/check?workspace_id=workspace-123&email=invitee@example.com&token=valid_token" + ): + api = ActivateCheckApi() + response = api.get() + + # Assert + assert response["is_valid"] is True + assert response["data"]["workspace_name"] == "Test Workspace" + assert response["data"]["workspace_id"] == "workspace-123" + assert response["data"]["email"] == "invitee@example.com" + + @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid") + def test_check_invalid_invitation_token(self, mock_get_invitation, app): + """ + Test checking invalid invitation token. + + Verifies that: + - Invalid token returns is_valid as False + - No data is returned for invalid tokens + """ + # Arrange + mock_get_invitation.return_value = None + + # Act + with app.test_request_context( + "/activate/check?workspace_id=workspace-123&email=test@example.com&token=invalid_token" + ): + api = ActivateCheckApi() + response = api.get() + + # Assert + assert response["is_valid"] is False + + @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid") + def test_check_token_without_workspace_id(self, mock_get_invitation, app, mock_invitation): + """ + Test checking token without workspace ID. + + Verifies that: + - Token can be checked without workspace_id parameter + - System handles None workspace_id gracefully + """ + # Arrange + mock_get_invitation.return_value = mock_invitation + + # Act + with app.test_request_context("/activate/check?email=invitee@example.com&token=valid_token"): + api = ActivateCheckApi() + response = api.get() + + # Assert + assert response["is_valid"] is True + mock_get_invitation.assert_called_once_with(None, "invitee@example.com", "valid_token") + + @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid") + def test_check_token_without_email(self, mock_get_invitation, app, mock_invitation): + """ + Test checking token without email parameter. + + Verifies that: + - Token can be checked without email parameter + - System handles None email gracefully + """ + # Arrange + mock_get_invitation.return_value = mock_invitation + + # Act + with app.test_request_context("/activate/check?workspace_id=workspace-123&token=valid_token"): + api = ActivateCheckApi() + response = api.get() + + # Assert + assert response["is_valid"] is True + mock_get_invitation.assert_called_once_with("workspace-123", None, "valid_token") + + +class TestActivateApi: + """Test cases for account activation endpoint.""" + + @pytest.fixture + def app(self): + """Create Flask test application.""" + app = Flask(__name__) + app.config["TESTING"] = True + return app + + @pytest.fixture + def mock_account(self): + """Create mock account object.""" + account = MagicMock() + account.id = "account-123" + account.email = "invitee@example.com" + account.status = AccountStatus.PENDING + return account + + @pytest.fixture + def mock_invitation(self, mock_account): + """Create mock invitation with account.""" + tenant = MagicMock() + tenant.id = "workspace-123" + tenant.name = "Test Workspace" + + return { + "data": {"email": "invitee@example.com"}, + "tenant": tenant, + "account": mock_account, + } + + @pytest.fixture + def mock_token_pair(self): + """Create mock token pair object.""" + token_pair = MagicMock() + token_pair.access_token = "access_token" + token_pair.refresh_token = "refresh_token" + token_pair.csrf_token = "csrf_token" + token_pair.model_dump.return_value = { + "access_token": "access_token", + "refresh_token": "refresh_token", + "csrf_token": "csrf_token", + } + return token_pair + + @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid") + @patch("controllers.console.auth.activate.RegisterService.revoke_token") + @patch("controllers.console.auth.activate.db") + @patch("controllers.console.auth.activate.AccountService.login") + def test_successful_account_activation( + self, + mock_login, + mock_db, + mock_revoke_token, + mock_get_invitation, + app, + mock_invitation, + mock_account, + mock_token_pair, + ): + """ + Test successful account activation. + + Verifies that: + - Account is activated with user preferences + - Account status is set to ACTIVE + - User is logged in after activation + - Invitation token is revoked + """ + # Arrange + mock_get_invitation.return_value = mock_invitation + mock_login.return_value = mock_token_pair + + # Act + with app.test_request_context( + "/activate", + method="POST", + json={ + "workspace_id": "workspace-123", + "email": "invitee@example.com", + "token": "valid_token", + "name": "John Doe", + "interface_language": "en-US", + "timezone": "UTC", + }, + ): + api = ActivateApi() + response = api.post() + + # Assert + assert response["result"] == "success" + assert mock_account.name == "John Doe" + assert mock_account.interface_language == "en-US" + assert mock_account.timezone == "UTC" + assert mock_account.status == AccountStatus.ACTIVE + assert mock_account.initialized_at is not None + mock_revoke_token.assert_called_once_with("workspace-123", "invitee@example.com", "valid_token") + mock_db.session.commit.assert_called_once() + mock_login.assert_called_once() + + @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid") + def test_activation_with_invalid_token(self, mock_get_invitation, app): + """ + Test account activation with invalid token. + + Verifies that: + - AlreadyActivateError is raised for invalid tokens + - No account changes are made + """ + # Arrange + mock_get_invitation.return_value = None + + # Act & Assert + with app.test_request_context( + "/activate", + method="POST", + json={ + "workspace_id": "workspace-123", + "email": "invitee@example.com", + "token": "invalid_token", + "name": "John Doe", + "interface_language": "en-US", + "timezone": "UTC", + }, + ): + api = ActivateApi() + with pytest.raises(AlreadyActivateError): + api.post() + + @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid") + @patch("controllers.console.auth.activate.RegisterService.revoke_token") + @patch("controllers.console.auth.activate.db") + @patch("controllers.console.auth.activate.AccountService.login") + def test_activation_sets_interface_theme( + self, + mock_login, + mock_db, + mock_revoke_token, + mock_get_invitation, + app, + mock_invitation, + mock_account, + mock_token_pair, + ): + """ + Test that activation sets default interface theme. + + Verifies that: + - Interface theme is set to 'light' by default + """ + # Arrange + mock_get_invitation.return_value = mock_invitation + mock_login.return_value = mock_token_pair + + # Act + with app.test_request_context( + "/activate", + method="POST", + json={ + "workspace_id": "workspace-123", + "email": "invitee@example.com", + "token": "valid_token", + "name": "John Doe", + "interface_language": "en-US", + "timezone": "UTC", + }, + ): + api = ActivateApi() + api.post() + + # Assert + assert mock_account.interface_theme == "light" + + @pytest.mark.parametrize( + ("language", "timezone"), + [ + ("en-US", "UTC"), + ("zh-Hans", "Asia/Shanghai"), + ("ja-JP", "Asia/Tokyo"), + ("es-ES", "Europe/Madrid"), + ], + ) + @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid") + @patch("controllers.console.auth.activate.RegisterService.revoke_token") + @patch("controllers.console.auth.activate.db") + @patch("controllers.console.auth.activate.AccountService.login") + def test_activation_with_different_locales( + self, + mock_login, + mock_db, + mock_revoke_token, + mock_get_invitation, + app, + mock_invitation, + mock_account, + mock_token_pair, + language, + timezone, + ): + """ + Test account activation with various language and timezone combinations. + + Verifies that: + - Different languages are accepted + - Different timezones are accepted + - User preferences are properly stored + """ + # Arrange + mock_get_invitation.return_value = mock_invitation + mock_login.return_value = mock_token_pair + + # Act + with app.test_request_context( + "/activate", + method="POST", + json={ + "workspace_id": "workspace-123", + "email": "invitee@example.com", + "token": "valid_token", + "name": "Test User", + "interface_language": language, + "timezone": timezone, + }, + ): + api = ActivateApi() + response = api.post() + + # Assert + assert response["result"] == "success" + assert mock_account.interface_language == language + assert mock_account.timezone == timezone + + @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid") + @patch("controllers.console.auth.activate.RegisterService.revoke_token") + @patch("controllers.console.auth.activate.db") + @patch("controllers.console.auth.activate.AccountService.login") + def test_activation_returns_token_data( + self, + mock_login, + mock_db, + mock_revoke_token, + mock_get_invitation, + app, + mock_invitation, + mock_token_pair, + ): + """ + Test that activation returns authentication tokens. + + Verifies that: + - Token pair is returned in response + - All token types are included (access, refresh, csrf) + """ + # Arrange + mock_get_invitation.return_value = mock_invitation + mock_login.return_value = mock_token_pair + + # Act + with app.test_request_context( + "/activate", + method="POST", + json={ + "workspace_id": "workspace-123", + "email": "invitee@example.com", + "token": "valid_token", + "name": "John Doe", + "interface_language": "en-US", + "timezone": "UTC", + }, + ): + api = ActivateApi() + response = api.post() + + # Assert + assert "data" in response + assert response["data"]["access_token"] == "access_token" + assert response["data"]["refresh_token"] == "refresh_token" + assert response["data"]["csrf_token"] == "csrf_token" + + @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid") + @patch("controllers.console.auth.activate.RegisterService.revoke_token") + @patch("controllers.console.auth.activate.db") + @patch("controllers.console.auth.activate.AccountService.login") + def test_activation_without_workspace_id( + self, + mock_login, + mock_db, + mock_revoke_token, + mock_get_invitation, + app, + mock_invitation, + mock_token_pair, + ): + """ + Test account activation without workspace_id. + + Verifies that: + - Activation can proceed without workspace_id + - Token revocation handles None workspace_id + """ + # Arrange + mock_get_invitation.return_value = mock_invitation + mock_login.return_value = mock_token_pair + + # Act + with app.test_request_context( + "/activate", + method="POST", + json={ + "email": "invitee@example.com", + "token": "valid_token", + "name": "John Doe", + "interface_language": "en-US", + "timezone": "UTC", + }, + ): + api = ActivateApi() + response = api.post() + + # Assert + assert response["result"] == "success" + mock_revoke_token.assert_called_once_with(None, "invitee@example.com", "valid_token") diff --git a/api/tests/unit_tests/controllers/console/auth/test_email_verification.py b/api/tests/unit_tests/controllers/console/auth/test_email_verification.py new file mode 100644 index 0000000000..a44f518171 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/auth/test_email_verification.py @@ -0,0 +1,546 @@ +""" +Test suite for email verification authentication flows. + +This module tests the email code login mechanism including: +- Email code sending with rate limiting +- Code verification and validation +- Account creation via email verification +- Workspace creation for new users +""" + +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask + +from controllers.console.auth.error import EmailCodeError, InvalidEmailError, InvalidTokenError +from controllers.console.auth.login import EmailCodeLoginApi, EmailCodeLoginSendEmailApi +from controllers.console.error import ( + AccountInFreezeError, + AccountNotFound, + EmailSendIpLimitError, + NotAllowedCreateWorkspace, + WorkspacesLimitExceeded, +) +from services.errors.account import AccountRegisterError + + +class TestEmailCodeLoginSendEmailApi: + """Test cases for sending email verification codes.""" + + @pytest.fixture + def app(self): + """Create Flask test application.""" + app = Flask(__name__) + app.config["TESTING"] = True + return app + + @pytest.fixture + def mock_account(self): + """Create mock account object.""" + account = MagicMock() + account.email = "test@example.com" + account.name = "Test User" + return account + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.AccountService.is_email_send_ip_limit") + @patch("controllers.console.auth.login.AccountService.get_user_through_email") + @patch("controllers.console.auth.login.AccountService.send_email_code_login_email") + def test_send_email_code_existing_user( + self, mock_send_email, mock_get_user, mock_is_ip_limit, mock_db, app, mock_account + ): + """ + Test sending email code to existing user. + + Verifies that: + - Email code is sent to existing account + - Token is generated and returned + - IP rate limiting is checked + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_ip_limit.return_value = False + mock_get_user.return_value = mock_account + mock_send_email.return_value = "email_token_123" + + # Act + with app.test_request_context( + "/email-code-login", method="POST", json={"email": "test@example.com", "language": "en-US"} + ): + api = EmailCodeLoginSendEmailApi() + response = api.post() + + # Assert + assert response["result"] == "success" + assert response["data"] == "email_token_123" + mock_send_email.assert_called_once_with(account=mock_account, language="en-US") + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.AccountService.is_email_send_ip_limit") + @patch("controllers.console.auth.login.AccountService.get_user_through_email") + @patch("controllers.console.auth.login.FeatureService.get_system_features") + @patch("controllers.console.auth.login.AccountService.send_email_code_login_email") + def test_send_email_code_new_user_registration_allowed( + self, mock_send_email, mock_get_features, mock_get_user, mock_is_ip_limit, mock_db, app + ): + """ + Test sending email code to new user when registration is allowed. + + Verifies that: + - Email code is sent even for non-existent accounts + - Registration is allowed by system features + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_ip_limit.return_value = False + mock_get_user.return_value = None + mock_get_features.return_value.is_allow_register = True + mock_send_email.return_value = "email_token_123" + + # Act + with app.test_request_context( + "/email-code-login", method="POST", json={"email": "newuser@example.com", "language": "en-US"} + ): + api = EmailCodeLoginSendEmailApi() + response = api.post() + + # Assert + assert response["result"] == "success" + mock_send_email.assert_called_once_with(email="newuser@example.com", language="en-US") + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.AccountService.is_email_send_ip_limit") + @patch("controllers.console.auth.login.AccountService.get_user_through_email") + @patch("controllers.console.auth.login.FeatureService.get_system_features") + def test_send_email_code_new_user_registration_disabled( + self, mock_get_features, mock_get_user, mock_is_ip_limit, mock_db, app + ): + """ + Test sending email code to new user when registration is disabled. + + Verifies that: + - AccountNotFound is raised for non-existent accounts + - Registration is blocked by system features + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_ip_limit.return_value = False + mock_get_user.return_value = None + mock_get_features.return_value.is_allow_register = False + + # Act & Assert + with app.test_request_context("/email-code-login", method="POST", json={"email": "newuser@example.com"}): + api = EmailCodeLoginSendEmailApi() + with pytest.raises(AccountNotFound): + api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.AccountService.is_email_send_ip_limit") + def test_send_email_code_ip_rate_limited(self, mock_is_ip_limit, mock_db, app): + """ + Test email code sending blocked by IP rate limit. + + Verifies that: + - EmailSendIpLimitError is raised when IP limit exceeded + - Prevents spam and abuse + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_ip_limit.return_value = True + + # Act & Assert + with app.test_request_context("/email-code-login", method="POST", json={"email": "test@example.com"}): + api = EmailCodeLoginSendEmailApi() + with pytest.raises(EmailSendIpLimitError): + api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.AccountService.is_email_send_ip_limit") + @patch("controllers.console.auth.login.AccountService.get_user_through_email") + def test_send_email_code_frozen_account(self, mock_get_user, mock_is_ip_limit, mock_db, app): + """ + Test email code sending to frozen account. + + Verifies that: + - AccountInFreezeError is raised for frozen accounts + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_ip_limit.return_value = False + mock_get_user.side_effect = AccountRegisterError("Account frozen") + + # Act & Assert + with app.test_request_context("/email-code-login", method="POST", json={"email": "frozen@example.com"}): + api = EmailCodeLoginSendEmailApi() + with pytest.raises(AccountInFreezeError): + api.post() + + @pytest.mark.parametrize( + ("language_input", "expected_language"), + [ + ("zh-Hans", "zh-Hans"), + ("en-US", "en-US"), + (None, "en-US"), + ], + ) + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.AccountService.is_email_send_ip_limit") + @patch("controllers.console.auth.login.AccountService.get_user_through_email") + @patch("controllers.console.auth.login.AccountService.send_email_code_login_email") + def test_send_email_code_language_handling( + self, + mock_send_email, + mock_get_user, + mock_is_ip_limit, + mock_db, + app, + mock_account, + language_input, + expected_language, + ): + """ + Test email code sending with different language preferences. + + Verifies that: + - Language parameter is correctly processed + - Defaults to en-US when not specified + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_ip_limit.return_value = False + mock_get_user.return_value = mock_account + mock_send_email.return_value = "token" + + # Act + with app.test_request_context( + "/email-code-login", method="POST", json={"email": "test@example.com", "language": language_input} + ): + api = EmailCodeLoginSendEmailApi() + api.post() + + # Assert + call_args = mock_send_email.call_args + assert call_args.kwargs["language"] == expected_language + + +class TestEmailCodeLoginApi: + """Test cases for email code verification and login.""" + + @pytest.fixture + def app(self): + """Create Flask test application.""" + app = Flask(__name__) + app.config["TESTING"] = True + return app + + @pytest.fixture + def mock_account(self): + """Create mock account object.""" + account = MagicMock() + account.email = "test@example.com" + account.name = "Test User" + return account + + @pytest.fixture + def mock_token_pair(self): + """Create mock token pair object.""" + token_pair = MagicMock() + token_pair.access_token = "access_token" + token_pair.refresh_token = "refresh_token" + token_pair.csrf_token = "csrf_token" + return token_pair + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.AccountService.get_email_code_login_data") + @patch("controllers.console.auth.login.AccountService.revoke_email_code_login_token") + @patch("controllers.console.auth.login.AccountService.get_user_through_email") + @patch("controllers.console.auth.login.TenantService.get_join_tenants") + @patch("controllers.console.auth.login.AccountService.login") + @patch("controllers.console.auth.login.AccountService.reset_login_error_rate_limit") + def test_email_code_login_existing_user( + self, + mock_reset_rate_limit, + mock_login, + mock_get_tenants, + mock_get_user, + mock_revoke_token, + mock_get_data, + mock_db, + app, + mock_account, + mock_token_pair, + ): + """ + Test successful email code login for existing user. + + Verifies that: + - Email and code are validated + - Token is revoked after use + - User is logged in with token pair + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_get_data.return_value = {"email": "test@example.com", "code": "123456"} + mock_get_user.return_value = mock_account + mock_get_tenants.return_value = [MagicMock()] + mock_login.return_value = mock_token_pair + + # Act + with app.test_request_context( + "/email-code-login/validity", + method="POST", + json={"email": "test@example.com", "code": "123456", "token": "valid_token"}, + ): + api = EmailCodeLoginApi() + response = api.post() + + # Assert + assert response.json["result"] == "success" + mock_revoke_token.assert_called_once_with("valid_token") + mock_login.assert_called_once() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.AccountService.get_email_code_login_data") + @patch("controllers.console.auth.login.AccountService.revoke_email_code_login_token") + @patch("controllers.console.auth.login.AccountService.get_user_through_email") + @patch("controllers.console.auth.login.AccountService.create_account_and_tenant") + @patch("controllers.console.auth.login.AccountService.login") + @patch("controllers.console.auth.login.AccountService.reset_login_error_rate_limit") + def test_email_code_login_new_user_creates_account( + self, + mock_reset_rate_limit, + mock_login, + mock_create_account, + mock_get_user, + mock_revoke_token, + mock_get_data, + mock_db, + app, + mock_account, + mock_token_pair, + ): + """ + Test email code login creates new account for new user. + + Verifies that: + - New account is created when user doesn't exist + - Workspace is created for new user + - User is logged in after account creation + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_get_data.return_value = {"email": "newuser@example.com", "code": "123456"} + mock_get_user.return_value = None + mock_create_account.return_value = mock_account + mock_login.return_value = mock_token_pair + + # Act + with app.test_request_context( + "/email-code-login/validity", + method="POST", + json={"email": "newuser@example.com", "code": "123456", "token": "valid_token", "language": "en-US"}, + ): + api = EmailCodeLoginApi() + response = api.post() + + # Assert + assert response.json["result"] == "success" + mock_create_account.assert_called_once() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.AccountService.get_email_code_login_data") + def test_email_code_login_invalid_token(self, mock_get_data, mock_db, app): + """ + Test email code login with invalid token. + + Verifies that: + - InvalidTokenError is raised for invalid/expired tokens + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_get_data.return_value = None + + # Act & Assert + with app.test_request_context( + "/email-code-login/validity", + method="POST", + json={"email": "test@example.com", "code": "123456", "token": "invalid_token"}, + ): + api = EmailCodeLoginApi() + with pytest.raises(InvalidTokenError): + api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.AccountService.get_email_code_login_data") + def test_email_code_login_email_mismatch(self, mock_get_data, mock_db, app): + """ + Test email code login with mismatched email. + + Verifies that: + - InvalidEmailError is raised when email doesn't match token + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_get_data.return_value = {"email": "original@example.com", "code": "123456"} + + # Act & Assert + with app.test_request_context( + "/email-code-login/validity", + method="POST", + json={"email": "different@example.com", "code": "123456", "token": "token"}, + ): + api = EmailCodeLoginApi() + with pytest.raises(InvalidEmailError): + api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.AccountService.get_email_code_login_data") + def test_email_code_login_wrong_code(self, mock_get_data, mock_db, app): + """ + Test email code login with incorrect code. + + Verifies that: + - EmailCodeError is raised for wrong verification code + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_get_data.return_value = {"email": "test@example.com", "code": "123456"} + + # Act & Assert + with app.test_request_context( + "/email-code-login/validity", + method="POST", + json={"email": "test@example.com", "code": "wrong_code", "token": "token"}, + ): + api = EmailCodeLoginApi() + with pytest.raises(EmailCodeError): + api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.AccountService.get_email_code_login_data") + @patch("controllers.console.auth.login.AccountService.revoke_email_code_login_token") + @patch("controllers.console.auth.login.AccountService.get_user_through_email") + @patch("controllers.console.auth.login.TenantService.get_join_tenants") + @patch("controllers.console.auth.login.FeatureService.get_system_features") + def test_email_code_login_creates_workspace_for_user_without_tenant( + self, + mock_get_features, + mock_get_tenants, + mock_get_user, + mock_revoke_token, + mock_get_data, + mock_db, + app, + mock_account, + ): + """ + Test email code login creates workspace for user without tenant. + + Verifies that: + - Workspace is created when user has no tenants + - User is added as owner of new workspace + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_get_data.return_value = {"email": "test@example.com", "code": "123456"} + mock_get_user.return_value = mock_account + mock_get_tenants.return_value = [] + mock_features = MagicMock() + mock_features.is_allow_create_workspace = True + mock_features.license.workspaces.is_available.return_value = True + mock_get_features.return_value = mock_features + + # Act & Assert - Should not raise WorkspacesLimitExceeded + with app.test_request_context( + "/email-code-login/validity", + method="POST", + json={"email": "test@example.com", "code": "123456", "token": "token"}, + ): + api = EmailCodeLoginApi() + # This would complete the flow, but we're testing workspace creation logic + # In real implementation, TenantService.create_tenant would be called + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.AccountService.get_email_code_login_data") + @patch("controllers.console.auth.login.AccountService.revoke_email_code_login_token") + @patch("controllers.console.auth.login.AccountService.get_user_through_email") + @patch("controllers.console.auth.login.TenantService.get_join_tenants") + @patch("controllers.console.auth.login.FeatureService.get_system_features") + def test_email_code_login_workspace_limit_exceeded( + self, + mock_get_features, + mock_get_tenants, + mock_get_user, + mock_revoke_token, + mock_get_data, + mock_db, + app, + mock_account, + ): + """ + Test email code login fails when workspace limit exceeded. + + Verifies that: + - WorkspacesLimitExceeded is raised when limit reached + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_get_data.return_value = {"email": "test@example.com", "code": "123456"} + mock_get_user.return_value = mock_account + mock_get_tenants.return_value = [] + mock_features = MagicMock() + mock_features.license.workspaces.is_available.return_value = False + mock_get_features.return_value = mock_features + + # Act & Assert + with app.test_request_context( + "/email-code-login/validity", + method="POST", + json={"email": "test@example.com", "code": "123456", "token": "token"}, + ): + api = EmailCodeLoginApi() + with pytest.raises(WorkspacesLimitExceeded): + api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.AccountService.get_email_code_login_data") + @patch("controllers.console.auth.login.AccountService.revoke_email_code_login_token") + @patch("controllers.console.auth.login.AccountService.get_user_through_email") + @patch("controllers.console.auth.login.TenantService.get_join_tenants") + @patch("controllers.console.auth.login.FeatureService.get_system_features") + def test_email_code_login_workspace_creation_not_allowed( + self, + mock_get_features, + mock_get_tenants, + mock_get_user, + mock_revoke_token, + mock_get_data, + mock_db, + app, + mock_account, + ): + """ + Test email code login fails when workspace creation not allowed. + + Verifies that: + - NotAllowedCreateWorkspace is raised when creation disabled + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_get_data.return_value = {"email": "test@example.com", "code": "123456"} + mock_get_user.return_value = mock_account + mock_get_tenants.return_value = [] + mock_features = MagicMock() + mock_features.is_allow_create_workspace = False + mock_get_features.return_value = mock_features + + # Act & Assert + with app.test_request_context( + "/email-code-login/validity", + method="POST", + json={"email": "test@example.com", "code": "123456", "token": "token"}, + ): + api = EmailCodeLoginApi() + with pytest.raises(NotAllowedCreateWorkspace): + api.post() diff --git a/api/tests/unit_tests/controllers/console/auth/test_login_logout.py b/api/tests/unit_tests/controllers/console/auth/test_login_logout.py new file mode 100644 index 0000000000..8799d6484d --- /dev/null +++ b/api/tests/unit_tests/controllers/console/auth/test_login_logout.py @@ -0,0 +1,433 @@ +""" +Test suite for login and logout authentication flows. + +This module tests the core authentication endpoints including: +- Email/password login with rate limiting +- Session management and logout +- Cookie-based token handling +- Account status validation +""" + +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask +from flask_restx import Api + +from controllers.console.auth.error import ( + AuthenticationFailedError, + EmailPasswordLoginLimitError, + InvalidEmailError, +) +from controllers.console.auth.login import LoginApi, LogoutApi +from controllers.console.error import ( + AccountBannedError, + AccountInFreezeError, + WorkspacesLimitExceeded, +) +from services.errors.account import AccountLoginError, AccountPasswordError + + +class TestLoginApi: + """Test cases for the LoginApi endpoint.""" + + @pytest.fixture + def app(self): + """Create Flask test application.""" + app = Flask(__name__) + app.config["TESTING"] = True + return app + + @pytest.fixture + def api(self, app): + """Create Flask-RESTX API instance.""" + return Api(app) + + @pytest.fixture + def client(self, app, api): + """Create test client.""" + api.add_resource(LoginApi, "/login") + return app.test_client() + + @pytest.fixture + def mock_account(self): + """Create mock account object.""" + account = MagicMock() + account.id = "test-account-id" + account.email = "test@example.com" + account.name = "Test User" + return account + + @pytest.fixture + def mock_token_pair(self): + """Create mock token pair object.""" + token_pair = MagicMock() + token_pair.access_token = "mock_access_token" + token_pair.refresh_token = "mock_refresh_token" + token_pair.csrf_token = "mock_csrf_token" + return token_pair + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.dify_config.BILLING_ENABLED", False) + @patch("controllers.console.auth.login.AccountService.is_login_error_rate_limit") + @patch("controllers.console.auth.login.RegisterService.get_invitation_if_token_valid") + @patch("controllers.console.auth.login.AccountService.authenticate") + @patch("controllers.console.auth.login.TenantService.get_join_tenants") + @patch("controllers.console.auth.login.AccountService.login") + @patch("controllers.console.auth.login.AccountService.reset_login_error_rate_limit") + def test_successful_login_without_invitation( + self, + mock_reset_rate_limit, + mock_login, + mock_get_tenants, + mock_authenticate, + mock_get_invitation, + mock_is_rate_limit, + mock_db, + app, + mock_account, + mock_token_pair, + ): + """ + Test successful login flow without invitation token. + + Verifies that: + - Valid credentials authenticate successfully + - Tokens are generated and set in cookies + - Rate limit is reset after successful login + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_rate_limit.return_value = False + mock_get_invitation.return_value = None + mock_authenticate.return_value = mock_account + mock_get_tenants.return_value = [MagicMock()] # Has at least one tenant + mock_login.return_value = mock_token_pair + + # Act + with app.test_request_context( + "/login", method="POST", json={"email": "test@example.com", "password": "ValidPass123!"} + ): + login_api = LoginApi() + response = login_api.post() + + # Assert + mock_authenticate.assert_called_once_with("test@example.com", "ValidPass123!") + mock_login.assert_called_once() + mock_reset_rate_limit.assert_called_once_with("test@example.com") + assert response.json["result"] == "success" + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.dify_config.BILLING_ENABLED", False) + @patch("controllers.console.auth.login.AccountService.is_login_error_rate_limit") + @patch("controllers.console.auth.login.RegisterService.get_invitation_if_token_valid") + @patch("controllers.console.auth.login.AccountService.authenticate") + @patch("controllers.console.auth.login.TenantService.get_join_tenants") + @patch("controllers.console.auth.login.AccountService.login") + @patch("controllers.console.auth.login.AccountService.reset_login_error_rate_limit") + def test_successful_login_with_valid_invitation( + self, + mock_reset_rate_limit, + mock_login, + mock_get_tenants, + mock_authenticate, + mock_get_invitation, + mock_is_rate_limit, + mock_db, + app, + mock_account, + mock_token_pair, + ): + """ + Test successful login with valid invitation token. + + Verifies that: + - Invitation token is validated + - Email matches invitation email + - Authentication proceeds with invitation token + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_rate_limit.return_value = False + mock_get_invitation.return_value = {"data": {"email": "test@example.com"}} + mock_authenticate.return_value = mock_account + mock_get_tenants.return_value = [MagicMock()] + mock_login.return_value = mock_token_pair + + # Act + with app.test_request_context( + "/login", + method="POST", + json={"email": "test@example.com", "password": "ValidPass123!", "invite_token": "valid_token"}, + ): + login_api = LoginApi() + response = login_api.post() + + # Assert + mock_authenticate.assert_called_once_with("test@example.com", "ValidPass123!", "valid_token") + assert response.json["result"] == "success" + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.dify_config.BILLING_ENABLED", False) + @patch("controllers.console.auth.login.AccountService.is_login_error_rate_limit") + @patch("controllers.console.auth.login.RegisterService.get_invitation_if_token_valid") + def test_login_fails_when_rate_limited(self, mock_get_invitation, mock_is_rate_limit, mock_db, app): + """ + Test login rejection when rate limit is exceeded. + + Verifies that: + - Rate limit check is performed before authentication + - EmailPasswordLoginLimitError is raised when limit exceeded + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_rate_limit.return_value = True + mock_get_invitation.return_value = None + + # Act & Assert + with app.test_request_context( + "/login", method="POST", json={"email": "test@example.com", "password": "password"} + ): + login_api = LoginApi() + with pytest.raises(EmailPasswordLoginLimitError): + login_api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.dify_config.BILLING_ENABLED", True) + @patch("controllers.console.auth.login.BillingService.is_email_in_freeze") + def test_login_fails_when_account_frozen(self, mock_is_frozen, mock_db, app): + """ + Test login rejection for frozen accounts. + + Verifies that: + - Billing freeze status is checked when billing enabled + - AccountInFreezeError is raised for frozen accounts + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_frozen.return_value = True + + # Act & Assert + with app.test_request_context( + "/login", method="POST", json={"email": "frozen@example.com", "password": "password"} + ): + login_api = LoginApi() + with pytest.raises(AccountInFreezeError): + login_api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.dify_config.BILLING_ENABLED", False) + @patch("controllers.console.auth.login.AccountService.is_login_error_rate_limit") + @patch("controllers.console.auth.login.RegisterService.get_invitation_if_token_valid") + @patch("controllers.console.auth.login.AccountService.authenticate") + @patch("controllers.console.auth.login.AccountService.add_login_error_rate_limit") + def test_login_fails_with_invalid_credentials( + self, + mock_add_rate_limit, + mock_authenticate, + mock_get_invitation, + mock_is_rate_limit, + mock_db, + app, + ): + """ + Test login failure with invalid credentials. + + Verifies that: + - AuthenticationFailedError is raised for wrong password + - Login error rate limit counter is incremented + - Generic error message prevents user enumeration + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_rate_limit.return_value = False + mock_get_invitation.return_value = None + mock_authenticate.side_effect = AccountPasswordError("Invalid password") + + # Act & Assert + with app.test_request_context( + "/login", method="POST", json={"email": "test@example.com", "password": "WrongPass123!"} + ): + login_api = LoginApi() + with pytest.raises(AuthenticationFailedError): + login_api.post() + + mock_add_rate_limit.assert_called_once_with("test@example.com") + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.dify_config.BILLING_ENABLED", False) + @patch("controllers.console.auth.login.AccountService.is_login_error_rate_limit") + @patch("controllers.console.auth.login.RegisterService.get_invitation_if_token_valid") + @patch("controllers.console.auth.login.AccountService.authenticate") + def test_login_fails_for_banned_account( + self, mock_authenticate, mock_get_invitation, mock_is_rate_limit, mock_db, app + ): + """ + Test login rejection for banned accounts. + + Verifies that: + - AccountBannedError is raised for banned accounts + - Login is prevented even with valid credentials + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_rate_limit.return_value = False + mock_get_invitation.return_value = None + mock_authenticate.side_effect = AccountLoginError("Account is banned") + + # Act & Assert + with app.test_request_context( + "/login", method="POST", json={"email": "banned@example.com", "password": "ValidPass123!"} + ): + login_api = LoginApi() + with pytest.raises(AccountBannedError): + login_api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.dify_config.BILLING_ENABLED", False) + @patch("controllers.console.auth.login.AccountService.is_login_error_rate_limit") + @patch("controllers.console.auth.login.RegisterService.get_invitation_if_token_valid") + @patch("controllers.console.auth.login.AccountService.authenticate") + @patch("controllers.console.auth.login.TenantService.get_join_tenants") + @patch("controllers.console.auth.login.FeatureService.get_system_features") + def test_login_fails_when_no_workspace_and_limit_exceeded( + self, + mock_get_features, + mock_get_tenants, + mock_authenticate, + mock_get_invitation, + mock_is_rate_limit, + mock_db, + app, + mock_account, + ): + """ + Test login failure when user has no workspace and workspace limit exceeded. + + Verifies that: + - WorkspacesLimitExceeded is raised when limit reached + - User cannot login without an assigned workspace + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_rate_limit.return_value = False + mock_get_invitation.return_value = None + mock_authenticate.return_value = mock_account + mock_get_tenants.return_value = [] # No tenants + + mock_features = MagicMock() + mock_features.is_allow_create_workspace = True + mock_features.license.workspaces.is_available.return_value = False + mock_get_features.return_value = mock_features + + # Act & Assert + with app.test_request_context( + "/login", method="POST", json={"email": "test@example.com", "password": "ValidPass123!"} + ): + login_api = LoginApi() + with pytest.raises(WorkspacesLimitExceeded): + login_api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.dify_config.BILLING_ENABLED", False) + @patch("controllers.console.auth.login.AccountService.is_login_error_rate_limit") + @patch("controllers.console.auth.login.RegisterService.get_invitation_if_token_valid") + def test_login_invitation_email_mismatch(self, mock_get_invitation, mock_is_rate_limit, mock_db, app): + """ + Test login failure when invitation email doesn't match login email. + + Verifies that: + - InvalidEmailError is raised for email mismatch + - Security check prevents invitation token abuse + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_rate_limit.return_value = False + mock_get_invitation.return_value = {"data": {"email": "invited@example.com"}} + + # Act & Assert + with app.test_request_context( + "/login", + method="POST", + json={"email": "different@example.com", "password": "ValidPass123!", "invite_token": "token"}, + ): + login_api = LoginApi() + with pytest.raises(InvalidEmailError): + login_api.post() + + +class TestLogoutApi: + """Test cases for the LogoutApi endpoint.""" + + @pytest.fixture + def app(self): + """Create Flask test application.""" + app = Flask(__name__) + app.config["TESTING"] = True + return app + + @pytest.fixture + def mock_account(self): + """Create mock account object.""" + account = MagicMock() + account.id = "test-account-id" + account.email = "test@example.com" + return account + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.current_account_with_tenant") + @patch("controllers.console.auth.login.AccountService.logout") + @patch("controllers.console.auth.login.flask_login.logout_user") + def test_successful_logout( + self, mock_logout_user, mock_service_logout, mock_current_account, mock_db, app, mock_account + ): + """ + Test successful logout flow. + + Verifies that: + - User session is terminated + - AccountService.logout is called + - All authentication cookies are cleared + - Success response is returned + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_current_account.return_value = (mock_account, MagicMock()) + + # Act + with app.test_request_context("/logout", method="POST"): + logout_api = LogoutApi() + response = logout_api.post() + + # Assert + mock_service_logout.assert_called_once_with(account=mock_account) + mock_logout_user.assert_called_once() + assert response.json["result"] == "success" + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.login.current_account_with_tenant") + @patch("controllers.console.auth.login.flask_login") + def test_logout_anonymous_user(self, mock_flask_login, mock_current_account, mock_db, app): + """ + Test logout for anonymous (not logged in) user. + + Verifies that: + - Anonymous users can call logout endpoint + - No errors are raised + - Success response is returned + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + # Create a mock anonymous user that will pass isinstance check + anonymous_user = MagicMock() + mock_flask_login.AnonymousUserMixin = type("AnonymousUserMixin", (), {}) + anonymous_user.__class__ = mock_flask_login.AnonymousUserMixin + mock_current_account.return_value = (anonymous_user, None) + + # Act + with app.test_request_context("/logout", method="POST"): + logout_api = LogoutApi() + response = logout_api.post() + + # Assert + assert response.json["result"] == "success" diff --git a/api/tests/unit_tests/controllers/console/auth/test_password_reset.py b/api/tests/unit_tests/controllers/console/auth/test_password_reset.py new file mode 100644 index 0000000000..f584952a00 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/auth/test_password_reset.py @@ -0,0 +1,508 @@ +""" +Test suite for password reset authentication flows. + +This module tests the password reset mechanism including: +- Password reset email sending +- Verification code validation +- Password reset with token +- Rate limiting and security checks +""" + +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask + +from controllers.console.auth.error import ( + EmailCodeError, + EmailPasswordResetLimitError, + InvalidEmailError, + InvalidTokenError, + PasswordMismatchError, +) +from controllers.console.auth.forgot_password import ( + ForgotPasswordCheckApi, + ForgotPasswordResetApi, + ForgotPasswordSendEmailApi, +) +from controllers.console.error import AccountNotFound, EmailSendIpLimitError + + +class TestForgotPasswordSendEmailApi: + """Test cases for sending password reset emails.""" + + @pytest.fixture + def app(self): + """Create Flask test application.""" + app = Flask(__name__) + app.config["TESTING"] = True + return app + + @pytest.fixture + def mock_account(self): + """Create mock account object.""" + account = MagicMock() + account.email = "test@example.com" + account.name = "Test User" + return account + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.forgot_password.db") + @patch("controllers.console.auth.forgot_password.AccountService.is_email_send_ip_limit") + @patch("controllers.console.auth.forgot_password.Session") + @patch("controllers.console.auth.forgot_password.select") + @patch("controllers.console.auth.forgot_password.AccountService.send_reset_password_email") + @patch("controllers.console.auth.forgot_password.FeatureService.get_system_features") + def test_send_reset_email_success( + self, + mock_get_features, + mock_send_email, + mock_select, + mock_session, + mock_is_ip_limit, + mock_forgot_db, + mock_wraps_db, + app, + mock_account, + ): + """ + Test successful password reset email sending. + + Verifies that: + - Email is sent to valid account + - Reset token is generated and returned + - IP rate limiting is checked + """ + # Arrange + mock_wraps_db.session.query.return_value.first.return_value = MagicMock() + mock_forgot_db.engine = MagicMock() + mock_is_ip_limit.return_value = False + mock_session_instance = MagicMock() + mock_session_instance.execute.return_value.scalar_one_or_none.return_value = mock_account + mock_session.return_value.__enter__.return_value = mock_session_instance + mock_send_email.return_value = "reset_token_123" + mock_get_features.return_value.is_allow_register = True + + # Act + with app.test_request_context( + "/forgot-password", method="POST", json={"email": "test@example.com", "language": "en-US"} + ): + api = ForgotPasswordSendEmailApi() + response = api.post() + + # Assert + assert response["result"] == "success" + assert response["data"] == "reset_token_123" + mock_send_email.assert_called_once() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.forgot_password.AccountService.is_email_send_ip_limit") + def test_send_reset_email_ip_rate_limited(self, mock_is_ip_limit, mock_db, app): + """ + Test password reset email blocked by IP rate limit. + + Verifies that: + - EmailSendIpLimitError is raised when IP limit exceeded + - No email is sent when rate limited + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_ip_limit.return_value = True + + # Act & Assert + with app.test_request_context("/forgot-password", method="POST", json={"email": "test@example.com"}): + api = ForgotPasswordSendEmailApi() + with pytest.raises(EmailSendIpLimitError): + api.post() + + @pytest.mark.parametrize( + ("language_input", "expected_language"), + [ + ("zh-Hans", "zh-Hans"), + ("en-US", "en-US"), + ("fr-FR", "en-US"), # Defaults to en-US for unsupported + (None, "en-US"), # Defaults to en-US when not provided + ], + ) + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.forgot_password.db") + @patch("controllers.console.auth.forgot_password.AccountService.is_email_send_ip_limit") + @patch("controllers.console.auth.forgot_password.Session") + @patch("controllers.console.auth.forgot_password.select") + @patch("controllers.console.auth.forgot_password.AccountService.send_reset_password_email") + @patch("controllers.console.auth.forgot_password.FeatureService.get_system_features") + def test_send_reset_email_language_handling( + self, + mock_get_features, + mock_send_email, + mock_select, + mock_session, + mock_is_ip_limit, + mock_forgot_db, + mock_wraps_db, + app, + mock_account, + language_input, + expected_language, + ): + """ + Test password reset email with different language preferences. + + Verifies that: + - Language parameter is correctly processed + - Unsupported languages default to en-US + """ + # Arrange + mock_wraps_db.session.query.return_value.first.return_value = MagicMock() + mock_forgot_db.engine = MagicMock() + mock_is_ip_limit.return_value = False + mock_session_instance = MagicMock() + mock_session_instance.execute.return_value.scalar_one_or_none.return_value = mock_account + mock_session.return_value.__enter__.return_value = mock_session_instance + mock_send_email.return_value = "token" + mock_get_features.return_value.is_allow_register = True + + # Act + with app.test_request_context( + "/forgot-password", method="POST", json={"email": "test@example.com", "language": language_input} + ): + api = ForgotPasswordSendEmailApi() + api.post() + + # Assert + call_args = mock_send_email.call_args + assert call_args.kwargs["language"] == expected_language + + +class TestForgotPasswordCheckApi: + """Test cases for verifying password reset codes.""" + + @pytest.fixture + def app(self): + """Create Flask test application.""" + app = Flask(__name__) + app.config["TESTING"] = True + return app + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.forgot_password.AccountService.is_forgot_password_error_rate_limit") + @patch("controllers.console.auth.forgot_password.AccountService.get_reset_password_data") + @patch("controllers.console.auth.forgot_password.AccountService.revoke_reset_password_token") + @patch("controllers.console.auth.forgot_password.AccountService.generate_reset_password_token") + @patch("controllers.console.auth.forgot_password.AccountService.reset_forgot_password_error_rate_limit") + def test_verify_code_success( + self, + mock_reset_rate_limit, + mock_generate_token, + mock_revoke_token, + mock_get_data, + mock_is_rate_limit, + mock_db, + app, + ): + """ + Test successful verification code validation. + + Verifies that: + - Valid code is accepted + - Old token is revoked + - New token is generated for reset phase + - Rate limit is reset on success + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_rate_limit.return_value = False + mock_get_data.return_value = {"email": "test@example.com", "code": "123456"} + mock_generate_token.return_value = (None, "new_token") + + # Act + with app.test_request_context( + "/forgot-password/validity", + method="POST", + json={"email": "test@example.com", "code": "123456", "token": "old_token"}, + ): + api = ForgotPasswordCheckApi() + response = api.post() + + # Assert + assert response["is_valid"] is True + assert response["email"] == "test@example.com" + assert response["token"] == "new_token" + mock_revoke_token.assert_called_once_with("old_token") + mock_reset_rate_limit.assert_called_once_with("test@example.com") + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.forgot_password.AccountService.is_forgot_password_error_rate_limit") + def test_verify_code_rate_limited(self, mock_is_rate_limit, mock_db, app): + """ + Test code verification blocked by rate limit. + + Verifies that: + - EmailPasswordResetLimitError is raised when limit exceeded + - Prevents brute force attacks on verification codes + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_rate_limit.return_value = True + + # Act & Assert + with app.test_request_context( + "/forgot-password/validity", + method="POST", + json={"email": "test@example.com", "code": "123456", "token": "token"}, + ): + api = ForgotPasswordCheckApi() + with pytest.raises(EmailPasswordResetLimitError): + api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.forgot_password.AccountService.is_forgot_password_error_rate_limit") + @patch("controllers.console.auth.forgot_password.AccountService.get_reset_password_data") + def test_verify_code_invalid_token(self, mock_get_data, mock_is_rate_limit, mock_db, app): + """ + Test code verification with invalid token. + + Verifies that: + - InvalidTokenError is raised for invalid/expired tokens + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_rate_limit.return_value = False + mock_get_data.return_value = None + + # Act & Assert + with app.test_request_context( + "/forgot-password/validity", + method="POST", + json={"email": "test@example.com", "code": "123456", "token": "invalid_token"}, + ): + api = ForgotPasswordCheckApi() + with pytest.raises(InvalidTokenError): + api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.forgot_password.AccountService.is_forgot_password_error_rate_limit") + @patch("controllers.console.auth.forgot_password.AccountService.get_reset_password_data") + def test_verify_code_email_mismatch(self, mock_get_data, mock_is_rate_limit, mock_db, app): + """ + Test code verification with mismatched email. + + Verifies that: + - InvalidEmailError is raised when email doesn't match token + - Prevents token abuse + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_rate_limit.return_value = False + mock_get_data.return_value = {"email": "original@example.com", "code": "123456"} + + # Act & Assert + with app.test_request_context( + "/forgot-password/validity", + method="POST", + json={"email": "different@example.com", "code": "123456", "token": "token"}, + ): + api = ForgotPasswordCheckApi() + with pytest.raises(InvalidEmailError): + api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.forgot_password.AccountService.is_forgot_password_error_rate_limit") + @patch("controllers.console.auth.forgot_password.AccountService.get_reset_password_data") + @patch("controllers.console.auth.forgot_password.AccountService.add_forgot_password_error_rate_limit") + def test_verify_code_wrong_code(self, mock_add_rate_limit, mock_get_data, mock_is_rate_limit, mock_db, app): + """ + Test code verification with incorrect code. + + Verifies that: + - EmailCodeError is raised for wrong code + - Rate limit counter is incremented + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_is_rate_limit.return_value = False + mock_get_data.return_value = {"email": "test@example.com", "code": "123456"} + + # Act & Assert + with app.test_request_context( + "/forgot-password/validity", + method="POST", + json={"email": "test@example.com", "code": "wrong_code", "token": "token"}, + ): + api = ForgotPasswordCheckApi() + with pytest.raises(EmailCodeError): + api.post() + + mock_add_rate_limit.assert_called_once_with("test@example.com") + + +class TestForgotPasswordResetApi: + """Test cases for resetting password with verified token.""" + + @pytest.fixture + def app(self): + """Create Flask test application.""" + app = Flask(__name__) + app.config["TESTING"] = True + return app + + @pytest.fixture + def mock_account(self): + """Create mock account object.""" + account = MagicMock() + account.email = "test@example.com" + account.name = "Test User" + return account + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.forgot_password.db") + @patch("controllers.console.auth.forgot_password.AccountService.get_reset_password_data") + @patch("controllers.console.auth.forgot_password.AccountService.revoke_reset_password_token") + @patch("controllers.console.auth.forgot_password.Session") + @patch("controllers.console.auth.forgot_password.select") + @patch("controllers.console.auth.forgot_password.TenantService.get_join_tenants") + def test_reset_password_success( + self, + mock_get_tenants, + mock_select, + mock_session, + mock_revoke_token, + mock_get_data, + mock_forgot_db, + mock_wraps_db, + app, + mock_account, + ): + """ + Test successful password reset. + + Verifies that: + - Password is updated with new hashed value + - Token is revoked after use + - Success response is returned + """ + # Arrange + mock_wraps_db.session.query.return_value.first.return_value = MagicMock() + mock_forgot_db.engine = MagicMock() + mock_get_data.return_value = {"email": "test@example.com", "phase": "reset"} + mock_session_instance = MagicMock() + mock_session_instance.execute.return_value.scalar_one_or_none.return_value = mock_account + mock_session.return_value.__enter__.return_value = mock_session_instance + mock_get_tenants.return_value = [MagicMock()] + + # Act + with app.test_request_context( + "/forgot-password/resets", + method="POST", + json={"token": "valid_token", "new_password": "NewPass123!", "password_confirm": "NewPass123!"}, + ): + api = ForgotPasswordResetApi() + response = api.post() + + # Assert + assert response["result"] == "success" + mock_revoke_token.assert_called_once_with("valid_token") + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.forgot_password.AccountService.get_reset_password_data") + def test_reset_password_mismatch(self, mock_get_data, mock_db, app): + """ + Test password reset with mismatched passwords. + + Verifies that: + - PasswordMismatchError is raised when passwords don't match + - No password update occurs + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_get_data.return_value = {"email": "test@example.com", "phase": "reset"} + + # Act & Assert + with app.test_request_context( + "/forgot-password/resets", + method="POST", + json={"token": "token", "new_password": "NewPass123!", "password_confirm": "DifferentPass123!"}, + ): + api = ForgotPasswordResetApi() + with pytest.raises(PasswordMismatchError): + api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.forgot_password.AccountService.get_reset_password_data") + def test_reset_password_invalid_token(self, mock_get_data, mock_db, app): + """ + Test password reset with invalid token. + + Verifies that: + - InvalidTokenError is raised for invalid/expired tokens + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_get_data.return_value = None + + # Act & Assert + with app.test_request_context( + "/forgot-password/resets", + method="POST", + json={"token": "invalid_token", "new_password": "NewPass123!", "password_confirm": "NewPass123!"}, + ): + api = ForgotPasswordResetApi() + with pytest.raises(InvalidTokenError): + api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.forgot_password.AccountService.get_reset_password_data") + def test_reset_password_wrong_phase(self, mock_get_data, mock_db, app): + """ + Test password reset with token not in reset phase. + + Verifies that: + - InvalidTokenError is raised when token is not in reset phase + - Prevents use of verification-phase tokens for reset + """ + # Arrange + mock_db.session.query.return_value.first.return_value = MagicMock() + mock_get_data.return_value = {"email": "test@example.com", "phase": "verify"} + + # Act & Assert + with app.test_request_context( + "/forgot-password/resets", + method="POST", + json={"token": "token", "new_password": "NewPass123!", "password_confirm": "NewPass123!"}, + ): + api = ForgotPasswordResetApi() + with pytest.raises(InvalidTokenError): + api.post() + + @patch("controllers.console.wraps.db") + @patch("controllers.console.auth.forgot_password.db") + @patch("controllers.console.auth.forgot_password.AccountService.get_reset_password_data") + @patch("controllers.console.auth.forgot_password.AccountService.revoke_reset_password_token") + @patch("controllers.console.auth.forgot_password.Session") + @patch("controllers.console.auth.forgot_password.select") + def test_reset_password_account_not_found( + self, mock_select, mock_session, mock_revoke_token, mock_get_data, mock_forgot_db, mock_wraps_db, app + ): + """ + Test password reset for non-existent account. + + Verifies that: + - AccountNotFound is raised when account doesn't exist + """ + # Arrange + mock_wraps_db.session.query.return_value.first.return_value = MagicMock() + mock_forgot_db.engine = MagicMock() + mock_get_data.return_value = {"email": "nonexistent@example.com", "phase": "reset"} + mock_session_instance = MagicMock() + mock_session_instance.execute.return_value.scalar_one_or_none.return_value = None + mock_session.return_value.__enter__.return_value = mock_session_instance + + # Act & Assert + with app.test_request_context( + "/forgot-password/resets", + method="POST", + json={"token": "token", "new_password": "NewPass123!", "password_confirm": "NewPass123!"}, + ): + api = ForgotPasswordResetApi() + with pytest.raises(AccountNotFound): + api.post() diff --git a/api/tests/unit_tests/controllers/console/auth/test_token_refresh.py b/api/tests/unit_tests/controllers/console/auth/test_token_refresh.py new file mode 100644 index 0000000000..8da930b7fa --- /dev/null +++ b/api/tests/unit_tests/controllers/console/auth/test_token_refresh.py @@ -0,0 +1,198 @@ +""" +Test suite for token refresh authentication flows. + +This module tests the token refresh mechanism including: +- Access token refresh using refresh token +- Cookie-based token extraction and renewal +- Token expiration and validation +- Error handling for invalid tokens +""" + +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask +from flask_restx import Api + +from controllers.console.auth.login import RefreshTokenApi + + +class TestRefreshTokenApi: + """Test cases for the RefreshTokenApi endpoint.""" + + @pytest.fixture + def app(self): + """Create Flask test application.""" + app = Flask(__name__) + app.config["TESTING"] = True + return app + + @pytest.fixture + def api(self, app): + """Create Flask-RESTX API instance.""" + return Api(app) + + @pytest.fixture + def client(self, app, api): + """Create test client.""" + api.add_resource(RefreshTokenApi, "/refresh-token") + return app.test_client() + + @pytest.fixture + def mock_token_pair(self): + """Create mock token pair object.""" + token_pair = MagicMock() + token_pair.access_token = "new_access_token" + token_pair.refresh_token = "new_refresh_token" + token_pair.csrf_token = "new_csrf_token" + return token_pair + + @patch("controllers.console.auth.login.extract_refresh_token") + @patch("controllers.console.auth.login.AccountService.refresh_token") + def test_successful_token_refresh(self, mock_refresh_token, mock_extract_token, app, mock_token_pair): + """ + Test successful token refresh flow. + + Verifies that: + - Refresh token is extracted from cookies + - New token pair is generated + - New tokens are set in response cookies + - Success response is returned + """ + # Arrange + mock_extract_token.return_value = "valid_refresh_token" + mock_refresh_token.return_value = mock_token_pair + + # Act + with app.test_request_context("/refresh-token", method="POST"): + refresh_api = RefreshTokenApi() + response = refresh_api.post() + + # Assert + mock_extract_token.assert_called_once() + mock_refresh_token.assert_called_once_with("valid_refresh_token") + assert response.json["result"] == "success" + + @patch("controllers.console.auth.login.extract_refresh_token") + def test_refresh_fails_without_token(self, mock_extract_token, app): + """ + Test token refresh failure when no refresh token provided. + + Verifies that: + - Error is returned when refresh token is missing + - 401 status code is returned + - Appropriate error message is provided + """ + # Arrange + mock_extract_token.return_value = None + + # Act + with app.test_request_context("/refresh-token", method="POST"): + refresh_api = RefreshTokenApi() + response, status_code = refresh_api.post() + + # Assert + assert status_code == 401 + assert response["result"] == "fail" + assert "No refresh token provided" in response["message"] + + @patch("controllers.console.auth.login.extract_refresh_token") + @patch("controllers.console.auth.login.AccountService.refresh_token") + def test_refresh_fails_with_invalid_token(self, mock_refresh_token, mock_extract_token, app): + """ + Test token refresh failure with invalid refresh token. + + Verifies that: + - Exception is caught when token is invalid + - 401 status code is returned + - Error message is included in response + """ + # Arrange + mock_extract_token.return_value = "invalid_refresh_token" + mock_refresh_token.side_effect = Exception("Invalid refresh token") + + # Act + with app.test_request_context("/refresh-token", method="POST"): + refresh_api = RefreshTokenApi() + response, status_code = refresh_api.post() + + # Assert + assert status_code == 401 + assert response["result"] == "fail" + assert "Invalid refresh token" in response["message"] + + @patch("controllers.console.auth.login.extract_refresh_token") + @patch("controllers.console.auth.login.AccountService.refresh_token") + def test_refresh_fails_with_expired_token(self, mock_refresh_token, mock_extract_token, app): + """ + Test token refresh failure with expired refresh token. + + Verifies that: + - Expired tokens are rejected + - 401 status code is returned + - Appropriate error handling + """ + # Arrange + mock_extract_token.return_value = "expired_refresh_token" + mock_refresh_token.side_effect = Exception("Refresh token expired") + + # Act + with app.test_request_context("/refresh-token", method="POST"): + refresh_api = RefreshTokenApi() + response, status_code = refresh_api.post() + + # Assert + assert status_code == 401 + assert response["result"] == "fail" + assert "expired" in response["message"].lower() + + @patch("controllers.console.auth.login.extract_refresh_token") + @patch("controllers.console.auth.login.AccountService.refresh_token") + def test_refresh_with_empty_token(self, mock_refresh_token, mock_extract_token, app): + """ + Test token refresh with empty string token. + + Verifies that: + - Empty string is treated as no token + - 401 status code is returned + """ + # Arrange + mock_extract_token.return_value = "" + + # Act + with app.test_request_context("/refresh-token", method="POST"): + refresh_api = RefreshTokenApi() + response, status_code = refresh_api.post() + + # Assert + assert status_code == 401 + assert response["result"] == "fail" + + @patch("controllers.console.auth.login.extract_refresh_token") + @patch("controllers.console.auth.login.AccountService.refresh_token") + def test_refresh_updates_all_tokens(self, mock_refresh_token, mock_extract_token, app, mock_token_pair): + """ + Test that token refresh updates all three tokens. + + Verifies that: + - Access token is updated + - Refresh token is rotated + - CSRF token is regenerated + """ + # Arrange + mock_extract_token.return_value = "valid_refresh_token" + mock_refresh_token.return_value = mock_token_pair + + # Act + with app.test_request_context("/refresh-token", method="POST"): + refresh_api = RefreshTokenApi() + response = refresh_api.post() + + # Assert + assert response.json["result"] == "success" + # Verify new token pair was generated + mock_refresh_token.assert_called_once_with("valid_refresh_token") + # In real implementation, cookies would be set with new values + assert mock_token_pair.access_token == "new_access_token" + assert mock_token_pair.refresh_token == "new_refresh_token" + assert mock_token_pair.csrf_token == "new_csrf_token" diff --git a/api/tests/unit_tests/controllers/console/billing/test_billing.py b/api/tests/unit_tests/controllers/console/billing/test_billing.py new file mode 100644 index 0000000000..eaa489d56b --- /dev/null +++ b/api/tests/unit_tests/controllers/console/billing/test_billing.py @@ -0,0 +1,253 @@ +import base64 +import json +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask +from werkzeug.exceptions import BadRequest + +from controllers.console.billing.billing import PartnerTenants +from models.account import Account + + +class TestPartnerTenants: + """Unit tests for PartnerTenants controller.""" + + @pytest.fixture + def app(self): + """Create Flask app for testing.""" + app = Flask(__name__) + app.config["TESTING"] = True + app.config["SECRET_KEY"] = "test-secret-key" + return app + + @pytest.fixture + def mock_account(self): + """Create a mock account.""" + account = MagicMock(spec=Account) + account.id = "account-123" + account.email = "test@example.com" + account.current_tenant_id = "tenant-456" + account.is_authenticated = True + return account + + @pytest.fixture + def mock_billing_service(self): + """Mock BillingService.""" + with patch("controllers.console.billing.billing.BillingService") as mock_service: + yield mock_service + + @pytest.fixture + def mock_decorators(self): + """Mock decorators to avoid database access.""" + with ( + patch("controllers.console.wraps.db") as mock_db, + patch("controllers.console.wraps.dify_config.EDITION", "CLOUD"), + patch("libs.login.dify_config.LOGIN_DISABLED", False), + patch("libs.login.check_csrf_token") as mock_csrf, + ): + mock_db.session.query.return_value.first.return_value = MagicMock() # Mock setup exists + mock_csrf.return_value = None + yield {"db": mock_db, "csrf": mock_csrf} + + def test_put_success(self, app, mock_account, mock_billing_service, mock_decorators): + """Test successful partner tenants bindings sync.""" + # Arrange + partner_key_encoded = base64.b64encode(b"partner-key-123").decode("utf-8") + click_id = "click-id-789" + expected_response = {"result": "success", "data": {"synced": True}} + + mock_billing_service.sync_partner_tenants_bindings.return_value = expected_response + + with app.test_request_context( + method="PUT", + json={"click_id": click_id}, + path=f"/billing/partners/{partner_key_encoded}/tenants", + ): + with ( + patch( + "controllers.console.billing.billing.current_account_with_tenant", + return_value=(mock_account, "tenant-456"), + ), + patch("libs.login._get_user", return_value=mock_account), + ): + resource = PartnerTenants() + result = resource.put(partner_key_encoded) + + # Assert + assert result == expected_response + mock_billing_service.sync_partner_tenants_bindings.assert_called_once_with( + mock_account.id, "partner-key-123", click_id + ) + + def test_put_invalid_partner_key_base64(self, app, mock_account, mock_billing_service, mock_decorators): + """Test that invalid base64 partner_key raises BadRequest.""" + # Arrange + invalid_partner_key = "invalid-base64-!@#$" + click_id = "click-id-789" + + with app.test_request_context( + method="PUT", + json={"click_id": click_id}, + path=f"/billing/partners/{invalid_partner_key}/tenants", + ): + with ( + patch( + "controllers.console.billing.billing.current_account_with_tenant", + return_value=(mock_account, "tenant-456"), + ), + patch("libs.login._get_user", return_value=mock_account), + ): + resource = PartnerTenants() + + # Act & Assert + with pytest.raises(BadRequest) as exc_info: + resource.put(invalid_partner_key) + assert "Invalid partner_key" in str(exc_info.value) + + def test_put_missing_click_id(self, app, mock_account, mock_billing_service, mock_decorators): + """Test that missing click_id raises BadRequest.""" + # Arrange + partner_key_encoded = base64.b64encode(b"partner-key-123").decode("utf-8") + + with app.test_request_context( + method="PUT", + json={}, + path=f"/billing/partners/{partner_key_encoded}/tenants", + ): + with ( + patch( + "controllers.console.billing.billing.current_account_with_tenant", + return_value=(mock_account, "tenant-456"), + ), + patch("libs.login._get_user", return_value=mock_account), + ): + resource = PartnerTenants() + + # Act & Assert + # reqparse will raise BadRequest for missing required field + with pytest.raises(BadRequest): + resource.put(partner_key_encoded) + + def test_put_billing_service_json_decode_error(self, app, mock_account, mock_billing_service, mock_decorators): + """Test handling of billing service JSON decode error. + + When billing service returns non-200 status code with invalid JSON response, + response.json() raises JSONDecodeError. This exception propagates to the controller + and should be handled by the global error handler (handle_general_exception), + which returns a 500 status code with error details. + + Note: In unit tests, when directly calling resource.put(), the exception is raised + directly. In actual Flask application, the error handler would catch it and return + a 500 response with JSON: {"code": "unknown", "message": "...", "status": 500} + """ + # Arrange + partner_key_encoded = base64.b64encode(b"partner-key-123").decode("utf-8") + click_id = "click-id-789" + + # Simulate JSON decode error when billing service returns invalid JSON + # This happens when billing service returns non-200 with empty/invalid response body + json_decode_error = json.JSONDecodeError("Expecting value", "", 0) + mock_billing_service.sync_partner_tenants_bindings.side_effect = json_decode_error + + with app.test_request_context( + method="PUT", + json={"click_id": click_id}, + path=f"/billing/partners/{partner_key_encoded}/tenants", + ): + with ( + patch( + "controllers.console.billing.billing.current_account_with_tenant", + return_value=(mock_account, "tenant-456"), + ), + patch("libs.login._get_user", return_value=mock_account), + ): + resource = PartnerTenants() + + # Act & Assert + # JSONDecodeError will be raised from the controller + # In actual Flask app, this would be caught by handle_general_exception + # which returns: {"code": "unknown", "message": str(e), "status": 500} + with pytest.raises(json.JSONDecodeError) as exc_info: + resource.put(partner_key_encoded) + + # Verify the exception is JSONDecodeError + assert isinstance(exc_info.value, json.JSONDecodeError) + assert "Expecting value" in str(exc_info.value) + + def test_put_empty_click_id(self, app, mock_account, mock_billing_service, mock_decorators): + """Test that empty click_id raises BadRequest.""" + # Arrange + partner_key_encoded = base64.b64encode(b"partner-key-123").decode("utf-8") + click_id = "" + + with app.test_request_context( + method="PUT", + json={"click_id": click_id}, + path=f"/billing/partners/{partner_key_encoded}/tenants", + ): + with ( + patch( + "controllers.console.billing.billing.current_account_with_tenant", + return_value=(mock_account, "tenant-456"), + ), + patch("libs.login._get_user", return_value=mock_account), + ): + resource = PartnerTenants() + + # Act & Assert + with pytest.raises(BadRequest) as exc_info: + resource.put(partner_key_encoded) + assert "Invalid partner information" in str(exc_info.value) + + def test_put_empty_partner_key_after_decode(self, app, mock_account, mock_billing_service, mock_decorators): + """Test that empty partner_key after decode raises BadRequest.""" + # Arrange + # Base64 encode an empty string + empty_partner_key_encoded = base64.b64encode(b"").decode("utf-8") + click_id = "click-id-789" + + with app.test_request_context( + method="PUT", + json={"click_id": click_id}, + path=f"/billing/partners/{empty_partner_key_encoded}/tenants", + ): + with ( + patch( + "controllers.console.billing.billing.current_account_with_tenant", + return_value=(mock_account, "tenant-456"), + ), + patch("libs.login._get_user", return_value=mock_account), + ): + resource = PartnerTenants() + + # Act & Assert + with pytest.raises(BadRequest) as exc_info: + resource.put(empty_partner_key_encoded) + assert "Invalid partner information" in str(exc_info.value) + + def test_put_empty_user_id(self, app, mock_account, mock_billing_service, mock_decorators): + """Test that empty user id raises BadRequest.""" + # Arrange + partner_key_encoded = base64.b64encode(b"partner-key-123").decode("utf-8") + click_id = "click-id-789" + mock_account.id = None # Empty user id + + with app.test_request_context( + method="PUT", + json={"click_id": click_id}, + path=f"/billing/partners/{partner_key_encoded}/tenants", + ): + with ( + patch( + "controllers.console.billing.billing.current_account_with_tenant", + return_value=(mock_account, "tenant-456"), + ), + patch("libs.login._get_user", return_value=mock_account), + ): + resource = PartnerTenants() + + # Act & Assert + with pytest.raises(BadRequest) as exc_info: + resource.put(partner_key_encoded) + assert "Invalid partner information" in str(exc_info.value) diff --git a/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_process_data.py b/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_process_data.py deleted file mode 100644 index abe09fb8a4..0000000000 --- a/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_process_data.py +++ /dev/null @@ -1,324 +0,0 @@ -""" -Unit tests for WorkflowResponseConverter focusing on process_data truncation functionality. -""" - -import uuid -from collections.abc import Mapping -from typing import Any -from unittest.mock import Mock - -import pytest - -from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter -from core.app.entities.app_invoke_entities import WorkflowAppGenerateEntity -from core.app.entities.queue_entities import ( - QueueNodeRetryEvent, - QueueNodeStartedEvent, - QueueNodeSucceededEvent, -) -from core.workflow.enums import NodeType -from core.workflow.system_variable import SystemVariable -from libs.datetime_utils import naive_utc_now -from models import Account - - -class TestWorkflowResponseConverterCenarios: - """Test process_data truncation in WorkflowResponseConverter.""" - - def create_mock_generate_entity(self) -> WorkflowAppGenerateEntity: - """Create a mock WorkflowAppGenerateEntity.""" - mock_entity = Mock(spec=WorkflowAppGenerateEntity) - mock_app_config = Mock() - mock_app_config.tenant_id = "test-tenant-id" - mock_entity.app_config = mock_app_config - mock_entity.inputs = {} - return mock_entity - - def create_workflow_response_converter(self) -> WorkflowResponseConverter: - """Create a WorkflowResponseConverter for testing.""" - - mock_entity = self.create_mock_generate_entity() - mock_user = Mock(spec=Account) - mock_user.id = "test-user-id" - mock_user.name = "Test User" - mock_user.email = "test@example.com" - - system_variables = SystemVariable(workflow_id="wf-id", workflow_execution_id="initial-run-id") - return WorkflowResponseConverter( - application_generate_entity=mock_entity, - user=mock_user, - system_variables=system_variables, - ) - - def create_node_started_event(self, *, node_execution_id: str | None = None) -> QueueNodeStartedEvent: - """Create a QueueNodeStartedEvent for testing.""" - return QueueNodeStartedEvent( - node_execution_id=node_execution_id or str(uuid.uuid4()), - node_id="test-node-id", - node_title="Test Node", - node_type=NodeType.CODE, - start_at=naive_utc_now(), - predecessor_node_id=None, - in_iteration_id=None, - in_loop_id=None, - provider_type="built-in", - provider_id="code", - ) - - def create_node_succeeded_event( - self, - *, - node_execution_id: str, - process_data: Mapping[str, Any] | None = None, - ) -> QueueNodeSucceededEvent: - """Create a QueueNodeSucceededEvent for testing.""" - return QueueNodeSucceededEvent( - node_id="test-node-id", - node_type=NodeType.CODE, - node_execution_id=node_execution_id, - start_at=naive_utc_now(), - in_iteration_id=None, - in_loop_id=None, - inputs={}, - process_data=process_data or {}, - outputs={}, - execution_metadata={}, - ) - - def create_node_retry_event( - self, - *, - node_execution_id: str, - process_data: Mapping[str, Any] | None = None, - ) -> QueueNodeRetryEvent: - """Create a QueueNodeRetryEvent for testing.""" - return QueueNodeRetryEvent( - inputs={"data": "inputs"}, - outputs={"data": "outputs"}, - process_data=process_data or {}, - error="oops", - retry_index=1, - node_id="test-node-id", - node_type=NodeType.CODE, - node_title="test code", - provider_type="built-in", - provider_id="code", - node_execution_id=node_execution_id, - start_at=naive_utc_now(), - in_iteration_id=None, - in_loop_id=None, - ) - - def test_workflow_node_finish_response_uses_truncated_process_data(self): - """Test that node finish response uses get_response_process_data().""" - converter = self.create_workflow_response_converter() - - original_data = {"large_field": "x" * 10000, "metadata": "info"} - truncated_data = {"large_field": "[TRUNCATED]", "metadata": "info"} - - converter.workflow_start_to_stream_response(task_id="bootstrap", workflow_run_id="run-id", workflow_id="wf-id") - start_event = self.create_node_started_event() - converter.workflow_node_start_to_stream_response( - event=start_event, - task_id="test-task-id", - ) - - event = self.create_node_succeeded_event( - node_execution_id=start_event.node_execution_id, - process_data=original_data, - ) - - def fake_truncate(mapping): - if mapping == dict(original_data): - return truncated_data, True - return mapping, False - - converter._truncator.truncate_variable_mapping = fake_truncate # type: ignore[assignment] - - response = converter.workflow_node_finish_to_stream_response( - event=event, - task_id="test-task-id", - ) - - # Response should use truncated data, not original - assert response is not None - assert response.data.process_data == truncated_data - assert response.data.process_data != original_data - assert response.data.process_data_truncated is True - - def test_workflow_node_finish_response_without_truncation(self): - """Test node finish response when no truncation is applied.""" - converter = self.create_workflow_response_converter() - - original_data = {"small": "data"} - - converter.workflow_start_to_stream_response(task_id="bootstrap", workflow_run_id="run-id", workflow_id="wf-id") - start_event = self.create_node_started_event() - converter.workflow_node_start_to_stream_response( - event=start_event, - task_id="test-task-id", - ) - - event = self.create_node_succeeded_event( - node_execution_id=start_event.node_execution_id, - process_data=original_data, - ) - - def fake_truncate(mapping): - return mapping, False - - converter._truncator.truncate_variable_mapping = fake_truncate # type: ignore[assignment] - - response = converter.workflow_node_finish_to_stream_response( - event=event, - task_id="test-task-id", - ) - - # Response should use original data - assert response is not None - assert response.data.process_data == original_data - assert response.data.process_data_truncated is False - - def test_workflow_node_finish_response_with_none_process_data(self): - """Test node finish response when process_data is None.""" - converter = self.create_workflow_response_converter() - - converter.workflow_start_to_stream_response(task_id="bootstrap", workflow_run_id="run-id", workflow_id="wf-id") - start_event = self.create_node_started_event() - converter.workflow_node_start_to_stream_response( - event=start_event, - task_id="test-task-id", - ) - - event = self.create_node_succeeded_event( - node_execution_id=start_event.node_execution_id, - process_data=None, - ) - - def fake_truncate(mapping): - return mapping, False - - converter._truncator.truncate_variable_mapping = fake_truncate # type: ignore[assignment] - - response = converter.workflow_node_finish_to_stream_response( - event=event, - task_id="test-task-id", - ) - - # Response should normalize missing process_data to an empty mapping - assert response is not None - assert response.data.process_data == {} - assert response.data.process_data_truncated is False - - def test_workflow_node_retry_response_uses_truncated_process_data(self): - """Test that node retry response uses get_response_process_data().""" - converter = self.create_workflow_response_converter() - - original_data = {"large_field": "x" * 10000, "metadata": "info"} - truncated_data = {"large_field": "[TRUNCATED]", "metadata": "info"} - - converter.workflow_start_to_stream_response(task_id="bootstrap", workflow_run_id="run-id", workflow_id="wf-id") - start_event = self.create_node_started_event() - converter.workflow_node_start_to_stream_response( - event=start_event, - task_id="test-task-id", - ) - - event = self.create_node_retry_event( - node_execution_id=start_event.node_execution_id, - process_data=original_data, - ) - - def fake_truncate(mapping): - if mapping == dict(original_data): - return truncated_data, True - return mapping, False - - converter._truncator.truncate_variable_mapping = fake_truncate # type: ignore[assignment] - - response = converter.workflow_node_retry_to_stream_response( - event=event, - task_id="test-task-id", - ) - - # Response should use truncated data, not original - assert response is not None - assert response.data.process_data == truncated_data - assert response.data.process_data != original_data - assert response.data.process_data_truncated is True - - def test_workflow_node_retry_response_without_truncation(self): - """Test node retry response when no truncation is applied.""" - converter = self.create_workflow_response_converter() - - original_data = {"small": "data"} - - converter.workflow_start_to_stream_response(task_id="bootstrap", workflow_run_id="run-id", workflow_id="wf-id") - start_event = self.create_node_started_event() - converter.workflow_node_start_to_stream_response( - event=start_event, - task_id="test-task-id", - ) - - event = self.create_node_retry_event( - node_execution_id=start_event.node_execution_id, - process_data=original_data, - ) - - def fake_truncate(mapping): - return mapping, False - - converter._truncator.truncate_variable_mapping = fake_truncate # type: ignore[assignment] - - response = converter.workflow_node_retry_to_stream_response( - event=event, - task_id="test-task-id", - ) - - assert response is not None - assert response.data.process_data == original_data - assert response.data.process_data_truncated is False - - def test_iteration_and_loop_nodes_return_none(self): - """Test that iteration and loop nodes return None (no streaming events).""" - converter = self.create_workflow_response_converter() - - iteration_event = QueueNodeSucceededEvent( - node_id="iteration-node", - node_type=NodeType.ITERATION, - node_execution_id=str(uuid.uuid4()), - start_at=naive_utc_now(), - in_iteration_id=None, - in_loop_id=None, - inputs={}, - process_data={}, - outputs={}, - execution_metadata={}, - ) - - response = converter.workflow_node_finish_to_stream_response( - event=iteration_event, - task_id="test-task-id", - ) - assert response is None - - loop_event = iteration_event.model_copy(update={"node_type": NodeType.LOOP}) - response = converter.workflow_node_finish_to_stream_response( - event=loop_event, - task_id="test-task-id", - ) - assert response is None - - def test_finish_without_start_raises(self): - """Ensure finish responses require a prior workflow start.""" - converter = self.create_workflow_response_converter() - event = self.create_node_succeeded_event( - node_execution_id=str(uuid.uuid4()), - process_data={}, - ) - - with pytest.raises(ValueError): - converter.workflow_node_finish_to_stream_response( - event=event, - task_id="test-task-id", - ) diff --git a/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_truncation.py b/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_truncation.py new file mode 100644 index 0000000000..1c9f577a50 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_truncation.py @@ -0,0 +1,810 @@ +""" +Unit tests for WorkflowResponseConverter focusing on process_data truncation functionality. +""" + +import uuid +from collections.abc import Mapping +from dataclasses import dataclass +from typing import Any +from unittest.mock import Mock + +import pytest + +from core.app.app_config.entities import WorkflowUIBasedAppConfig +from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter +from core.app.entities.app_invoke_entities import InvokeFrom, WorkflowAppGenerateEntity +from core.app.entities.queue_entities import ( + QueueEvent, + QueueIterationStartEvent, + QueueLoopStartEvent, + QueueNodeExceptionEvent, + QueueNodeFailedEvent, + QueueNodeRetryEvent, + QueueNodeStartedEvent, + QueueNodeSucceededEvent, +) +from core.workflow.enums import NodeType +from core.workflow.system_variable import SystemVariable +from libs.datetime_utils import naive_utc_now +from models import Account +from models.model import AppMode + + +class TestWorkflowResponseConverter: + """Test truncation in WorkflowResponseConverter.""" + + def create_mock_generate_entity(self) -> WorkflowAppGenerateEntity: + """Create a mock WorkflowAppGenerateEntity.""" + mock_entity = Mock(spec=WorkflowAppGenerateEntity) + mock_app_config = Mock() + mock_app_config.tenant_id = "test-tenant-id" + mock_entity.invoke_from = InvokeFrom.WEB_APP + mock_entity.app_config = mock_app_config + mock_entity.inputs = {} + return mock_entity + + def create_workflow_response_converter(self) -> WorkflowResponseConverter: + """Create a WorkflowResponseConverter for testing.""" + + mock_entity = self.create_mock_generate_entity() + mock_user = Mock(spec=Account) + mock_user.id = "test-user-id" + mock_user.name = "Test User" + mock_user.email = "test@example.com" + + system_variables = SystemVariable(workflow_id="wf-id", workflow_execution_id="initial-run-id") + return WorkflowResponseConverter( + application_generate_entity=mock_entity, + user=mock_user, + system_variables=system_variables, + ) + + def create_node_started_event(self, *, node_execution_id: str | None = None) -> QueueNodeStartedEvent: + """Create a QueueNodeStartedEvent for testing.""" + return QueueNodeStartedEvent( + node_execution_id=node_execution_id or str(uuid.uuid4()), + node_id="test-node-id", + node_title="Test Node", + node_type=NodeType.CODE, + start_at=naive_utc_now(), + in_iteration_id=None, + in_loop_id=None, + provider_type="built-in", + provider_id="code", + ) + + def create_node_succeeded_event( + self, + *, + node_execution_id: str, + process_data: Mapping[str, Any] | None = None, + ) -> QueueNodeSucceededEvent: + """Create a QueueNodeSucceededEvent for testing.""" + return QueueNodeSucceededEvent( + node_id="test-node-id", + node_type=NodeType.CODE, + node_execution_id=node_execution_id, + start_at=naive_utc_now(), + in_iteration_id=None, + in_loop_id=None, + inputs={}, + process_data=process_data or {}, + outputs={}, + execution_metadata={}, + ) + + def create_node_retry_event( + self, + *, + node_execution_id: str, + process_data: Mapping[str, Any] | None = None, + ) -> QueueNodeRetryEvent: + """Create a QueueNodeRetryEvent for testing.""" + return QueueNodeRetryEvent( + inputs={"data": "inputs"}, + outputs={"data": "outputs"}, + process_data=process_data or {}, + error="oops", + retry_index=1, + node_id="test-node-id", + node_type=NodeType.CODE, + node_title="test code", + provider_type="built-in", + provider_id="code", + node_execution_id=node_execution_id, + start_at=naive_utc_now(), + in_iteration_id=None, + in_loop_id=None, + ) + + def test_workflow_node_finish_response_uses_truncated_process_data(self): + """Test that node finish response uses get_response_process_data().""" + converter = self.create_workflow_response_converter() + + original_data = {"large_field": "x" * 10000, "metadata": "info"} + truncated_data = {"large_field": "[TRUNCATED]", "metadata": "info"} + + converter.workflow_start_to_stream_response(task_id="bootstrap", workflow_run_id="run-id", workflow_id="wf-id") + start_event = self.create_node_started_event() + converter.workflow_node_start_to_stream_response( + event=start_event, + task_id="test-task-id", + ) + + event = self.create_node_succeeded_event( + node_execution_id=start_event.node_execution_id, + process_data=original_data, + ) + + def fake_truncate(mapping): + if mapping == dict(original_data): + return truncated_data, True + return mapping, False + + converter._truncator.truncate_variable_mapping = fake_truncate # type: ignore[assignment] + + response = converter.workflow_node_finish_to_stream_response( + event=event, + task_id="test-task-id", + ) + + # Response should use truncated data, not original + assert response is not None + assert response.data.process_data == truncated_data + assert response.data.process_data != original_data + assert response.data.process_data_truncated is True + + def test_workflow_node_finish_response_without_truncation(self): + """Test node finish response when no truncation is applied.""" + converter = self.create_workflow_response_converter() + + original_data = {"small": "data"} + + converter.workflow_start_to_stream_response(task_id="bootstrap", workflow_run_id="run-id", workflow_id="wf-id") + start_event = self.create_node_started_event() + converter.workflow_node_start_to_stream_response( + event=start_event, + task_id="test-task-id", + ) + + event = self.create_node_succeeded_event( + node_execution_id=start_event.node_execution_id, + process_data=original_data, + ) + + def fake_truncate(mapping): + return mapping, False + + converter._truncator.truncate_variable_mapping = fake_truncate # type: ignore[assignment] + + response = converter.workflow_node_finish_to_stream_response( + event=event, + task_id="test-task-id", + ) + + # Response should use original data + assert response is not None + assert response.data.process_data == original_data + assert response.data.process_data_truncated is False + + def test_workflow_node_finish_response_with_none_process_data(self): + """Test node finish response when process_data is None.""" + converter = self.create_workflow_response_converter() + + converter.workflow_start_to_stream_response(task_id="bootstrap", workflow_run_id="run-id", workflow_id="wf-id") + start_event = self.create_node_started_event() + converter.workflow_node_start_to_stream_response( + event=start_event, + task_id="test-task-id", + ) + + event = self.create_node_succeeded_event( + node_execution_id=start_event.node_execution_id, + process_data=None, + ) + + def fake_truncate(mapping): + return mapping, False + + converter._truncator.truncate_variable_mapping = fake_truncate # type: ignore[assignment] + + response = converter.workflow_node_finish_to_stream_response( + event=event, + task_id="test-task-id", + ) + + # Response should normalize missing process_data to an empty mapping + assert response is not None + assert response.data.process_data == {} + assert response.data.process_data_truncated is False + + def test_workflow_node_retry_response_uses_truncated_process_data(self): + """Test that node retry response uses get_response_process_data().""" + converter = self.create_workflow_response_converter() + + original_data = {"large_field": "x" * 10000, "metadata": "info"} + truncated_data = {"large_field": "[TRUNCATED]", "metadata": "info"} + + converter.workflow_start_to_stream_response(task_id="bootstrap", workflow_run_id="run-id", workflow_id="wf-id") + start_event = self.create_node_started_event() + converter.workflow_node_start_to_stream_response( + event=start_event, + task_id="test-task-id", + ) + + event = self.create_node_retry_event( + node_execution_id=start_event.node_execution_id, + process_data=original_data, + ) + + def fake_truncate(mapping): + if mapping == dict(original_data): + return truncated_data, True + return mapping, False + + converter._truncator.truncate_variable_mapping = fake_truncate # type: ignore[assignment] + + response = converter.workflow_node_retry_to_stream_response( + event=event, + task_id="test-task-id", + ) + + # Response should use truncated data, not original + assert response is not None + assert response.data.process_data == truncated_data + assert response.data.process_data != original_data + assert response.data.process_data_truncated is True + + def test_workflow_node_retry_response_without_truncation(self): + """Test node retry response when no truncation is applied.""" + converter = self.create_workflow_response_converter() + + original_data = {"small": "data"} + + converter.workflow_start_to_stream_response(task_id="bootstrap", workflow_run_id="run-id", workflow_id="wf-id") + start_event = self.create_node_started_event() + converter.workflow_node_start_to_stream_response( + event=start_event, + task_id="test-task-id", + ) + + event = self.create_node_retry_event( + node_execution_id=start_event.node_execution_id, + process_data=original_data, + ) + + def fake_truncate(mapping): + return mapping, False + + converter._truncator.truncate_variable_mapping = fake_truncate # type: ignore[assignment] + + response = converter.workflow_node_retry_to_stream_response( + event=event, + task_id="test-task-id", + ) + + assert response is not None + assert response.data.process_data == original_data + assert response.data.process_data_truncated is False + + def test_iteration_and_loop_nodes_return_none(self): + """Test that iteration and loop nodes return None (no streaming events).""" + converter = self.create_workflow_response_converter() + + iteration_event = QueueNodeSucceededEvent( + node_id="iteration-node", + node_type=NodeType.ITERATION, + node_execution_id=str(uuid.uuid4()), + start_at=naive_utc_now(), + in_iteration_id=None, + in_loop_id=None, + inputs={}, + process_data={}, + outputs={}, + execution_metadata={}, + ) + + response = converter.workflow_node_finish_to_stream_response( + event=iteration_event, + task_id="test-task-id", + ) + assert response is None + + loop_event = iteration_event.model_copy(update={"node_type": NodeType.LOOP}) + response = converter.workflow_node_finish_to_stream_response( + event=loop_event, + task_id="test-task-id", + ) + assert response is None + + def test_finish_without_start_raises(self): + """Ensure finish responses require a prior workflow start.""" + converter = self.create_workflow_response_converter() + event = self.create_node_succeeded_event( + node_execution_id=str(uuid.uuid4()), + process_data={}, + ) + + with pytest.raises(ValueError): + converter.workflow_node_finish_to_stream_response( + event=event, + task_id="test-task-id", + ) + + +@dataclass +class TestCase: + """Test case data for table-driven tests.""" + + name: str + invoke_from: InvokeFrom + expected_truncation_enabled: bool + description: str + + +class TestWorkflowResponseConverterServiceApiTruncation: + """Test class for Service API truncation functionality in WorkflowResponseConverter.""" + + def create_test_app_generate_entity(self, invoke_from: InvokeFrom) -> WorkflowAppGenerateEntity: + """Create a test WorkflowAppGenerateEntity with specified invoke_from.""" + # Create a minimal WorkflowUIBasedAppConfig for testing + app_config = WorkflowUIBasedAppConfig( + tenant_id="test_tenant", + app_id="test_app", + app_mode=AppMode.WORKFLOW, + workflow_id="test_workflow_id", + ) + + entity = WorkflowAppGenerateEntity( + task_id="test_task_id", + app_id="test_app_id", + app_config=app_config, + tenant_id="test_tenant", + app_mode=AppMode.WORKFLOW, + invoke_from=invoke_from, + inputs={"test_input": "test_value"}, + user_id="test_user_id", + stream=True, + files=[], + workflow_execution_id="test_workflow_exec_id", + ) + return entity + + def create_test_user(self) -> Account: + """Create a test user account.""" + account = Account( + name="Test User", + email="test@example.com", + ) + # Manually set the ID for testing purposes + account.id = "test_user_id" + return account + + def create_test_system_variables(self) -> SystemVariable: + """Create test system variables.""" + return SystemVariable() + + def create_test_converter(self, invoke_from: InvokeFrom) -> WorkflowResponseConverter: + """Create WorkflowResponseConverter with specified invoke_from.""" + entity = self.create_test_app_generate_entity(invoke_from) + user = self.create_test_user() + system_variables = self.create_test_system_variables() + + converter = WorkflowResponseConverter( + application_generate_entity=entity, + user=user, + system_variables=system_variables, + ) + # ensure `workflow_run_id` is set. + converter.workflow_start_to_stream_response( + task_id="test-task-id", + workflow_run_id="test-workflow-run-id", + workflow_id="test-workflow-id", + ) + return converter + + @pytest.mark.parametrize( + "test_case", + [ + TestCase( + name="service_api_truncation_disabled", + invoke_from=InvokeFrom.SERVICE_API, + expected_truncation_enabled=False, + description="Service API calls should have truncation disabled", + ), + TestCase( + name="web_app_truncation_enabled", + invoke_from=InvokeFrom.WEB_APP, + expected_truncation_enabled=True, + description="Web app calls should have truncation enabled", + ), + TestCase( + name="debugger_truncation_enabled", + invoke_from=InvokeFrom.DEBUGGER, + expected_truncation_enabled=True, + description="Debugger calls should have truncation enabled", + ), + TestCase( + name="explore_truncation_enabled", + invoke_from=InvokeFrom.EXPLORE, + expected_truncation_enabled=True, + description="Explore calls should have truncation enabled", + ), + TestCase( + name="published_truncation_enabled", + invoke_from=InvokeFrom.PUBLISHED, + expected_truncation_enabled=True, + description="Published app calls should have truncation enabled", + ), + ], + ids=lambda x: x.name, + ) + def test_truncator_selection_based_on_invoke_from(self, test_case: TestCase): + """Test that the correct truncator is selected based on invoke_from.""" + converter = self.create_test_converter(test_case.invoke_from) + + # Test truncation behavior instead of checking private attribute + + # Create a test event with large data + large_value = {"key": ["x"] * 2000} # Large data that would be truncated + + event = QueueNodeSucceededEvent( + node_execution_id="test_node_exec_id", + node_id="test_node", + node_type=NodeType.LLM, + start_at=naive_utc_now(), + inputs=large_value, + process_data=large_value, + outputs=large_value, + error=None, + execution_metadata=None, + in_iteration_id=None, + in_loop_id=None, + ) + + response = converter.workflow_node_finish_to_stream_response( + event=event, + task_id="test_task", + ) + + # Verify response is not None + assert response is not None + + # Verify truncation behavior matches expectations + if test_case.expected_truncation_enabled: + # Truncation should be enabled for non-service-api calls + assert response.data.inputs_truncated + assert response.data.process_data_truncated + assert response.data.outputs_truncated + else: + # SERVICE_API should not truncate + assert not response.data.inputs_truncated + assert not response.data.process_data_truncated + assert not response.data.outputs_truncated + + def test_service_api_truncator_no_op_mapping(self): + """Test that Service API truncator doesn't truncate variable mappings.""" + converter = self.create_test_converter(InvokeFrom.SERVICE_API) + + # Create a test event with large data + large_value: dict[str, Any] = { + "large_string": "x" * 10000, # Large string + "large_list": list(range(2000)), # Large array + "nested_data": {"deep_nested": {"very_deep": {"value": "x" * 5000}}}, + } + + event = QueueNodeSucceededEvent( + node_execution_id="test_node_exec_id", + node_id="test_node", + node_type=NodeType.LLM, + start_at=naive_utc_now(), + inputs=large_value, + process_data=large_value, + outputs=large_value, + error=None, + execution_metadata=None, + in_iteration_id=None, + in_loop_id=None, + ) + + response = converter.workflow_node_finish_to_stream_response( + event=event, + task_id="test_task", + ) + + # Verify response is not None + data = response.data + assert data.inputs == large_value + assert data.process_data == large_value + assert data.outputs == large_value + # Service API should not truncate + assert data.inputs_truncated is False + assert data.process_data_truncated is False + assert data.outputs_truncated is False + + def test_web_app_truncator_works_normally(self): + """Test that web app truncator still works normally.""" + converter = self.create_test_converter(InvokeFrom.WEB_APP) + + # Create a test event with large data + large_value = { + "large_string": "x" * 10000, # Large string + "large_list": list(range(2000)), # Large array + } + + event = QueueNodeSucceededEvent( + node_execution_id="test_node_exec_id", + node_id="test_node", + node_type=NodeType.LLM, + start_at=naive_utc_now(), + inputs=large_value, + process_data=large_value, + outputs=large_value, + error=None, + execution_metadata=None, + in_iteration_id=None, + in_loop_id=None, + ) + + response = converter.workflow_node_finish_to_stream_response( + event=event, + task_id="test_task", + ) + + # Verify response is not None + assert response is not None + + # Web app should truncate + data = response.data + assert data.inputs != large_value + assert data.process_data != large_value + assert data.outputs != large_value + # The exact behavior depends on VariableTruncator implementation + # Just verify that truncation flags are present + assert data.inputs_truncated is True + assert data.process_data_truncated is True + assert data.outputs_truncated is True + + @staticmethod + def _create_event_by_type( + type_: QueueEvent, inputs: Mapping[str, Any], process_data: Mapping[str, Any], outputs: Mapping[str, Any] + ) -> QueueNodeSucceededEvent | QueueNodeFailedEvent | QueueNodeExceptionEvent: + if type_ == QueueEvent.NODE_SUCCEEDED: + return QueueNodeSucceededEvent( + node_execution_id="test_node_exec_id", + node_id="test_node", + node_type=NodeType.LLM, + start_at=naive_utc_now(), + inputs=inputs, + process_data=process_data, + outputs=outputs, + error=None, + execution_metadata=None, + in_iteration_id=None, + in_loop_id=None, + ) + elif type_ == QueueEvent.NODE_FAILED: + return QueueNodeFailedEvent( + node_execution_id="test_node_exec_id", + node_id="test_node", + node_type=NodeType.LLM, + start_at=naive_utc_now(), + inputs=inputs, + process_data=process_data, + outputs=outputs, + error="oops", + execution_metadata=None, + in_iteration_id=None, + in_loop_id=None, + ) + elif type_ == QueueEvent.NODE_EXCEPTION: + return QueueNodeExceptionEvent( + node_execution_id="test_node_exec_id", + node_id="test_node", + node_type=NodeType.LLM, + start_at=naive_utc_now(), + inputs=inputs, + process_data=process_data, + outputs=outputs, + error="oops", + execution_metadata=None, + in_iteration_id=None, + in_loop_id=None, + ) + else: + raise Exception("unknown type.") + + @pytest.mark.parametrize( + "event_type", + [ + QueueEvent.NODE_SUCCEEDED, + QueueEvent.NODE_FAILED, + QueueEvent.NODE_EXCEPTION, + ], + ) + def test_service_api_node_finish_event_no_truncation(self, event_type: QueueEvent): + """Test that Service API doesn't truncate node finish events.""" + converter = self.create_test_converter(InvokeFrom.SERVICE_API) + # Create test event with large data + large_inputs = {"input1": "x" * 5000, "input2": list(range(2000))} + large_process_data = {"process1": "y" * 5000, "process2": {"nested": ["z"] * 2000}} + large_outputs = {"output1": "result" * 1000, "output2": list(range(2000))} + + event = TestWorkflowResponseConverterServiceApiTruncation._create_event_by_type( + event_type, large_inputs, large_process_data, large_outputs + ) + + response = converter.workflow_node_finish_to_stream_response( + event=event, + task_id="test_task", + ) + + # Verify response is not None + assert response is not None + + # Verify response contains full data (not truncated) + assert response.data.inputs == large_inputs + assert response.data.process_data == large_process_data + assert response.data.outputs == large_outputs + assert not response.data.inputs_truncated + assert not response.data.process_data_truncated + assert not response.data.outputs_truncated + + def test_service_api_node_retry_event_no_truncation(self): + """Test that Service API doesn't truncate node retry events.""" + converter = self.create_test_converter(InvokeFrom.SERVICE_API) + + # Create test event with large data + large_inputs = {"retry_input": "x" * 5000} + large_process_data = {"retry_process": "y" * 5000} + large_outputs = {"retry_output": "z" * 5000} + + # First, we need to store a snapshot by simulating a start event + start_event = QueueNodeStartedEvent( + node_execution_id="test_node_exec_id", + node_id="test_node", + node_type=NodeType.LLM, + node_title="Test Node", + node_run_index=1, + start_at=naive_utc_now(), + in_iteration_id=None, + in_loop_id=None, + agent_strategy=None, + provider_type="plugin", + provider_id="test/test_plugin", + ) + converter.workflow_node_start_to_stream_response(event=start_event, task_id="test_task") + + # Now create retry event + event = QueueNodeRetryEvent( + node_execution_id="test_node_exec_id", + node_id="test_node", + node_type=NodeType.LLM, + node_title="Test Node", + node_run_index=1, + start_at=naive_utc_now(), + inputs=large_inputs, + process_data=large_process_data, + outputs=large_outputs, + error="Retry error", + execution_metadata=None, + in_iteration_id=None, + in_loop_id=None, + retry_index=1, + provider_type="plugin", + provider_id="test/test_plugin", + ) + + response = converter.workflow_node_retry_to_stream_response( + event=event, + task_id="test_task", + ) + + # Verify response is not None + assert response is not None + + # Verify response contains full data (not truncated) + assert response.data.inputs == large_inputs + assert response.data.process_data == large_process_data + assert response.data.outputs == large_outputs + assert not response.data.inputs_truncated + assert not response.data.process_data_truncated + assert not response.data.outputs_truncated + + def test_service_api_iteration_events_no_truncation(self): + """Test that Service API doesn't truncate iteration events.""" + converter = self.create_test_converter(InvokeFrom.SERVICE_API) + + # Test iteration start event + large_value = {"iteration_input": ["x"] * 2000} + + start_event = QueueIterationStartEvent( + node_execution_id="test_iter_exec_id", + node_id="test_iteration", + node_type=NodeType.ITERATION, + node_title="Test Iteration", + node_run_index=0, + start_at=naive_utc_now(), + inputs=large_value, + metadata={}, + ) + + response = converter.workflow_iteration_start_to_stream_response( + task_id="test_task", + workflow_execution_id="test_workflow_exec_id", + event=start_event, + ) + + assert response is not None + assert response.data.inputs == large_value + assert not response.data.inputs_truncated + + def test_service_api_loop_events_no_truncation(self): + """Test that Service API doesn't truncate loop events.""" + converter = self.create_test_converter(InvokeFrom.SERVICE_API) + + # Test loop start event + large_inputs = {"loop_input": ["x"] * 2000} + + start_event = QueueLoopStartEvent( + node_execution_id="test_loop_exec_id", + node_id="test_loop", + node_type=NodeType.LOOP, + node_title="Test Loop", + start_at=naive_utc_now(), + inputs=large_inputs, + metadata={}, + node_run_index=0, + ) + + response = converter.workflow_loop_start_to_stream_response( + task_id="test_task", + workflow_execution_id="test_workflow_exec_id", + event=start_event, + ) + + assert response is not None + assert response.data.inputs == large_inputs + assert not response.data.inputs_truncated + + def test_web_app_node_finish_event_truncation_works(self): + """Test that web app still truncates node finish events.""" + converter = self.create_test_converter(InvokeFrom.WEB_APP) + + # Create test event with large data that should be truncated + large_inputs = {"input1": ["x"] * 2000} + large_process_data = {"process1": ["y"] * 2000} + large_outputs = {"output1": ["z"] * 2000} + + event = QueueNodeSucceededEvent( + node_execution_id="test_node_exec_id", + node_id="test_node", + node_type=NodeType.LLM, + start_at=naive_utc_now(), + inputs=large_inputs, + process_data=large_process_data, + outputs=large_outputs, + error=None, + execution_metadata=None, + in_iteration_id=None, + in_loop_id=None, + ) + + response = converter.workflow_node_finish_to_stream_response( + event=event, + task_id="test_task", + ) + + # Verify response is not None + assert response is not None + + # Verify response contains truncated data + # The exact behavior depends on VariableTruncator implementation + # Just verify truncation flags are set correctly (may or may not be truncated depending on size) + # At minimum, the truncation mechanism should work + assert isinstance(response.data.inputs, dict) + assert response.data.inputs_truncated + assert isinstance(response.data.process_data, dict) + assert response.data.process_data_truncated + assert isinstance(response.data.outputs, dict) + assert response.data.outputs_truncated diff --git a/api/tests/unit_tests/core/app/apps/test_workflow_app_generator.py b/api/tests/unit_tests/core/app/apps/test_workflow_app_generator.py new file mode 100644 index 0000000000..83ac3a5591 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/test_workflow_app_generator.py @@ -0,0 +1,19 @@ +from core.app.apps.workflow.app_generator import SKIP_PREPARE_USER_INPUTS_KEY, WorkflowAppGenerator + + +def test_should_prepare_user_inputs_defaults_to_true(): + args = {"inputs": {}} + + assert WorkflowAppGenerator()._should_prepare_user_inputs(args) + + +def test_should_prepare_user_inputs_skips_when_flag_truthy(): + args = {"inputs": {}, SKIP_PREPARE_USER_INPUTS_KEY: True} + + assert not WorkflowAppGenerator()._should_prepare_user_inputs(args) + + +def test_should_prepare_user_inputs_keeps_validation_when_flag_false(): + args = {"inputs": {}, SKIP_PREPARE_USER_INPUTS_KEY: False} + + assert WorkflowAppGenerator()._should_prepare_user_inputs(args) diff --git a/api/tests/unit_tests/core/app/layers/test_pause_state_persist_layer.py b/api/tests/unit_tests/core/app/layers/test_pause_state_persist_layer.py new file mode 100644 index 0000000000..807f5e0fa5 --- /dev/null +++ b/api/tests/unit_tests/core/app/layers/test_pause_state_persist_layer.py @@ -0,0 +1,410 @@ +import json +from time import time +from unittest.mock import Mock + +import pytest + +from core.app.app_config.entities import WorkflowUIBasedAppConfig +from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, InvokeFrom, WorkflowAppGenerateEntity +from core.app.layers.pause_state_persist_layer import ( + PauseStatePersistenceLayer, + WorkflowResumptionContext, + _AdvancedChatAppGenerateEntityWrapper, + _WorkflowGenerateEntityWrapper, +) +from core.variables.segments import Segment +from core.workflow.entities.pause_reason import SchedulingPause +from core.workflow.graph_engine.entities.commands import GraphEngineCommand +from core.workflow.graph_events.graph import ( + GraphRunFailedEvent, + GraphRunPausedEvent, + GraphRunStartedEvent, + GraphRunSucceededEvent, +) +from core.workflow.runtime.graph_runtime_state_protocol import ReadOnlyVariablePool +from models.model import AppMode +from repositories.factory import DifyAPIRepositoryFactory + + +class TestDataFactory: + """Factory helpers for constructing graph events used in tests.""" + + @staticmethod + def create_graph_run_paused_event(outputs: dict[str, object] | None = None) -> GraphRunPausedEvent: + return GraphRunPausedEvent(reason=SchedulingPause(message="test pause"), outputs=outputs or {}) + + @staticmethod + def create_graph_run_started_event() -> GraphRunStartedEvent: + return GraphRunStartedEvent() + + @staticmethod + def create_graph_run_succeeded_event(outputs: dict[str, object] | None = None) -> GraphRunSucceededEvent: + return GraphRunSucceededEvent(outputs=outputs or {}) + + @staticmethod + def create_graph_run_failed_event( + error: str = "Test error", + exceptions_count: int = 1, + ) -> GraphRunFailedEvent: + return GraphRunFailedEvent(error=error, exceptions_count=exceptions_count) + + +class MockSystemVariableReadOnlyView: + """Minimal read-only system variable view for testing.""" + + def __init__(self, workflow_execution_id: str | None = None) -> None: + self._workflow_execution_id = workflow_execution_id + + @property + def workflow_execution_id(self) -> str | None: + return self._workflow_execution_id + + +class MockReadOnlyVariablePool: + """Mock implementation of ReadOnlyVariablePool for testing.""" + + def __init__(self, variables: dict[tuple[str, str], object] | None = None): + self._variables = variables or {} + + def get(self, node_id: str, variable_key: str) -> Segment | None: + value = self._variables.get((node_id, variable_key)) + if value is None: + return None + mock_segment = Mock(spec=Segment) + mock_segment.value = value + return mock_segment + + def get_all_by_node(self, node_id: str) -> dict[str, object]: + return {key: value for (nid, key), value in self._variables.items() if nid == node_id} + + def get_by_prefix(self, prefix: str) -> dict[str, object]: + return {f"{nid}.{key}": value for (nid, key), value in self._variables.items() if nid.startswith(prefix)} + + +class MockReadOnlyGraphRuntimeState: + """Mock implementation of ReadOnlyGraphRuntimeState for testing.""" + + def __init__( + self, + start_at: float | None = None, + total_tokens: int = 0, + node_run_steps: int = 0, + ready_queue_size: int = 0, + exceptions_count: int = 0, + outputs: dict[str, object] | None = None, + variables: dict[tuple[str, str], object] | None = None, + workflow_execution_id: str | None = None, + ): + self._start_at = start_at or time() + self._total_tokens = total_tokens + self._node_run_steps = node_run_steps + self._ready_queue_size = ready_queue_size + self._exceptions_count = exceptions_count + self._outputs = outputs or {} + self._variable_pool = MockReadOnlyVariablePool(variables) + self._system_variable = MockSystemVariableReadOnlyView(workflow_execution_id) + + @property + def system_variable(self) -> MockSystemVariableReadOnlyView: + return self._system_variable + + @property + def variable_pool(self) -> ReadOnlyVariablePool: + return self._variable_pool + + @property + def start_at(self) -> float: + return self._start_at + + @property + def total_tokens(self) -> int: + return self._total_tokens + + @property + def node_run_steps(self) -> int: + return self._node_run_steps + + @property + def ready_queue_size(self) -> int: + return self._ready_queue_size + + @property + def exceptions_count(self) -> int: + return self._exceptions_count + + @property + def outputs(self) -> dict[str, object]: + return self._outputs.copy() + + @property + def llm_usage(self): + mock_usage = Mock() + mock_usage.prompt_tokens = 10 + mock_usage.completion_tokens = 20 + mock_usage.total_tokens = 30 + return mock_usage + + def get_output(self, key: str, default: object = None) -> object: + return self._outputs.get(key, default) + + def dumps(self) -> str: + return json.dumps( + { + "start_at": self._start_at, + "total_tokens": self._total_tokens, + "node_run_steps": self._node_run_steps, + "ready_queue_size": self._ready_queue_size, + "exceptions_count": self._exceptions_count, + "outputs": self._outputs, + "variables": {f"{k[0]}.{k[1]}": v for k, v in self._variable_pool._variables.items()}, + "workflow_execution_id": self._system_variable.workflow_execution_id, + } + ) + + +class MockCommandChannel: + """Mock implementation of CommandChannel for testing.""" + + def __init__(self): + self._commands: list[GraphEngineCommand] = [] + + def fetch_commands(self) -> list[GraphEngineCommand]: + return self._commands.copy() + + def send_command(self, command: GraphEngineCommand) -> None: + self._commands.append(command) + + +class TestPauseStatePersistenceLayer: + """Unit tests for PauseStatePersistenceLayer.""" + + @staticmethod + def _create_generate_entity(workflow_execution_id: str = "run-123") -> WorkflowAppGenerateEntity: + app_config = WorkflowUIBasedAppConfig( + tenant_id="tenant-123", + app_id="app-123", + app_mode=AppMode.WORKFLOW, + workflow_id="workflow-123", + ) + return WorkflowAppGenerateEntity( + task_id="task-123", + app_config=app_config, + inputs={}, + files=[], + user_id="user-123", + stream=False, + invoke_from=InvokeFrom.DEBUGGER, + workflow_execution_id=workflow_execution_id, + ) + + def test_init_with_dependency_injection(self): + session_factory = Mock(name="session_factory") + state_owner_user_id = "user-123" + + layer = PauseStatePersistenceLayer( + session_factory=session_factory, + state_owner_user_id=state_owner_user_id, + generate_entity=self._create_generate_entity(), + ) + + assert layer._session_maker is session_factory + assert layer._state_owner_user_id == state_owner_user_id + assert not hasattr(layer, "graph_runtime_state") + assert not hasattr(layer, "command_channel") + + def test_initialize_sets_dependencies(self): + session_factory = Mock(name="session_factory") + layer = PauseStatePersistenceLayer( + session_factory=session_factory, + state_owner_user_id="owner", + generate_entity=self._create_generate_entity(), + ) + + graph_runtime_state = MockReadOnlyGraphRuntimeState() + command_channel = MockCommandChannel() + + layer.initialize(graph_runtime_state, command_channel) + + assert layer.graph_runtime_state is graph_runtime_state + assert layer.command_channel is command_channel + + def test_on_event_with_graph_run_paused_event(self, monkeypatch: pytest.MonkeyPatch): + session_factory = Mock(name="session_factory") + generate_entity = self._create_generate_entity(workflow_execution_id="run-123") + layer = PauseStatePersistenceLayer( + session_factory=session_factory, + state_owner_user_id="owner-123", + generate_entity=generate_entity, + ) + + mock_repo = Mock() + mock_factory = Mock(return_value=mock_repo) + monkeypatch.setattr(DifyAPIRepositoryFactory, "create_api_workflow_run_repository", mock_factory) + + graph_runtime_state = MockReadOnlyGraphRuntimeState( + outputs={"result": "test_output"}, + total_tokens=100, + workflow_execution_id="run-123", + ) + command_channel = MockCommandChannel() + layer.initialize(graph_runtime_state, command_channel) + + event = TestDataFactory.create_graph_run_paused_event(outputs={"intermediate": "result"}) + expected_state = graph_runtime_state.dumps() + + layer.on_event(event) + + mock_factory.assert_called_once_with(session_factory) + mock_repo.create_workflow_pause.assert_called_once_with( + workflow_run_id="run-123", + state_owner_user_id="owner-123", + state=mock_repo.create_workflow_pause.call_args.kwargs["state"], + ) + serialized_state = mock_repo.create_workflow_pause.call_args.kwargs["state"] + resumption_context = WorkflowResumptionContext.loads(serialized_state) + assert resumption_context.serialized_graph_runtime_state == expected_state + assert resumption_context.get_generate_entity().model_dump() == generate_entity.model_dump() + + def test_on_event_ignores_non_paused_events(self, monkeypatch: pytest.MonkeyPatch): + session_factory = Mock(name="session_factory") + layer = PauseStatePersistenceLayer( + session_factory=session_factory, + state_owner_user_id="owner-123", + generate_entity=self._create_generate_entity(), + ) + + mock_repo = Mock() + mock_factory = Mock(return_value=mock_repo) + monkeypatch.setattr(DifyAPIRepositoryFactory, "create_api_workflow_run_repository", mock_factory) + + graph_runtime_state = MockReadOnlyGraphRuntimeState() + command_channel = MockCommandChannel() + layer.initialize(graph_runtime_state, command_channel) + + events = [ + TestDataFactory.create_graph_run_started_event(), + TestDataFactory.create_graph_run_succeeded_event(), + TestDataFactory.create_graph_run_failed_event(), + ] + + for event in events: + layer.on_event(event) + + mock_factory.assert_not_called() + mock_repo.create_workflow_pause.assert_not_called() + + def test_on_event_raises_attribute_error_when_graph_runtime_state_is_none(self): + session_factory = Mock(name="session_factory") + layer = PauseStatePersistenceLayer( + session_factory=session_factory, + state_owner_user_id="owner-123", + generate_entity=self._create_generate_entity(), + ) + + event = TestDataFactory.create_graph_run_paused_event() + + with pytest.raises(AttributeError): + layer.on_event(event) + + def test_on_event_asserts_when_workflow_execution_id_missing(self, monkeypatch: pytest.MonkeyPatch): + session_factory = Mock(name="session_factory") + layer = PauseStatePersistenceLayer( + session_factory=session_factory, + state_owner_user_id="owner-123", + generate_entity=self._create_generate_entity(), + ) + + mock_repo = Mock() + mock_factory = Mock(return_value=mock_repo) + monkeypatch.setattr(DifyAPIRepositoryFactory, "create_api_workflow_run_repository", mock_factory) + + graph_runtime_state = MockReadOnlyGraphRuntimeState(workflow_execution_id=None) + command_channel = MockCommandChannel() + layer.initialize(graph_runtime_state, command_channel) + + event = TestDataFactory.create_graph_run_paused_event() + + with pytest.raises(AssertionError): + layer.on_event(event) + + mock_factory.assert_not_called() + mock_repo.create_workflow_pause.assert_not_called() + + +def _build_workflow_generate_entity_for_roundtrip() -> WorkflowResumptionContext: + """Create a WorkflowAppGenerateEntity with realistic data for WorkflowResumptionContext tests.""" + app_config = WorkflowUIBasedAppConfig( + tenant_id="tenant-roundtrip", + app_id="app-roundtrip", + app_mode=AppMode.WORKFLOW, + workflow_id="workflow-roundtrip", + ) + serialized_state = json.dumps({"state": "workflow"}) + + return WorkflowResumptionContext( + serialized_graph_runtime_state=serialized_state, + generate_entity=_WorkflowGenerateEntityWrapper( + entity=WorkflowAppGenerateEntity( + task_id="workflow-task", + app_config=app_config, + inputs={"input_key": "input_value"}, + files=[], + user_id="user-roundtrip", + stream=False, + invoke_from=InvokeFrom.DEBUGGER, + workflow_execution_id="workflow-exec-roundtrip", + ) + ), + ) + + +def _build_advanced_chat_generate_entity_for_roundtrip() -> WorkflowResumptionContext: + """Create an AdvancedChatAppGenerateEntity with realistic data for WorkflowResumptionContext tests.""" + app_config = WorkflowUIBasedAppConfig( + tenant_id="tenant-advanced", + app_id="app-advanced", + app_mode=AppMode.ADVANCED_CHAT, + workflow_id="workflow-advanced", + ) + serialized_state = json.dumps({"state": "workflow"}) + + return WorkflowResumptionContext( + serialized_graph_runtime_state=serialized_state, + generate_entity=_AdvancedChatAppGenerateEntityWrapper( + entity=AdvancedChatAppGenerateEntity( + task_id="advanced-task", + app_config=app_config, + inputs={"topic": "roundtrip"}, + files=[], + user_id="advanced-user", + stream=False, + invoke_from=InvokeFrom.DEBUGGER, + workflow_run_id="advanced-run-id", + query="Explain serialization behavior", + ) + ), + ) + + +@pytest.mark.parametrize( + "state", + [ + pytest.param( + _build_advanced_chat_generate_entity_for_roundtrip(), + id="advanced_chat", + ), + pytest.param( + _build_workflow_generate_entity_for_roundtrip(), + id="workflow", + ), + ], +) +def test_workflow_resumption_context_dumps_loads_roundtrip(state: WorkflowResumptionContext): + """WorkflowResumptionContext roundtrip preserves workflow generate entity metadata.""" + dumped = state.dumps() + loaded = WorkflowResumptionContext.loads(dumped) + + assert loaded == state + assert loaded.serialized_graph_runtime_state == state.serialized_graph_runtime_state + restored_entity = loaded.get_generate_entity() + assert isinstance(restored_entity, type(state.generate_entity.entity)) diff --git a/api/tests/unit_tests/core/file/test_models.py b/api/tests/unit_tests/core/file/test_models.py index 3ada2087c6..f55063ee1a 100644 --- a/api/tests/unit_tests/core/file/test_models.py +++ b/api/tests/unit_tests/core/file/test_models.py @@ -23,3 +23,32 @@ def test_file(): assert file.extension == ".png" assert file.mime_type == "image/png" assert file.size == 67 + + +def test_file_model_validate_with_legacy_fields(): + """Test `File` model can handle data containing compatibility fields.""" + data = { + "id": "test-file", + "tenant_id": "test-tenant-id", + "type": "image", + "transfer_method": "tool_file", + "related_id": "test-related-id", + "filename": "image.png", + "extension": ".png", + "mime_type": "image/png", + "size": 67, + "storage_key": "test-storage-key", + "url": "https://example.com/image.png", + # Extra legacy fields + "tool_file_id": "tool-file-123", + "upload_file_id": "upload-file-456", + "datasource_file_id": "datasource-file-789", + } + + # Should be able to create `File` object without raising an exception + file = File.model_validate(data) + + # The File object does not have tool_file_id, upload_file_id, or datasource_file_id as attributes. + # Instead, check it does not expose unrecognized legacy fields (should raise on getattr). + for legacy_field in ("tool_file_id", "upload_file_id", "datasource_file_id"): + assert not hasattr(file, legacy_field) diff --git a/api/tests/unit_tests/core/helper/code_executor/__init__.py b/api/tests/unit_tests/core/helper/code_executor/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/helper/code_executor/javascript/__init__.py b/api/tests/unit_tests/core/helper/code_executor/javascript/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/helper/code_executor/javascript/test_javascript_transformer.py b/api/tests/unit_tests/core/helper/code_executor/javascript/test_javascript_transformer.py new file mode 100644 index 0000000000..03f37756d7 --- /dev/null +++ b/api/tests/unit_tests/core/helper/code_executor/javascript/test_javascript_transformer.py @@ -0,0 +1,12 @@ +from core.helper.code_executor.javascript.javascript_code_provider import JavascriptCodeProvider +from core.helper.code_executor.javascript.javascript_transformer import NodeJsTemplateTransformer + + +def test_get_runner_script(): + code = JavascriptCodeProvider.get_default_code() + inputs = {"arg1": "hello, ", "arg2": "world!"} + script = NodeJsTemplateTransformer.assemble_runner_script(code, inputs) + script_lines = script.splitlines() + code_lines = code.splitlines() + # Check that the first lines of script are exactly the same as code + assert script_lines[: len(code_lines)] == code_lines diff --git a/api/tests/unit_tests/core/helper/code_executor/python3/__init__.py b/api/tests/unit_tests/core/helper/code_executor/python3/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/helper/code_executor/python3/test_python3_transformer.py b/api/tests/unit_tests/core/helper/code_executor/python3/test_python3_transformer.py new file mode 100644 index 0000000000..1166cb8892 --- /dev/null +++ b/api/tests/unit_tests/core/helper/code_executor/python3/test_python3_transformer.py @@ -0,0 +1,12 @@ +from core.helper.code_executor.python3.python3_code_provider import Python3CodeProvider +from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer + + +def test_get_runner_script(): + code = Python3CodeProvider.get_default_code() + inputs = {"arg1": "hello, ", "arg2": "world!"} + script = Python3TemplateTransformer.assemble_runner_script(code, inputs) + script_lines = script.splitlines() + code_lines = code.splitlines() + # Check that the first lines of script are exactly the same as code + assert script_lines[: len(code_lines)] == code_lines diff --git a/api/tests/unit_tests/core/mcp/__init__.py b/api/tests/unit_tests/core/mcp/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/mcp/auth/__init__.py b/api/tests/unit_tests/core/mcp/auth/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/mcp/auth/test_auth_flow.py b/api/tests/unit_tests/core/mcp/auth/test_auth_flow.py new file mode 100644 index 0000000000..60f37b6de0 --- /dev/null +++ b/api/tests/unit_tests/core/mcp/auth/test_auth_flow.py @@ -0,0 +1,766 @@ +"""Unit tests for MCP OAuth authentication flow.""" + +from unittest.mock import Mock, patch + +import pytest + +from core.entities.mcp_provider import MCPProviderEntity +from core.mcp.auth.auth_flow import ( + OAUTH_STATE_EXPIRY_SECONDS, + OAUTH_STATE_REDIS_KEY_PREFIX, + OAuthCallbackState, + _create_secure_redis_state, + _retrieve_redis_state, + auth, + check_support_resource_discovery, + discover_oauth_metadata, + exchange_authorization, + generate_pkce_challenge, + handle_callback, + refresh_authorization, + register_client, + start_authorization, +) +from core.mcp.entities import AuthActionType, AuthResult +from core.mcp.types import ( + LATEST_PROTOCOL_VERSION, + OAuthClientInformation, + OAuthClientInformationFull, + OAuthClientMetadata, + OAuthMetadata, + OAuthTokens, + ProtectedResourceMetadata, +) + + +class TestPKCEGeneration: + """Test PKCE challenge generation.""" + + def test_generate_pkce_challenge(self): + """Test PKCE challenge and verifier generation.""" + code_verifier, code_challenge = generate_pkce_challenge() + + # Verify format - should be URL-safe base64 without padding + assert "=" not in code_verifier + assert "+" not in code_verifier + assert "/" not in code_verifier + assert "=" not in code_challenge + assert "+" not in code_challenge + assert "/" not in code_challenge + + # Verify length + assert len(code_verifier) > 40 # Should be around 54 characters + assert len(code_challenge) > 40 # Should be around 43 characters + + def test_generate_pkce_challenge_uniqueness(self): + """Test that PKCE generation produces unique values.""" + results = set() + for _ in range(10): + code_verifier, code_challenge = generate_pkce_challenge() + results.add((code_verifier, code_challenge)) + + # All should be unique + assert len(results) == 10 + + +class TestRedisStateManagement: + """Test Redis state management functions.""" + + @patch("core.mcp.auth.auth_flow.redis_client") + def test_create_secure_redis_state(self, mock_redis): + """Test creating secure Redis state.""" + state_data = OAuthCallbackState( + provider_id="test-provider", + tenant_id="test-tenant", + server_url="https://example.com", + metadata=None, + client_information=OAuthClientInformation(client_id="test-client"), + code_verifier="test-verifier", + redirect_uri="https://redirect.example.com", + ) + + state_key = _create_secure_redis_state(state_data) + + # Verify state key format + assert len(state_key) > 20 # Should be a secure random token + + # Verify Redis call + mock_redis.setex.assert_called_once() + call_args = mock_redis.setex.call_args + assert call_args[0][0].startswith(OAUTH_STATE_REDIS_KEY_PREFIX) + assert call_args[0][1] == OAUTH_STATE_EXPIRY_SECONDS + assert state_data.model_dump_json() in call_args[0][2] + + @patch("core.mcp.auth.auth_flow.redis_client") + def test_retrieve_redis_state_success(self, mock_redis): + """Test retrieving state from Redis.""" + state_data = OAuthCallbackState( + provider_id="test-provider", + tenant_id="test-tenant", + server_url="https://example.com", + metadata=None, + client_information=OAuthClientInformation(client_id="test-client"), + code_verifier="test-verifier", + redirect_uri="https://redirect.example.com", + ) + mock_redis.get.return_value = state_data.model_dump_json() + + result = _retrieve_redis_state("test-state-key") + + # Verify result + assert result.provider_id == "test-provider" + assert result.tenant_id == "test-tenant" + assert result.server_url == "https://example.com" + + # Verify Redis calls + mock_redis.get.assert_called_once_with(f"{OAUTH_STATE_REDIS_KEY_PREFIX}test-state-key") + mock_redis.delete.assert_called_once_with(f"{OAUTH_STATE_REDIS_KEY_PREFIX}test-state-key") + + @patch("core.mcp.auth.auth_flow.redis_client") + def test_retrieve_redis_state_not_found(self, mock_redis): + """Test retrieving non-existent state from Redis.""" + mock_redis.get.return_value = None + + with pytest.raises(ValueError) as exc_info: + _retrieve_redis_state("nonexistent-key") + + assert "State parameter has expired or does not exist" in str(exc_info.value) + + @patch("core.mcp.auth.auth_flow.redis_client") + def test_retrieve_redis_state_invalid_json(self, mock_redis): + """Test retrieving invalid JSON state from Redis.""" + mock_redis.get.return_value = '{"invalid": json}' + + with pytest.raises(ValueError) as exc_info: + _retrieve_redis_state("test-key") + + assert "Invalid state parameter" in str(exc_info.value) + # State should still be deleted + mock_redis.delete.assert_called_once() + + +class TestOAuthDiscovery: + """Test OAuth discovery functions.""" + + @patch("core.helper.ssrf_proxy.get") + def test_check_support_resource_discovery_success(self, mock_get): + """Test successful resource discovery check.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"authorization_server_url": ["https://auth.example.com"]} + mock_get.return_value = mock_response + + supported, auth_url = check_support_resource_discovery("https://api.example.com/endpoint") + + assert supported is True + assert auth_url == "https://auth.example.com" + mock_get.assert_called_once_with( + "https://api.example.com/.well-known/oauth-protected-resource", + headers={"MCP-Protocol-Version": LATEST_PROTOCOL_VERSION, "User-Agent": "Dify"}, + ) + + @patch("core.helper.ssrf_proxy.get") + def test_check_support_resource_discovery_not_supported(self, mock_get): + """Test resource discovery not supported.""" + mock_response = Mock() + mock_response.status_code = 404 + mock_get.return_value = mock_response + + supported, auth_url = check_support_resource_discovery("https://api.example.com") + + assert supported is False + assert auth_url == "" + + @patch("core.helper.ssrf_proxy.get") + def test_check_support_resource_discovery_with_query_fragment(self, mock_get): + """Test resource discovery with query and fragment.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"authorization_server_url": ["https://auth.example.com"]} + mock_get.return_value = mock_response + + supported, auth_url = check_support_resource_discovery("https://api.example.com/path?query=1#fragment") + + assert supported is True + assert auth_url == "https://auth.example.com" + mock_get.assert_called_once_with( + "https://api.example.com/.well-known/oauth-protected-resource?query=1#fragment", + headers={"MCP-Protocol-Version": LATEST_PROTOCOL_VERSION, "User-Agent": "Dify"}, + ) + + def test_discover_oauth_metadata_with_resource_discovery(self): + """Test OAuth metadata discovery with resource discovery support.""" + with patch("core.mcp.auth.auth_flow.discover_protected_resource_metadata") as mock_prm: + with patch("core.mcp.auth.auth_flow.discover_oauth_authorization_server_metadata") as mock_asm: + # Mock protected resource metadata with auth server URL + mock_prm.return_value = ProtectedResourceMetadata( + resource="https://api.example.com", + authorization_servers=["https://auth.example.com"], + ) + + # Mock OAuth authorization server metadata + mock_asm.return_value = OAuthMetadata( + authorization_endpoint="https://auth.example.com/authorize", + token_endpoint="https://auth.example.com/token", + response_types_supported=["code"], + ) + + oauth_metadata, prm, scope = discover_oauth_metadata("https://api.example.com") + + assert oauth_metadata is not None + assert oauth_metadata.authorization_endpoint == "https://auth.example.com/authorize" + assert oauth_metadata.token_endpoint == "https://auth.example.com/token" + assert prm is not None + assert prm.authorization_servers == ["https://auth.example.com"] + + # Verify the discovery functions were called + mock_prm.assert_called_once() + mock_asm.assert_called_once() + + def test_discover_oauth_metadata_without_resource_discovery(self): + """Test OAuth metadata discovery without resource discovery.""" + with patch("core.mcp.auth.auth_flow.discover_protected_resource_metadata") as mock_prm: + with patch("core.mcp.auth.auth_flow.discover_oauth_authorization_server_metadata") as mock_asm: + # Mock no protected resource metadata + mock_prm.return_value = None + + # Mock OAuth authorization server metadata + mock_asm.return_value = OAuthMetadata( + authorization_endpoint="https://api.example.com/oauth/authorize", + token_endpoint="https://api.example.com/oauth/token", + response_types_supported=["code"], + ) + + oauth_metadata, prm, scope = discover_oauth_metadata("https://api.example.com") + + assert oauth_metadata is not None + assert oauth_metadata.authorization_endpoint == "https://api.example.com/oauth/authorize" + assert prm is None + + # Verify the discovery functions were called + mock_prm.assert_called_once() + mock_asm.assert_called_once() + + @patch("core.helper.ssrf_proxy.get") + def test_discover_oauth_metadata_not_found(self, mock_get): + """Test OAuth metadata discovery when not found.""" + with patch("core.mcp.auth.auth_flow.check_support_resource_discovery") as mock_check: + mock_check.return_value = (False, "") + + mock_response = Mock() + mock_response.status_code = 404 + mock_get.return_value = mock_response + + oauth_metadata, prm, scope = discover_oauth_metadata("https://api.example.com") + + assert oauth_metadata is None + + +class TestAuthorizationFlow: + """Test authorization flow functions.""" + + @patch("core.mcp.auth.auth_flow._create_secure_redis_state") + def test_start_authorization_with_metadata(self, mock_create_state): + """Test starting authorization with metadata.""" + mock_create_state.return_value = "secure-state-key" + + metadata = OAuthMetadata( + authorization_endpoint="https://auth.example.com/authorize", + token_endpoint="https://auth.example.com/token", + response_types_supported=["code"], + code_challenge_methods_supported=["S256"], + ) + client_info = OAuthClientInformation(client_id="test-client-id") + + auth_url, code_verifier = start_authorization( + "https://api.example.com", + metadata, + client_info, + "https://redirect.example.com", + "provider-id", + "tenant-id", + ) + + # Verify URL format + assert auth_url.startswith("https://auth.example.com/authorize?") + assert "response_type=code" in auth_url + assert "client_id=test-client-id" in auth_url + assert "code_challenge=" in auth_url + assert "code_challenge_method=S256" in auth_url + assert "redirect_uri=https%3A%2F%2Fredirect.example.com" in auth_url + assert "state=secure-state-key" in auth_url + + # Verify code verifier + assert len(code_verifier) > 40 + + # Verify state was stored + mock_create_state.assert_called_once() + state_data = mock_create_state.call_args[0][0] + assert state_data.provider_id == "provider-id" + assert state_data.tenant_id == "tenant-id" + assert state_data.code_verifier == code_verifier + + def test_start_authorization_without_metadata(self): + """Test starting authorization without metadata.""" + with patch("core.mcp.auth.auth_flow._create_secure_redis_state") as mock_create_state: + mock_create_state.return_value = "secure-state-key" + + client_info = OAuthClientInformation(client_id="test-client-id") + + auth_url, code_verifier = start_authorization( + "https://api.example.com", + None, + client_info, + "https://redirect.example.com", + "provider-id", + "tenant-id", + ) + + # Should use default authorization endpoint + assert auth_url.startswith("https://api.example.com/authorize?") + + def test_start_authorization_invalid_metadata(self): + """Test starting authorization with invalid metadata.""" + metadata = OAuthMetadata( + authorization_endpoint="https://auth.example.com/authorize", + token_endpoint="https://auth.example.com/token", + response_types_supported=["token"], # No "code" support + code_challenge_methods_supported=["plain"], # No "S256" support + ) + client_info = OAuthClientInformation(client_id="test-client-id") + + with pytest.raises(ValueError) as exc_info: + start_authorization( + "https://api.example.com", + metadata, + client_info, + "https://redirect.example.com", + "provider-id", + "tenant-id", + ) + + assert "does not support response type code" in str(exc_info.value) + + @patch("core.helper.ssrf_proxy.post") + def test_exchange_authorization_success(self, mock_post): + """Test successful authorization code exchange.""" + mock_response = Mock() + mock_response.is_success = True + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "access_token": "new-access-token", + "token_type": "Bearer", + "expires_in": 3600, + "refresh_token": "new-refresh-token", + } + mock_post.return_value = mock_response + + metadata = OAuthMetadata( + authorization_endpoint="https://auth.example.com/authorize", + token_endpoint="https://auth.example.com/token", + response_types_supported=["code"], + grant_types_supported=["authorization_code"], + ) + client_info = OAuthClientInformation(client_id="test-client-id", client_secret="test-secret") + + tokens = exchange_authorization( + "https://api.example.com", + metadata, + client_info, + "auth-code-123", + "code-verifier-xyz", + "https://redirect.example.com", + ) + + assert tokens.access_token == "new-access-token" + assert tokens.token_type == "Bearer" + assert tokens.expires_in == 3600 + assert tokens.refresh_token == "new-refresh-token" + + # Verify request + mock_post.assert_called_once_with( + "https://auth.example.com/token", + data={ + "grant_type": "authorization_code", + "client_id": "test-client-id", + "client_secret": "test-secret", + "code": "auth-code-123", + "code_verifier": "code-verifier-xyz", + "redirect_uri": "https://redirect.example.com", + }, + ) + + @patch("core.helper.ssrf_proxy.post") + def test_exchange_authorization_failure(self, mock_post): + """Test failed authorization code exchange.""" + mock_response = Mock() + mock_response.is_success = False + mock_response.status_code = 400 + mock_post.return_value = mock_response + + client_info = OAuthClientInformation(client_id="test-client-id") + + with pytest.raises(ValueError) as exc_info: + exchange_authorization( + "https://api.example.com", + None, + client_info, + "invalid-code", + "code-verifier", + "https://redirect.example.com", + ) + + assert "Token exchange failed: HTTP 400" in str(exc_info.value) + + @patch("core.helper.ssrf_proxy.post") + def test_refresh_authorization_success(self, mock_post): + """Test successful token refresh.""" + mock_response = Mock() + mock_response.is_success = True + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "access_token": "refreshed-access-token", + "token_type": "Bearer", + "expires_in": 3600, + "refresh_token": "new-refresh-token", + } + mock_post.return_value = mock_response + + metadata = OAuthMetadata( + authorization_endpoint="https://auth.example.com/authorize", + token_endpoint="https://auth.example.com/token", + response_types_supported=["code"], + grant_types_supported=["refresh_token"], + ) + client_info = OAuthClientInformation(client_id="test-client-id") + + tokens = refresh_authorization("https://api.example.com", metadata, client_info, "old-refresh-token") + + assert tokens.access_token == "refreshed-access-token" + assert tokens.refresh_token == "new-refresh-token" + + # Verify request + mock_post.assert_called_once_with( + "https://auth.example.com/token", + data={ + "grant_type": "refresh_token", + "client_id": "test-client-id", + "refresh_token": "old-refresh-token", + }, + ) + + @patch("core.helper.ssrf_proxy.post") + def test_register_client_success(self, mock_post): + """Test successful client registration.""" + mock_response = Mock() + mock_response.is_success = True + mock_response.json.return_value = { + "client_id": "new-client-id", + "client_secret": "new-client-secret", + "client_name": "Dify", + "redirect_uris": ["https://redirect.example.com"], + } + mock_post.return_value = mock_response + + metadata = OAuthMetadata( + authorization_endpoint="https://auth.example.com/authorize", + token_endpoint="https://auth.example.com/token", + registration_endpoint="https://auth.example.com/register", + response_types_supported=["code"], + ) + client_metadata = OAuthClientMetadata( + client_name="Dify", + redirect_uris=["https://redirect.example.com"], + grant_types=["authorization_code"], + response_types=["code"], + ) + + client_info = register_client("https://api.example.com", metadata, client_metadata) + + assert isinstance(client_info, OAuthClientInformationFull) + assert client_info.client_id == "new-client-id" + assert client_info.client_secret == "new-client-secret" + + # Verify request + mock_post.assert_called_once_with( + "https://auth.example.com/register", + json=client_metadata.model_dump(), + headers={"Content-Type": "application/json"}, + ) + + def test_register_client_no_endpoint(self): + """Test client registration when no endpoint available.""" + metadata = OAuthMetadata( + authorization_endpoint="https://auth.example.com/authorize", + token_endpoint="https://auth.example.com/token", + registration_endpoint=None, + response_types_supported=["code"], + ) + client_metadata = OAuthClientMetadata(client_name="Dify", redirect_uris=["https://redirect.example.com"]) + + with pytest.raises(ValueError) as exc_info: + register_client("https://api.example.com", metadata, client_metadata) + + assert "does not support dynamic client registration" in str(exc_info.value) + + +class TestCallbackHandling: + """Test OAuth callback handling.""" + + @patch("core.mcp.auth.auth_flow._retrieve_redis_state") + @patch("core.mcp.auth.auth_flow.exchange_authorization") + def test_handle_callback_success(self, mock_exchange, mock_retrieve_state): + """Test successful callback handling.""" + # Setup state + state_data = OAuthCallbackState( + provider_id="test-provider", + tenant_id="test-tenant", + server_url="https://api.example.com", + metadata=None, + client_information=OAuthClientInformation(client_id="test-client"), + code_verifier="test-verifier", + redirect_uri="https://redirect.example.com", + ) + mock_retrieve_state.return_value = state_data + + # Setup token exchange + tokens = OAuthTokens( + access_token="new-token", + token_type="Bearer", + expires_in=3600, + ) + mock_exchange.return_value = tokens + + # Setup service + mock_service = Mock() + + state_result, tokens_result = handle_callback("state-key", "auth-code") + + assert state_result == state_data + assert tokens_result == tokens + + # Verify calls + mock_retrieve_state.assert_called_once_with("state-key") + mock_exchange.assert_called_once_with( + "https://api.example.com", + None, + state_data.client_information, + "auth-code", + "test-verifier", + "https://redirect.example.com", + ) + # Note: handle_callback no longer saves tokens directly, it just returns them + # The caller (e.g., controller) is responsible for saving via execute_auth_actions + + +class TestAuthOrchestration: + """Test the main auth orchestration function.""" + + @pytest.fixture + def mock_provider(self): + """Create a mock provider entity.""" + provider = Mock(spec=MCPProviderEntity) + provider.id = "provider-id" + provider.tenant_id = "tenant-id" + provider.decrypt_server_url.return_value = "https://api.example.com" + provider.client_metadata = OAuthClientMetadata( + client_name="Dify", + redirect_uris=["https://redirect.example.com"], + ) + provider.redirect_url = "https://redirect.example.com" + provider.retrieve_client_information.return_value = None + provider.retrieve_tokens.return_value = None + return provider + + @pytest.fixture + def mock_service(self): + """Create a mock MCP service.""" + return Mock() + + @patch("core.mcp.auth.auth_flow.discover_oauth_metadata") + @patch("core.mcp.auth.auth_flow.register_client") + @patch("core.mcp.auth.auth_flow.start_authorization") + def test_auth_new_registration(self, mock_start_auth, mock_register, mock_discover, mock_provider, mock_service): + """Test auth flow for new client registration.""" + # Setup + mock_discover.return_value = ( + OAuthMetadata( + authorization_endpoint="https://auth.example.com/authorize", + token_endpoint="https://auth.example.com/token", + response_types_supported=["code"], + grant_types_supported=["authorization_code"], + ), + None, + None, + ) + mock_register.return_value = OAuthClientInformationFull( + client_id="new-client-id", + client_name="Dify", + redirect_uris=["https://redirect.example.com"], + ) + mock_start_auth.return_value = ("https://auth.example.com/authorize?...", "code-verifier") + + result = auth(mock_provider) + + # auth() now returns AuthResult + assert isinstance(result, AuthResult) + assert result.response == {"authorization_url": "https://auth.example.com/authorize?..."} + + # Verify that the result contains the correct actions + assert len(result.actions) == 2 + # Check for SAVE_CLIENT_INFO action + client_info_action = next(a for a in result.actions if a.action_type == AuthActionType.SAVE_CLIENT_INFO) + assert client_info_action.data == {"client_information": mock_register.return_value.model_dump()} + assert client_info_action.provider_id == "provider-id" + assert client_info_action.tenant_id == "tenant-id" + + # Check for SAVE_CODE_VERIFIER action + verifier_action = next(a for a in result.actions if a.action_type == AuthActionType.SAVE_CODE_VERIFIER) + assert verifier_action.data == {"code_verifier": "code-verifier"} + assert verifier_action.provider_id == "provider-id" + assert verifier_action.tenant_id == "tenant-id" + + # Verify calls + mock_register.assert_called_once() + + @patch("core.mcp.auth.auth_flow.discover_oauth_metadata") + @patch("core.mcp.auth.auth_flow._retrieve_redis_state") + @patch("core.mcp.auth.auth_flow.exchange_authorization") + def test_auth_exchange_code(self, mock_exchange, mock_retrieve_state, mock_discover, mock_provider, mock_service): + """Test auth flow for exchanging authorization code.""" + # Setup metadata discovery + mock_discover.return_value = ( + OAuthMetadata( + authorization_endpoint="https://auth.example.com/authorize", + token_endpoint="https://auth.example.com/token", + response_types_supported=["code"], + grant_types_supported=["authorization_code"], + ), + None, + None, + ) + + # Setup existing client + mock_provider.retrieve_client_information.return_value = OAuthClientInformation(client_id="existing-client") + + # Setup state retrieval + state_data = OAuthCallbackState( + provider_id="provider-id", + tenant_id="tenant-id", + server_url="https://api.example.com", + metadata=None, + client_information=OAuthClientInformation(client_id="existing-client"), + code_verifier="test-verifier", + redirect_uri="https://redirect.example.com", + ) + mock_retrieve_state.return_value = state_data + + # Setup token exchange + tokens = OAuthTokens(access_token="new-token", token_type="Bearer", expires_in=3600) + mock_exchange.return_value = tokens + + result = auth(mock_provider, authorization_code="auth-code", state_param="state-key") + + # auth() now returns AuthResult, not a dict + assert isinstance(result, AuthResult) + assert result.response == {"result": "success"} + + # Verify that the result contains the correct action + assert len(result.actions) == 1 + assert result.actions[0].action_type == AuthActionType.SAVE_TOKENS + assert result.actions[0].data == tokens.model_dump() + assert result.actions[0].provider_id == "provider-id" + assert result.actions[0].tenant_id == "tenant-id" + + @patch("core.mcp.auth.auth_flow.discover_oauth_metadata") + def test_auth_exchange_code_without_state(self, mock_discover, mock_provider, mock_service): + """Test auth flow fails when exchanging code without state.""" + # Setup metadata discovery + mock_discover.return_value = ( + OAuthMetadata( + authorization_endpoint="https://auth.example.com/authorize", + token_endpoint="https://auth.example.com/token", + response_types_supported=["code"], + grant_types_supported=["authorization_code"], + ), + None, + None, + ) + + mock_provider.retrieve_client_information.return_value = OAuthClientInformation(client_id="existing-client") + + with pytest.raises(ValueError) as exc_info: + auth(mock_provider, authorization_code="auth-code") + + assert "State parameter is required" in str(exc_info.value) + + @patch("core.mcp.auth.auth_flow.refresh_authorization") + def test_auth_refresh_token(self, mock_refresh, mock_provider, mock_service): + """Test auth flow for refreshing tokens.""" + # Setup existing client and tokens + mock_provider.retrieve_client_information.return_value = OAuthClientInformation(client_id="existing-client") + mock_provider.retrieve_tokens.return_value = OAuthTokens( + access_token="old-token", + token_type="Bearer", + expires_in=0, + refresh_token="refresh-token", + ) + + # Setup refresh + new_tokens = OAuthTokens( + access_token="refreshed-token", + token_type="Bearer", + expires_in=3600, + refresh_token="new-refresh-token", + ) + mock_refresh.return_value = new_tokens + + with patch("core.mcp.auth.auth_flow.discover_oauth_metadata") as mock_discover: + mock_discover.return_value = ( + OAuthMetadata( + authorization_endpoint="https://auth.example.com/authorize", + token_endpoint="https://auth.example.com/token", + response_types_supported=["code"], + grant_types_supported=["authorization_code"], + ), + None, + None, + ) + + result = auth(mock_provider) + + # auth() now returns AuthResult + assert isinstance(result, AuthResult) + assert result.response == {"result": "success"} + + # Verify that the result contains the correct action + assert len(result.actions) == 1 + assert result.actions[0].action_type == AuthActionType.SAVE_TOKENS + assert result.actions[0].data == new_tokens.model_dump() + assert result.actions[0].provider_id == "provider-id" + assert result.actions[0].tenant_id == "tenant-id" + + # Verify refresh was called + mock_refresh.assert_called_once() + + @patch("core.mcp.auth.auth_flow.discover_oauth_metadata") + def test_auth_registration_fails_with_code(self, mock_discover, mock_provider, mock_service): + """Test auth fails when no client info exists but code is provided.""" + # Setup metadata discovery + mock_discover.return_value = ( + OAuthMetadata( + authorization_endpoint="https://auth.example.com/authorize", + token_endpoint="https://auth.example.com/token", + response_types_supported=["code"], + grant_types_supported=["authorization_code"], + ), + None, + None, + ) + + mock_provider.retrieve_client_information.return_value = None + + with pytest.raises(ValueError) as exc_info: + auth(mock_provider, authorization_code="auth-code") + + assert "Existing OAuth client information is required" in str(exc_info.value) diff --git a/api/tests/unit_tests/core/mcp/client/test_session.py b/api/tests/unit_tests/core/mcp/client/test_session.py index 08d5b7d21c..8b24c8ce75 100644 --- a/api/tests/unit_tests/core/mcp/client/test_session.py +++ b/api/tests/unit_tests/core/mcp/client/test_session.py @@ -395,9 +395,6 @@ def test_client_capabilities_default(): # Assert default capabilities assert received_capabilities is not None - assert received_capabilities.sampling is not None - assert received_capabilities.roots is not None - assert received_capabilities.roots.listChanged is True def test_client_capabilities_with_custom_callbacks(): diff --git a/api/tests/unit_tests/core/mcp/client/test_sse.py b/api/tests/unit_tests/core/mcp/client/test_sse.py index aadd366762..490a647025 100644 --- a/api/tests/unit_tests/core/mcp/client/test_sse.py +++ b/api/tests/unit_tests/core/mcp/client/test_sse.py @@ -139,7 +139,9 @@ def test_sse_client_error_handling(): with patch("core.mcp.client.sse_client.create_ssrf_proxy_mcp_http_client") as mock_client_factory: with patch("core.mcp.client.sse_client.ssrf_proxy_sse_connect") as mock_sse_connect: # Mock 401 HTTP error - mock_error = httpx.HTTPStatusError("Unauthorized", request=Mock(), response=Mock(status_code=401)) + mock_response = Mock(status_code=401) + mock_response.headers = {"WWW-Authenticate": 'Bearer realm="example"'} + mock_error = httpx.HTTPStatusError("Unauthorized", request=Mock(), response=mock_response) mock_sse_connect.side_effect = mock_error with pytest.raises(MCPAuthError): @@ -150,7 +152,9 @@ def test_sse_client_error_handling(): with patch("core.mcp.client.sse_client.create_ssrf_proxy_mcp_http_client") as mock_client_factory: with patch("core.mcp.client.sse_client.ssrf_proxy_sse_connect") as mock_sse_connect: # Mock other HTTP error - mock_error = httpx.HTTPStatusError("Server Error", request=Mock(), response=Mock(status_code=500)) + mock_response = Mock(status_code=500) + mock_response.headers = {} + mock_error = httpx.HTTPStatusError("Server Error", request=Mock(), response=mock_response) mock_sse_connect.side_effect = mock_error with pytest.raises(MCPConnectionError): diff --git a/api/tests/unit_tests/core/mcp/test_auth_client_inheritance.py b/api/tests/unit_tests/core/mcp/test_auth_client_inheritance.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/mcp/test_entities.py b/api/tests/unit_tests/core/mcp/test_entities.py new file mode 100644 index 0000000000..3fede55916 --- /dev/null +++ b/api/tests/unit_tests/core/mcp/test_entities.py @@ -0,0 +1,239 @@ +"""Unit tests for MCP entities module.""" + +from unittest.mock import Mock + +from core.mcp.entities import ( + SUPPORTED_PROTOCOL_VERSIONS, + LifespanContextT, + RequestContext, + SessionT, +) +from core.mcp.session.base_session import BaseSession +from core.mcp.types import LATEST_PROTOCOL_VERSION, RequestParams + + +class TestProtocolVersions: + """Test protocol version constants.""" + + def test_supported_protocol_versions(self): + """Test supported protocol versions list.""" + assert isinstance(SUPPORTED_PROTOCOL_VERSIONS, list) + assert len(SUPPORTED_PROTOCOL_VERSIONS) >= 3 + assert "2024-11-05" in SUPPORTED_PROTOCOL_VERSIONS + assert "2025-03-26" in SUPPORTED_PROTOCOL_VERSIONS + assert LATEST_PROTOCOL_VERSION in SUPPORTED_PROTOCOL_VERSIONS + + def test_latest_protocol_version_is_supported(self): + """Test that latest protocol version is in supported versions.""" + assert LATEST_PROTOCOL_VERSION in SUPPORTED_PROTOCOL_VERSIONS + + +class TestRequestContext: + """Test RequestContext dataclass.""" + + def test_request_context_creation(self): + """Test creating a RequestContext instance.""" + mock_session = Mock(spec=BaseSession) + mock_lifespan = {"key": "value"} + mock_meta = RequestParams.Meta(progressToken="test-token") + + context = RequestContext( + request_id="test-request-123", + meta=mock_meta, + session=mock_session, + lifespan_context=mock_lifespan, + ) + + assert context.request_id == "test-request-123" + assert context.meta == mock_meta + assert context.session == mock_session + assert context.lifespan_context == mock_lifespan + + def test_request_context_with_none_meta(self): + """Test creating RequestContext with None meta.""" + mock_session = Mock(spec=BaseSession) + + context = RequestContext( + request_id=42, # Can be int or string + meta=None, + session=mock_session, + lifespan_context=None, + ) + + assert context.request_id == 42 + assert context.meta is None + assert context.session == mock_session + assert context.lifespan_context is None + + def test_request_context_attributes(self): + """Test RequestContext attributes are accessible.""" + mock_session = Mock(spec=BaseSession) + + context = RequestContext( + request_id="test-123", + meta=None, + session=mock_session, + lifespan_context=None, + ) + + # Verify attributes are accessible + assert hasattr(context, "request_id") + assert hasattr(context, "meta") + assert hasattr(context, "session") + assert hasattr(context, "lifespan_context") + + # Verify values + assert context.request_id == "test-123" + assert context.meta is None + assert context.session == mock_session + assert context.lifespan_context is None + + def test_request_context_generic_typing(self): + """Test RequestContext with different generic types.""" + # Create a mock session with specific type + mock_session = Mock(spec=BaseSession) + + # Create context with string lifespan context + context_str = RequestContext[BaseSession, str]( + request_id="test-1", + meta=None, + session=mock_session, + lifespan_context="string-context", + ) + assert isinstance(context_str.lifespan_context, str) + + # Create context with dict lifespan context + context_dict = RequestContext[BaseSession, dict]( + request_id="test-2", + meta=None, + session=mock_session, + lifespan_context={"key": "value"}, + ) + assert isinstance(context_dict.lifespan_context, dict) + + # Create context with custom object lifespan context + class CustomLifespan: + def __init__(self, data): + self.data = data + + custom_lifespan = CustomLifespan("test-data") + context_custom = RequestContext[BaseSession, CustomLifespan]( + request_id="test-3", + meta=None, + session=mock_session, + lifespan_context=custom_lifespan, + ) + assert isinstance(context_custom.lifespan_context, CustomLifespan) + assert context_custom.lifespan_context.data == "test-data" + + def test_request_context_with_progress_meta(self): + """Test RequestContext with progress metadata.""" + mock_session = Mock(spec=BaseSession) + progress_meta = RequestParams.Meta(progressToken="progress-123") + + context = RequestContext( + request_id="req-456", + meta=progress_meta, + session=mock_session, + lifespan_context=None, + ) + + assert context.meta is not None + assert context.meta.progressToken == "progress-123" + + def test_request_context_equality(self): + """Test RequestContext equality comparison.""" + mock_session1 = Mock(spec=BaseSession) + mock_session2 = Mock(spec=BaseSession) + + context1 = RequestContext( + request_id="test-123", + meta=None, + session=mock_session1, + lifespan_context="context", + ) + + context2 = RequestContext( + request_id="test-123", + meta=None, + session=mock_session1, + lifespan_context="context", + ) + + context3 = RequestContext( + request_id="test-456", + meta=None, + session=mock_session1, + lifespan_context="context", + ) + + # Same values should be equal + assert context1 == context2 + + # Different request_id should not be equal + assert context1 != context3 + + # Different session should not be equal + context4 = RequestContext( + request_id="test-123", + meta=None, + session=mock_session2, + lifespan_context="context", + ) + assert context1 != context4 + + def test_request_context_repr(self): + """Test RequestContext string representation.""" + mock_session = Mock(spec=BaseSession) + mock_session.__repr__ = Mock(return_value="") + + context = RequestContext( + request_id="test-123", + meta=None, + session=mock_session, + lifespan_context={"data": "test"}, + ) + + repr_str = repr(context) + assert "RequestContext" in repr_str + assert "test-123" in repr_str + assert "MockSession" in repr_str + + +class TestTypeVariables: + """Test type variables defined in the module.""" + + def test_session_type_var(self): + """Test SessionT type variable.""" + + # Create a custom session class + class CustomSession(BaseSession): + pass + + # Use in generic context + def process_session(session: SessionT) -> SessionT: + return session + + mock_session = Mock(spec=CustomSession) + result = process_session(mock_session) + assert result == mock_session + + def test_lifespan_context_type_var(self): + """Test LifespanContextT type variable.""" + + # Use in generic context + def process_lifespan(context: LifespanContextT) -> LifespanContextT: + return context + + # Test with different types + str_context = "string-context" + assert process_lifespan(str_context) == str_context + + dict_context = {"key": "value"} + assert process_lifespan(dict_context) == dict_context + + class CustomContext: + pass + + custom_context = CustomContext() + assert process_lifespan(custom_context) == custom_context diff --git a/api/tests/unit_tests/core/mcp/test_error.py b/api/tests/unit_tests/core/mcp/test_error.py new file mode 100644 index 0000000000..3a95fae673 --- /dev/null +++ b/api/tests/unit_tests/core/mcp/test_error.py @@ -0,0 +1,205 @@ +"""Unit tests for MCP error classes.""" + +import pytest + +from core.mcp.error import MCPAuthError, MCPConnectionError, MCPError + + +class TestMCPError: + """Test MCPError base exception class.""" + + def test_mcp_error_creation(self): + """Test creating MCPError instance.""" + error = MCPError("Test error message") + assert str(error) == "Test error message" + assert isinstance(error, Exception) + + def test_mcp_error_inheritance(self): + """Test MCPError inherits from Exception.""" + error = MCPError() + assert isinstance(error, Exception) + assert type(error).__name__ == "MCPError" + + def test_mcp_error_with_empty_message(self): + """Test MCPError with empty message.""" + error = MCPError() + assert str(error) == "" + + def test_mcp_error_raise(self): + """Test raising MCPError.""" + with pytest.raises(MCPError) as exc_info: + raise MCPError("Something went wrong") + + assert str(exc_info.value) == "Something went wrong" + + +class TestMCPConnectionError: + """Test MCPConnectionError exception class.""" + + def test_mcp_connection_error_creation(self): + """Test creating MCPConnectionError instance.""" + error = MCPConnectionError("Connection failed") + assert str(error) == "Connection failed" + assert isinstance(error, MCPError) + assert isinstance(error, Exception) + + def test_mcp_connection_error_inheritance(self): + """Test MCPConnectionError inheritance chain.""" + error = MCPConnectionError() + assert isinstance(error, MCPConnectionError) + assert isinstance(error, MCPError) + assert isinstance(error, Exception) + + def test_mcp_connection_error_raise(self): + """Test raising MCPConnectionError.""" + with pytest.raises(MCPConnectionError) as exc_info: + raise MCPConnectionError("Unable to connect to server") + + assert str(exc_info.value) == "Unable to connect to server" + + def test_mcp_connection_error_catch_as_mcp_error(self): + """Test catching MCPConnectionError as MCPError.""" + with pytest.raises(MCPError) as exc_info: + raise MCPConnectionError("Connection issue") + + assert isinstance(exc_info.value, MCPConnectionError) + assert str(exc_info.value) == "Connection issue" + + +class TestMCPAuthError: + """Test MCPAuthError exception class.""" + + def test_mcp_auth_error_creation(self): + """Test creating MCPAuthError instance.""" + error = MCPAuthError("Authentication failed") + assert str(error) == "Authentication failed" + assert isinstance(error, MCPConnectionError) + assert isinstance(error, MCPError) + assert isinstance(error, Exception) + + def test_mcp_auth_error_inheritance(self): + """Test MCPAuthError inheritance chain.""" + error = MCPAuthError() + assert isinstance(error, MCPAuthError) + assert isinstance(error, MCPConnectionError) + assert isinstance(error, MCPError) + assert isinstance(error, Exception) + + def test_mcp_auth_error_raise(self): + """Test raising MCPAuthError.""" + with pytest.raises(MCPAuthError) as exc_info: + raise MCPAuthError("Invalid credentials") + + assert str(exc_info.value) == "Invalid credentials" + + def test_mcp_auth_error_catch_hierarchy(self): + """Test catching MCPAuthError at different levels.""" + # Catch as MCPAuthError + with pytest.raises(MCPAuthError) as exc_info: + raise MCPAuthError("Auth specific error") + assert str(exc_info.value) == "Auth specific error" + + # Catch as MCPConnectionError + with pytest.raises(MCPConnectionError) as exc_info: + raise MCPAuthError("Auth connection error") + assert isinstance(exc_info.value, MCPAuthError) + assert str(exc_info.value) == "Auth connection error" + + # Catch as MCPError + with pytest.raises(MCPError) as exc_info: + raise MCPAuthError("Auth base error") + assert isinstance(exc_info.value, MCPAuthError) + assert str(exc_info.value) == "Auth base error" + + +class TestErrorHierarchy: + """Test the complete error hierarchy.""" + + def test_exception_hierarchy(self): + """Test the complete exception hierarchy.""" + # Create instances + base_error = MCPError("base") + connection_error = MCPConnectionError("connection") + auth_error = MCPAuthError("auth") + + # Test type relationships + assert not isinstance(base_error, MCPConnectionError) + assert not isinstance(base_error, MCPAuthError) + + assert isinstance(connection_error, MCPError) + assert not isinstance(connection_error, MCPAuthError) + + assert isinstance(auth_error, MCPError) + assert isinstance(auth_error, MCPConnectionError) + + def test_error_handling_patterns(self): + """Test common error handling patterns.""" + + def raise_auth_error(): + raise MCPAuthError("401 Unauthorized") + + def raise_connection_error(): + raise MCPConnectionError("Connection timeout") + + def raise_base_error(): + raise MCPError("Generic error") + + # Pattern 1: Catch specific errors first + errors_caught = [] + + for error_func in [raise_auth_error, raise_connection_error, raise_base_error]: + try: + error_func() + except MCPAuthError: + errors_caught.append("auth") + except MCPConnectionError: + errors_caught.append("connection") + except MCPError: + errors_caught.append("base") + + assert errors_caught == ["auth", "connection", "base"] + + # Pattern 2: Catch all as base error + for error_func in [raise_auth_error, raise_connection_error, raise_base_error]: + with pytest.raises(MCPError) as exc_info: + error_func() + assert isinstance(exc_info.value, MCPError) + + def test_error_with_cause(self): + """Test errors with cause (chained exceptions).""" + original_error = ValueError("Original error") + + def raise_chained_error(): + try: + raise original_error + except ValueError as e: + raise MCPConnectionError("Connection failed") from e + + with pytest.raises(MCPConnectionError) as exc_info: + raise_chained_error() + + assert str(exc_info.value) == "Connection failed" + assert exc_info.value.__cause__ == original_error + + def test_error_comparison(self): + """Test error instance comparison.""" + error1 = MCPError("Test message") + error2 = MCPError("Test message") + error3 = MCPError("Different message") + + # Errors are not equal even with same message (different instances) + assert error1 != error2 + assert error1 != error3 + + # But they have the same type + assert type(error1) == type(error2) == type(error3) + + def test_error_representation(self): + """Test error string representation.""" + base_error = MCPError("Base error message") + connection_error = MCPConnectionError("Connection error message") + auth_error = MCPAuthError("Auth error message") + + assert repr(base_error) == "MCPError('Base error message')" + assert repr(connection_error) == "MCPConnectionError('Connection error message')" + assert repr(auth_error) == "MCPAuthError('Auth error message')" diff --git a/api/tests/unit_tests/core/mcp/test_mcp_client.py b/api/tests/unit_tests/core/mcp/test_mcp_client.py new file mode 100644 index 0000000000..c0420d3371 --- /dev/null +++ b/api/tests/unit_tests/core/mcp/test_mcp_client.py @@ -0,0 +1,382 @@ +"""Unit tests for MCP client.""" + +from contextlib import ExitStack +from types import TracebackType +from unittest.mock import Mock, patch + +import pytest + +from core.mcp.error import MCPConnectionError +from core.mcp.mcp_client import MCPClient +from core.mcp.types import CallToolResult, ListToolsResult, TextContent, Tool, ToolAnnotations + + +class TestMCPClient: + """Test suite for MCPClient.""" + + def test_init(self): + """Test client initialization.""" + client = MCPClient( + server_url="http://test.example.com/mcp", + headers={"Authorization": "Bearer test"}, + timeout=30.0, + sse_read_timeout=60.0, + ) + + assert client.server_url == "http://test.example.com/mcp" + assert client.headers == {"Authorization": "Bearer test"} + assert client.timeout == 30.0 + assert client.sse_read_timeout == 60.0 + assert client._session is None + assert isinstance(client._exit_stack, ExitStack) + assert client._initialized is False + + def test_init_defaults(self): + """Test client initialization with defaults.""" + client = MCPClient(server_url="http://test.example.com") + + assert client.server_url == "http://test.example.com" + assert client.headers == {} + assert client.timeout is None + assert client.sse_read_timeout is None + + @patch("core.mcp.mcp_client.streamablehttp_client") + @patch("core.mcp.mcp_client.ClientSession") + def test_initialize_with_mcp_url(self, mock_client_session, mock_streamable_client): + """Test initialization with MCP URL.""" + # Setup mocks + mock_read_stream = Mock() + mock_write_stream = Mock() + mock_client_context = Mock() + mock_streamable_client.return_value.__enter__.return_value = ( + mock_read_stream, + mock_write_stream, + mock_client_context, + ) + + mock_session = Mock() + mock_client_session.return_value.__enter__.return_value = mock_session + + client = MCPClient(server_url="http://test.example.com/mcp") + client._initialize() + + # Verify streamable client was called + mock_streamable_client.assert_called_once_with( + url="http://test.example.com/mcp", + headers={}, + timeout=None, + sse_read_timeout=None, + ) + + # Verify session was created + mock_client_session.assert_called_once_with(mock_read_stream, mock_write_stream) + mock_session.initialize.assert_called_once() + assert client._session == mock_session + + @patch("core.mcp.mcp_client.sse_client") + @patch("core.mcp.mcp_client.ClientSession") + def test_initialize_with_sse_url(self, mock_client_session, mock_sse_client): + """Test initialization with SSE URL.""" + # Setup mocks + mock_read_stream = Mock() + mock_write_stream = Mock() + mock_sse_client.return_value.__enter__.return_value = (mock_read_stream, mock_write_stream) + + mock_session = Mock() + mock_client_session.return_value.__enter__.return_value = mock_session + + client = MCPClient(server_url="http://test.example.com/sse") + client._initialize() + + # Verify SSE client was called + mock_sse_client.assert_called_once_with( + url="http://test.example.com/sse", + headers={}, + timeout=None, + sse_read_timeout=None, + ) + + # Verify session was created + mock_client_session.assert_called_once_with(mock_read_stream, mock_write_stream) + mock_session.initialize.assert_called_once() + assert client._session == mock_session + + @patch("core.mcp.mcp_client.sse_client") + @patch("core.mcp.mcp_client.streamablehttp_client") + @patch("core.mcp.mcp_client.ClientSession") + def test_initialize_with_unknown_method_fallback_to_sse( + self, mock_client_session, mock_streamable_client, mock_sse_client + ): + """Test initialization with unknown method falls back to SSE.""" + # Setup mocks + mock_read_stream = Mock() + mock_write_stream = Mock() + mock_sse_client.return_value.__enter__.return_value = (mock_read_stream, mock_write_stream) + + mock_session = Mock() + mock_client_session.return_value.__enter__.return_value = mock_session + + client = MCPClient(server_url="http://test.example.com/unknown") + client._initialize() + + # Verify SSE client was tried + mock_sse_client.assert_called_once() + mock_streamable_client.assert_not_called() + + # Verify session was created + assert client._session == mock_session + + @patch("core.mcp.mcp_client.sse_client") + @patch("core.mcp.mcp_client.streamablehttp_client") + @patch("core.mcp.mcp_client.ClientSession") + def test_initialize_fallback_from_sse_to_mcp(self, mock_client_session, mock_streamable_client, mock_sse_client): + """Test initialization falls back from SSE to MCP on connection error.""" + # Setup SSE to fail + mock_sse_client.side_effect = MCPConnectionError("SSE connection failed") + + # Setup MCP to succeed + mock_read_stream = Mock() + mock_write_stream = Mock() + mock_client_context = Mock() + mock_streamable_client.return_value.__enter__.return_value = ( + mock_read_stream, + mock_write_stream, + mock_client_context, + ) + + mock_session = Mock() + mock_client_session.return_value.__enter__.return_value = mock_session + + client = MCPClient(server_url="http://test.example.com/unknown") + client._initialize() + + # Verify both were tried + mock_sse_client.assert_called_once() + mock_streamable_client.assert_called_once() + + # Verify session was created with MCP + assert client._session == mock_session + + @patch("core.mcp.mcp_client.streamablehttp_client") + @patch("core.mcp.mcp_client.ClientSession") + def test_connect_server_mcp(self, mock_client_session, mock_streamable_client): + """Test connect_server with MCP method.""" + # Setup mocks + mock_read_stream = Mock() + mock_write_stream = Mock() + mock_client_context = Mock() + mock_streamable_client.return_value.__enter__.return_value = ( + mock_read_stream, + mock_write_stream, + mock_client_context, + ) + + mock_session = Mock() + mock_client_session.return_value.__enter__.return_value = mock_session + + client = MCPClient(server_url="http://test.example.com") + client.connect_server(mock_streamable_client, "mcp") + + # Verify correct streams were passed + mock_client_session.assert_called_once_with(mock_read_stream, mock_write_stream) + mock_session.initialize.assert_called_once() + + @patch("core.mcp.mcp_client.sse_client") + @patch("core.mcp.mcp_client.ClientSession") + def test_connect_server_sse(self, mock_client_session, mock_sse_client): + """Test connect_server with SSE method.""" + # Setup mocks + mock_read_stream = Mock() + mock_write_stream = Mock() + mock_sse_client.return_value.__enter__.return_value = (mock_read_stream, mock_write_stream) + + mock_session = Mock() + mock_client_session.return_value.__enter__.return_value = mock_session + + client = MCPClient(server_url="http://test.example.com") + client.connect_server(mock_sse_client, "sse") + + # Verify correct streams were passed + mock_client_session.assert_called_once_with(mock_read_stream, mock_write_stream) + mock_session.initialize.assert_called_once() + + def test_context_manager_enter(self): + """Test context manager enter.""" + client = MCPClient(server_url="http://test.example.com") + + with patch.object(client, "_initialize") as mock_initialize: + result = client.__enter__() + + assert result == client + assert client._initialized is True + mock_initialize.assert_called_once() + + def test_context_manager_exit(self): + """Test context manager exit.""" + client = MCPClient(server_url="http://test.example.com") + + with patch.object(client, "cleanup") as mock_cleanup: + exc_type: type[BaseException] | None = None + exc_val: BaseException | None = None + exc_tb: TracebackType | None = None + client.__exit__(exc_type, exc_val, exc_tb) + + mock_cleanup.assert_called_once() + + def test_list_tools_not_initialized(self): + """Test list_tools when session not initialized.""" + client = MCPClient(server_url="http://test.example.com") + + with pytest.raises(ValueError) as exc_info: + client.list_tools() + + assert "Session not initialized" in str(exc_info.value) + + def test_list_tools_success(self): + """Test successful list_tools call.""" + client = MCPClient(server_url="http://test.example.com") + + # Setup mock session + mock_session = Mock() + expected_tools = [ + Tool( + name="test-tool", + description="A test tool", + inputSchema={"type": "object", "properties": {}}, + annotations=ToolAnnotations(title="Test Tool"), + ) + ] + mock_session.list_tools.return_value = ListToolsResult(tools=expected_tools) + client._session = mock_session + + result = client.list_tools() + + assert result == expected_tools + mock_session.list_tools.assert_called_once() + + def test_invoke_tool_not_initialized(self): + """Test invoke_tool when session not initialized.""" + client = MCPClient(server_url="http://test.example.com") + + with pytest.raises(ValueError) as exc_info: + client.invoke_tool("test-tool", {"arg": "value"}) + + assert "Session not initialized" in str(exc_info.value) + + def test_invoke_tool_success(self): + """Test successful invoke_tool call.""" + client = MCPClient(server_url="http://test.example.com") + + # Setup mock session + mock_session = Mock() + expected_result = CallToolResult( + content=[TextContent(type="text", text="Tool executed successfully")], + isError=False, + ) + mock_session.call_tool.return_value = expected_result + client._session = mock_session + + result = client.invoke_tool("test-tool", {"arg": "value"}) + + assert result == expected_result + mock_session.call_tool.assert_called_once_with("test-tool", {"arg": "value"}) + + def test_cleanup(self): + """Test cleanup method.""" + client = MCPClient(server_url="http://test.example.com") + mock_exit_stack = Mock(spec=ExitStack) + client._exit_stack = mock_exit_stack + client._session = Mock() + client._initialized = True + + client.cleanup() + + mock_exit_stack.close.assert_called_once() + assert client._session is None + assert client._initialized is False + + def test_cleanup_with_error(self): + """Test cleanup method with error.""" + client = MCPClient(server_url="http://test.example.com") + mock_exit_stack = Mock(spec=ExitStack) + mock_exit_stack.close.side_effect = Exception("Cleanup error") + client._exit_stack = mock_exit_stack + client._session = Mock() + client._initialized = True + + with pytest.raises(ValueError) as exc_info: + client.cleanup() + + assert "Error during cleanup: Cleanup error" in str(exc_info.value) + assert client._session is None + assert client._initialized is False + + @patch("core.mcp.mcp_client.streamablehttp_client") + @patch("core.mcp.mcp_client.ClientSession") + def test_full_context_manager_flow(self, mock_client_session, mock_streamable_client): + """Test full context manager flow.""" + # Setup mocks + mock_read_stream = Mock() + mock_write_stream = Mock() + mock_client_context = Mock() + mock_streamable_client.return_value.__enter__.return_value = ( + mock_read_stream, + mock_write_stream, + mock_client_context, + ) + + mock_session = Mock() + mock_client_session.return_value.__enter__.return_value = mock_session + + expected_tools = [Tool(name="test-tool", description="Test", inputSchema={})] + mock_session.list_tools.return_value = ListToolsResult(tools=expected_tools) + + with MCPClient(server_url="http://test.example.com/mcp") as client: + assert client._initialized is True + assert client._session == mock_session + + # Test tool operations + tools = client.list_tools() + assert tools == expected_tools + + # After exit, should be cleaned up + assert client._initialized is False + assert client._session is None + + def test_headers_passed_to_clients(self): + """Test that headers are properly passed to underlying clients.""" + custom_headers = { + "Authorization": "Bearer test-token", + "X-Custom-Header": "test-value", + } + + with patch("core.mcp.mcp_client.streamablehttp_client") as mock_streamable_client: + with patch("core.mcp.mcp_client.ClientSession") as mock_client_session: + # Setup mocks + mock_read_stream = Mock() + mock_write_stream = Mock() + mock_client_context = Mock() + mock_streamable_client.return_value.__enter__.return_value = ( + mock_read_stream, + mock_write_stream, + mock_client_context, + ) + + mock_session = Mock() + mock_client_session.return_value.__enter__.return_value = mock_session + + client = MCPClient( + server_url="http://test.example.com/mcp", + headers=custom_headers, + timeout=30.0, + sse_read_timeout=60.0, + ) + client._initialize() + + # Verify headers were passed + mock_streamable_client.assert_called_once_with( + url="http://test.example.com/mcp", + headers=custom_headers, + timeout=30.0, + sse_read_timeout=60.0, + ) diff --git a/api/tests/unit_tests/core/mcp/test_types.py b/api/tests/unit_tests/core/mcp/test_types.py new file mode 100644 index 0000000000..d4fe353f0a --- /dev/null +++ b/api/tests/unit_tests/core/mcp/test_types.py @@ -0,0 +1,492 @@ +"""Unit tests for MCP types module.""" + +import pytest +from pydantic import ValidationError + +from core.mcp.types import ( + INTERNAL_ERROR, + INVALID_PARAMS, + INVALID_REQUEST, + LATEST_PROTOCOL_VERSION, + METHOD_NOT_FOUND, + PARSE_ERROR, + SERVER_LATEST_PROTOCOL_VERSION, + Annotations, + CallToolRequest, + CallToolRequestParams, + CallToolResult, + ClientCapabilities, + CompleteRequest, + CompleteRequestParams, + CompleteResult, + Completion, + CompletionArgument, + CompletionContext, + ErrorData, + ImageContent, + Implementation, + InitializeRequest, + InitializeRequestParams, + InitializeResult, + JSONRPCError, + JSONRPCMessage, + JSONRPCNotification, + JSONRPCRequest, + JSONRPCResponse, + ListToolsRequest, + ListToolsResult, + OAuthClientInformation, + OAuthClientMetadata, + OAuthMetadata, + OAuthTokens, + PingRequest, + ProgressNotification, + ProgressNotificationParams, + PromptReference, + RequestParams, + ResourceTemplateReference, + Result, + ServerCapabilities, + TextContent, + Tool, + ToolAnnotations, +) + + +class TestConstants: + """Test module constants.""" + + def test_protocol_versions(self): + """Test protocol version constants.""" + assert LATEST_PROTOCOL_VERSION == "2025-06-18" + assert SERVER_LATEST_PROTOCOL_VERSION == "2024-11-05" + + def test_error_codes(self): + """Test JSON-RPC error code constants.""" + assert PARSE_ERROR == -32700 + assert INVALID_REQUEST == -32600 + assert METHOD_NOT_FOUND == -32601 + assert INVALID_PARAMS == -32602 + assert INTERNAL_ERROR == -32603 + + +class TestRequestParams: + """Test RequestParams and related classes.""" + + def test_request_params_basic(self): + """Test basic RequestParams creation.""" + params = RequestParams() + assert params.meta is None + + def test_request_params_with_meta(self): + """Test RequestParams with meta.""" + meta = RequestParams.Meta(progressToken="test-token") + params = RequestParams(_meta=meta) + assert params.meta is not None + assert params.meta.progressToken == "test-token" + + def test_request_params_meta_extra_fields(self): + """Test RequestParams.Meta allows extra fields.""" + meta = RequestParams.Meta(progressToken="token", customField="value") + assert meta.progressToken == "token" + assert meta.customField == "value" # type: ignore + + def test_request_params_serialization(self): + """Test RequestParams serialization with _meta alias.""" + meta = RequestParams.Meta(progressToken="test") + params = RequestParams(_meta=meta) + + # Model dump should use the alias + dumped = params.model_dump(by_alias=True) + assert "_meta" in dumped + assert dumped["_meta"] is not None + assert dumped["_meta"]["progressToken"] == "test" + + +class TestJSONRPCMessages: + """Test JSON-RPC message types.""" + + def test_jsonrpc_request(self): + """Test JSONRPCRequest creation and validation.""" + request = JSONRPCRequest(jsonrpc="2.0", id="test-123", method="test_method", params={"key": "value"}) + + assert request.jsonrpc == "2.0" + assert request.id == "test-123" + assert request.method == "test_method" + assert request.params == {"key": "value"} + + def test_jsonrpc_request_numeric_id(self): + """Test JSONRPCRequest with numeric ID.""" + request = JSONRPCRequest(jsonrpc="2.0", id=123, method="test", params=None) + assert request.id == 123 + + def test_jsonrpc_notification(self): + """Test JSONRPCNotification creation.""" + notification = JSONRPCNotification(jsonrpc="2.0", method="notification_method", params={"data": "test"}) + + assert notification.jsonrpc == "2.0" + assert notification.method == "notification_method" + assert not hasattr(notification, "id") # Notifications don't have ID + + def test_jsonrpc_response(self): + """Test JSONRPCResponse creation.""" + response = JSONRPCResponse(jsonrpc="2.0", id="req-123", result={"success": True}) + + assert response.jsonrpc == "2.0" + assert response.id == "req-123" + assert response.result == {"success": True} + + def test_jsonrpc_error(self): + """Test JSONRPCError creation.""" + error_data = ErrorData(code=INVALID_PARAMS, message="Invalid parameters", data={"field": "missing"}) + + error = JSONRPCError(jsonrpc="2.0", id="req-123", error=error_data) + + assert error.jsonrpc == "2.0" + assert error.id == "req-123" + assert error.error.code == INVALID_PARAMS + assert error.error.message == "Invalid parameters" + assert error.error.data == {"field": "missing"} + + def test_jsonrpc_message_parsing(self): + """Test JSONRPCMessage parsing different message types.""" + # Parse request + request_json = '{"jsonrpc": "2.0", "id": 1, "method": "test", "params": null}' + msg = JSONRPCMessage.model_validate_json(request_json) + assert isinstance(msg.root, JSONRPCRequest) + + # Parse response + response_json = '{"jsonrpc": "2.0", "id": 1, "result": {"data": "test"}}' + msg = JSONRPCMessage.model_validate_json(response_json) + assert isinstance(msg.root, JSONRPCResponse) + + # Parse error + error_json = '{"jsonrpc": "2.0", "id": 1, "error": {"code": -32600, "message": "Invalid Request"}}' + msg = JSONRPCMessage.model_validate_json(error_json) + assert isinstance(msg.root, JSONRPCError) + + +class TestCapabilities: + """Test capability classes.""" + + def test_client_capabilities(self): + """Test ClientCapabilities creation.""" + caps = ClientCapabilities( + experimental={"feature": {"enabled": True}}, + sampling={"model_config": {"extra": "allow"}}, + roots={"listChanged": True}, + ) + + assert caps.experimental == {"feature": {"enabled": True}} + assert caps.sampling is not None + assert caps.roots.listChanged is True # type: ignore + + def test_server_capabilities(self): + """Test ServerCapabilities creation.""" + caps = ServerCapabilities( + tools={"listChanged": True}, + resources={"subscribe": True, "listChanged": False}, + prompts={"listChanged": True}, + logging={}, + completions={}, + ) + + assert caps.tools.listChanged is True # type: ignore + assert caps.resources.subscribe is True # type: ignore + assert caps.resources.listChanged is False # type: ignore + + +class TestInitialization: + """Test initialization request/response types.""" + + def test_initialize_request(self): + """Test InitializeRequest creation.""" + client_info = Implementation(name="test-client", version="1.0.0") + capabilities = ClientCapabilities() + + params = InitializeRequestParams( + protocolVersion=LATEST_PROTOCOL_VERSION, capabilities=capabilities, clientInfo=client_info + ) + + request = InitializeRequest(params=params) + + assert request.method == "initialize" + assert request.params.protocolVersion == LATEST_PROTOCOL_VERSION + assert request.params.clientInfo.name == "test-client" + + def test_initialize_result(self): + """Test InitializeResult creation.""" + server_info = Implementation(name="test-server", version="1.0.0") + capabilities = ServerCapabilities() + + result = InitializeResult( + protocolVersion=LATEST_PROTOCOL_VERSION, + capabilities=capabilities, + serverInfo=server_info, + instructions="Welcome to test server", + ) + + assert result.protocolVersion == LATEST_PROTOCOL_VERSION + assert result.serverInfo.name == "test-server" + assert result.instructions == "Welcome to test server" + + +class TestTools: + """Test tool-related types.""" + + def test_tool_creation(self): + """Test Tool creation with all fields.""" + tool = Tool( + name="test_tool", + title="Test Tool", + description="A tool for testing", + inputSchema={"type": "object", "properties": {"input": {"type": "string"}}, "required": ["input"]}, + outputSchema={"type": "object", "properties": {"result": {"type": "string"}}}, + annotations=ToolAnnotations( + title="Test Tool", readOnlyHint=False, destructiveHint=False, idempotentHint=True + ), + ) + + assert tool.name == "test_tool" + assert tool.title == "Test Tool" + assert tool.description == "A tool for testing" + assert tool.inputSchema["properties"]["input"]["type"] == "string" + assert tool.annotations.idempotentHint is True + + def test_call_tool_request(self): + """Test CallToolRequest creation.""" + params = CallToolRequestParams(name="test_tool", arguments={"input": "test value"}) + + request = CallToolRequest(params=params) + + assert request.method == "tools/call" + assert request.params.name == "test_tool" + assert request.params.arguments == {"input": "test value"} + + def test_call_tool_result(self): + """Test CallToolResult creation.""" + result = CallToolResult( + content=[TextContent(type="text", text="Tool executed successfully")], + structuredContent={"status": "success", "data": "test"}, + isError=False, + ) + + assert len(result.content) == 1 + assert result.content[0].text == "Tool executed successfully" # type: ignore + assert result.structuredContent == {"status": "success", "data": "test"} + assert result.isError is False + + def test_list_tools_request(self): + """Test ListToolsRequest creation.""" + request = ListToolsRequest() + assert request.method == "tools/list" + + def test_list_tools_result(self): + """Test ListToolsResult creation.""" + tool1 = Tool(name="tool1", inputSchema={}) + tool2 = Tool(name="tool2", inputSchema={}) + + result = ListToolsResult(tools=[tool1, tool2]) + + assert len(result.tools) == 2 + assert result.tools[0].name == "tool1" + assert result.tools[1].name == "tool2" + + +class TestContent: + """Test content types.""" + + def test_text_content(self): + """Test TextContent creation.""" + annotations = Annotations(audience=["user"], priority=0.8) + content = TextContent(type="text", text="Hello, world!", annotations=annotations) + + assert content.type == "text" + assert content.text == "Hello, world!" + assert content.annotations is not None + assert content.annotations.priority == 0.8 + + def test_image_content(self): + """Test ImageContent creation.""" + content = ImageContent(type="image", data="base64encodeddata", mimeType="image/png") + + assert content.type == "image" + assert content.data == "base64encodeddata" + assert content.mimeType == "image/png" + + +class TestOAuth: + """Test OAuth-related types.""" + + def test_oauth_client_metadata(self): + """Test OAuthClientMetadata creation.""" + metadata = OAuthClientMetadata( + client_name="Test Client", + redirect_uris=["https://example.com/callback"], + grant_types=["authorization_code", "refresh_token"], + response_types=["code"], + token_endpoint_auth_method="none", + client_uri="https://example.com", + scope="read write", + ) + + assert metadata.client_name == "Test Client" + assert len(metadata.redirect_uris) == 1 + assert "authorization_code" in metadata.grant_types + + def test_oauth_client_information(self): + """Test OAuthClientInformation creation.""" + info = OAuthClientInformation(client_id="test-client-id", client_secret="test-secret") + + assert info.client_id == "test-client-id" + assert info.client_secret == "test-secret" + + def test_oauth_client_information_without_secret(self): + """Test OAuthClientInformation without secret.""" + info = OAuthClientInformation(client_id="public-client") + + assert info.client_id == "public-client" + assert info.client_secret is None + + def test_oauth_tokens(self): + """Test OAuthTokens creation.""" + tokens = OAuthTokens( + access_token="access-token-123", + token_type="Bearer", + expires_in=3600, + refresh_token="refresh-token-456", + scope="read write", + ) + + assert tokens.access_token == "access-token-123" + assert tokens.token_type == "Bearer" + assert tokens.expires_in == 3600 + assert tokens.refresh_token == "refresh-token-456" + assert tokens.scope == "read write" + + def test_oauth_metadata(self): + """Test OAuthMetadata creation.""" + metadata = OAuthMetadata( + authorization_endpoint="https://auth.example.com/authorize", + token_endpoint="https://auth.example.com/token", + registration_endpoint="https://auth.example.com/register", + response_types_supported=["code", "token"], + grant_types_supported=["authorization_code", "refresh_token"], + code_challenge_methods_supported=["plain", "S256"], + ) + + assert metadata.authorization_endpoint == "https://auth.example.com/authorize" + assert "code" in metadata.response_types_supported + assert "S256" in metadata.code_challenge_methods_supported + + +class TestNotifications: + """Test notification types.""" + + def test_progress_notification(self): + """Test ProgressNotification creation.""" + params = ProgressNotificationParams( + progressToken="progress-123", progress=50.0, total=100.0, message="Processing... 50%" + ) + + notification = ProgressNotification(params=params) + + assert notification.method == "notifications/progress" + assert notification.params.progressToken == "progress-123" + assert notification.params.progress == 50.0 + assert notification.params.total == 100.0 + assert notification.params.message == "Processing... 50%" + + def test_ping_request(self): + """Test PingRequest creation.""" + request = PingRequest() + assert request.method == "ping" + assert request.params is None + + +class TestCompletion: + """Test completion-related types.""" + + def test_completion_context(self): + """Test CompletionContext creation.""" + context = CompletionContext(arguments={"template_var": "value"}) + assert context.arguments == {"template_var": "value"} + + def test_resource_template_reference(self): + """Test ResourceTemplateReference creation.""" + ref = ResourceTemplateReference(type="ref/resource", uri="file:///path/to/{filename}") + assert ref.type == "ref/resource" + assert ref.uri == "file:///path/to/{filename}" + + def test_prompt_reference(self): + """Test PromptReference creation.""" + ref = PromptReference(type="ref/prompt", name="test_prompt") + assert ref.type == "ref/prompt" + assert ref.name == "test_prompt" + + def test_complete_request(self): + """Test CompleteRequest creation.""" + ref = PromptReference(type="ref/prompt", name="test_prompt") + arg = CompletionArgument(name="arg1", value="val") + + params = CompleteRequestParams(ref=ref, argument=arg, context=CompletionContext(arguments={"key": "value"})) + + request = CompleteRequest(params=params) + + assert request.method == "completion/complete" + assert request.params.ref.name == "test_prompt" # type: ignore + assert request.params.argument.name == "arg1" + + def test_complete_result(self): + """Test CompleteResult creation.""" + completion = Completion(values=["option1", "option2", "option3"], total=10, hasMore=True) + + result = CompleteResult(completion=completion) + + assert len(result.completion.values) == 3 + assert result.completion.total == 10 + assert result.completion.hasMore is True + + +class TestValidation: + """Test validation of various types.""" + + def test_invalid_jsonrpc_version(self): + """Test invalid JSON-RPC version validation.""" + with pytest.raises(ValidationError): + JSONRPCRequest( + jsonrpc="1.0", # Invalid version + id=1, + method="test", + ) + + def test_tool_annotations_validation(self): + """Test ToolAnnotations with invalid values.""" + # Valid annotations + annotations = ToolAnnotations( + title="Test", readOnlyHint=True, destructiveHint=False, idempotentHint=True, openWorldHint=False + ) + assert annotations.title == "Test" + + def test_extra_fields_allowed(self): + """Test that extra fields are allowed in models.""" + # Most models should allow extra fields + tool = Tool( + name="test", + inputSchema={}, + customField="allowed", # type: ignore + ) + assert tool.customField == "allowed" # type: ignore + + def test_result_meta_alias(self): + """Test Result model with _meta alias.""" + # Create with the field name (not alias) + result = Result(_meta={"key": "value"}) + + # Verify the field is set correctly + assert result.meta == {"key": "value"} + + # Dump with alias + dumped = result.model_dump(by_alias=True) + assert "_meta" in dumped + assert dumped["_meta"] == {"key": "value"} diff --git a/api/tests/unit_tests/core/mcp/test_utils.py b/api/tests/unit_tests/core/mcp/test_utils.py new file mode 100644 index 0000000000..ca41d5f4c1 --- /dev/null +++ b/api/tests/unit_tests/core/mcp/test_utils.py @@ -0,0 +1,355 @@ +"""Unit tests for MCP utils module.""" + +import json +from collections.abc import Generator +from unittest.mock import MagicMock, Mock, patch + +import httpx +import httpx_sse +import pytest + +from core.mcp.utils import ( + STATUS_FORCELIST, + create_mcp_error_response, + create_ssrf_proxy_mcp_http_client, + ssrf_proxy_sse_connect, +) + + +class TestConstants: + """Test module constants.""" + + def test_status_forcelist(self): + """Test STATUS_FORCELIST contains expected HTTP status codes.""" + assert STATUS_FORCELIST == [429, 500, 502, 503, 504] + assert 429 in STATUS_FORCELIST # Too Many Requests + assert 500 in STATUS_FORCELIST # Internal Server Error + assert 502 in STATUS_FORCELIST # Bad Gateway + assert 503 in STATUS_FORCELIST # Service Unavailable + assert 504 in STATUS_FORCELIST # Gateway Timeout + + +class TestCreateSSRFProxyMCPHTTPClient: + """Test create_ssrf_proxy_mcp_http_client function.""" + + @patch("core.mcp.utils.dify_config") + def test_create_client_with_all_url_proxy(self, mock_config): + """Test client creation with SSRF_PROXY_ALL_URL configured.""" + mock_config.SSRF_PROXY_ALL_URL = "http://proxy.example.com:8080" + mock_config.HTTP_REQUEST_NODE_SSL_VERIFY = True + + client = create_ssrf_proxy_mcp_http_client( + headers={"Authorization": "Bearer token"}, timeout=httpx.Timeout(30.0) + ) + + assert isinstance(client, httpx.Client) + assert client.headers["Authorization"] == "Bearer token" + assert client.timeout.connect == 30.0 + assert client.follow_redirects is True + + # Clean up + client.close() + + @patch("core.mcp.utils.dify_config") + def test_create_client_with_http_https_proxies(self, mock_config): + """Test client creation with separate HTTP/HTTPS proxies.""" + mock_config.SSRF_PROXY_ALL_URL = None + mock_config.SSRF_PROXY_HTTP_URL = "http://http-proxy.example.com:8080" + mock_config.SSRF_PROXY_HTTPS_URL = "http://https-proxy.example.com:8443" + mock_config.HTTP_REQUEST_NODE_SSL_VERIFY = False + + client = create_ssrf_proxy_mcp_http_client() + + assert isinstance(client, httpx.Client) + assert client.follow_redirects is True + + # Clean up + client.close() + + @patch("core.mcp.utils.dify_config") + def test_create_client_without_proxy(self, mock_config): + """Test client creation without proxy configuration.""" + mock_config.SSRF_PROXY_ALL_URL = None + mock_config.SSRF_PROXY_HTTP_URL = None + mock_config.SSRF_PROXY_HTTPS_URL = None + mock_config.HTTP_REQUEST_NODE_SSL_VERIFY = True + + headers = {"X-Custom-Header": "value"} + timeout = httpx.Timeout(timeout=30.0, connect=5.0, read=10.0, write=30.0) + + client = create_ssrf_proxy_mcp_http_client(headers=headers, timeout=timeout) + + assert isinstance(client, httpx.Client) + assert client.headers["X-Custom-Header"] == "value" + assert client.timeout.connect == 5.0 + assert client.timeout.read == 10.0 + assert client.follow_redirects is True + + # Clean up + client.close() + + @patch("core.mcp.utils.dify_config") + def test_create_client_default_params(self, mock_config): + """Test client creation with default parameters.""" + mock_config.SSRF_PROXY_ALL_URL = None + mock_config.SSRF_PROXY_HTTP_URL = None + mock_config.SSRF_PROXY_HTTPS_URL = None + mock_config.HTTP_REQUEST_NODE_SSL_VERIFY = True + + client = create_ssrf_proxy_mcp_http_client() + + assert isinstance(client, httpx.Client) + # httpx.Client adds default headers, so we just check it's a Headers object + assert isinstance(client.headers, httpx.Headers) + # When no timeout is provided, httpx uses its default timeout + assert client.timeout is not None + + # Clean up + client.close() + + +class TestSSRFProxySSEConnect: + """Test ssrf_proxy_sse_connect function.""" + + @patch("core.mcp.utils.connect_sse") + @patch("core.mcp.utils.create_ssrf_proxy_mcp_http_client") + def test_sse_connect_with_provided_client(self, mock_create_client, mock_connect_sse): + """Test SSE connection with pre-configured client.""" + # Setup mocks + mock_client = Mock(spec=httpx.Client) + mock_event_source = Mock(spec=httpx_sse.EventSource) + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_event_source + mock_connect_sse.return_value = mock_context + + # Call with provided client + result = ssrf_proxy_sse_connect( + "http://example.com/sse", client=mock_client, method="POST", headers={"Authorization": "Bearer token"} + ) + + # Verify client creation was not called + mock_create_client.assert_not_called() + + # Verify connect_sse was called correctly + mock_connect_sse.assert_called_once_with( + mock_client, "POST", "http://example.com/sse", headers={"Authorization": "Bearer token"} + ) + + # Verify result + assert result == mock_context + + @patch("core.mcp.utils.connect_sse") + @patch("core.mcp.utils.create_ssrf_proxy_mcp_http_client") + @patch("core.mcp.utils.dify_config") + def test_sse_connect_without_client(self, mock_config, mock_create_client, mock_connect_sse): + """Test SSE connection without pre-configured client.""" + # Setup config + mock_config.SSRF_DEFAULT_TIME_OUT = 30.0 + mock_config.SSRF_DEFAULT_CONNECT_TIME_OUT = 10.0 + mock_config.SSRF_DEFAULT_READ_TIME_OUT = 60.0 + mock_config.SSRF_DEFAULT_WRITE_TIME_OUT = 30.0 + + # Setup mocks + mock_client = Mock(spec=httpx.Client) + mock_create_client.return_value = mock_client + + mock_event_source = Mock(spec=httpx_sse.EventSource) + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_event_source + mock_connect_sse.return_value = mock_context + + # Call without client + result = ssrf_proxy_sse_connect("http://example.com/sse", headers={"X-Custom": "value"}) + + # Verify client was created + mock_create_client.assert_called_once() + call_args = mock_create_client.call_args + assert call_args[1]["headers"] == {"X-Custom": "value"} + + timeout = call_args[1]["timeout"] + # httpx.Timeout object has these attributes + assert isinstance(timeout, httpx.Timeout) + assert timeout.connect == 10.0 + assert timeout.read == 60.0 + assert timeout.write == 30.0 + + # Verify connect_sse was called + mock_connect_sse.assert_called_once_with( + mock_client, + "GET", # Default method + "http://example.com/sse", + ) + + # Verify result + assert result == mock_context + + @patch("core.mcp.utils.connect_sse") + @patch("core.mcp.utils.create_ssrf_proxy_mcp_http_client") + def test_sse_connect_with_custom_timeout(self, mock_create_client, mock_connect_sse): + """Test SSE connection with custom timeout.""" + # Setup mocks + mock_client = Mock(spec=httpx.Client) + mock_create_client.return_value = mock_client + + mock_event_source = Mock(spec=httpx_sse.EventSource) + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_event_source + mock_connect_sse.return_value = mock_context + + custom_timeout = httpx.Timeout(timeout=60.0, read=120.0) + + # Call with custom timeout + result = ssrf_proxy_sse_connect("http://example.com/sse", timeout=custom_timeout) + + # Verify client was created with custom timeout + mock_create_client.assert_called_once() + call_args = mock_create_client.call_args + assert call_args[1]["timeout"] == custom_timeout + + # Verify result + assert result == mock_context + + @patch("core.mcp.utils.connect_sse") + @patch("core.mcp.utils.create_ssrf_proxy_mcp_http_client") + def test_sse_connect_error_cleanup(self, mock_create_client, mock_connect_sse): + """Test SSE connection cleans up client on error.""" + # Setup mocks + mock_client = Mock(spec=httpx.Client) + mock_create_client.return_value = mock_client + + # Make connect_sse raise an exception + mock_connect_sse.side_effect = httpx.ConnectError("Connection failed") + + # Call should raise the exception + with pytest.raises(httpx.ConnectError): + ssrf_proxy_sse_connect("http://example.com/sse") + + # Verify client was cleaned up + mock_client.close.assert_called_once() + + @patch("core.mcp.utils.connect_sse") + def test_sse_connect_error_no_cleanup_with_provided_client(self, mock_connect_sse): + """Test SSE connection doesn't clean up provided client on error.""" + # Setup mocks + mock_client = Mock(spec=httpx.Client) + + # Make connect_sse raise an exception + mock_connect_sse.side_effect = httpx.ConnectError("Connection failed") + + # Call should raise the exception + with pytest.raises(httpx.ConnectError): + ssrf_proxy_sse_connect("http://example.com/sse", client=mock_client) + + # Verify client was NOT cleaned up (because it was provided) + mock_client.close.assert_not_called() + + +class TestCreateMCPErrorResponse: + """Test create_mcp_error_response function.""" + + def test_create_error_response_basic(self): + """Test creating basic error response.""" + generator = create_mcp_error_response(request_id="req-123", code=-32600, message="Invalid Request") + + # Generator should yield bytes + assert isinstance(generator, Generator) + + # Get the response + response_bytes = next(generator) + assert isinstance(response_bytes, bytes) + + # Parse the response + response_str = response_bytes.decode("utf-8") + response_json = json.loads(response_str) + + assert response_json["jsonrpc"] == "2.0" + assert response_json["id"] == "req-123" + assert response_json["error"]["code"] == -32600 + assert response_json["error"]["message"] == "Invalid Request" + assert response_json["error"]["data"] is None + + # Generator should be exhausted + with pytest.raises(StopIteration): + next(generator) + + def test_create_error_response_with_data(self): + """Test creating error response with additional data.""" + error_data = {"field": "username", "reason": "required"} + + generator = create_mcp_error_response( + request_id=456, # Numeric ID + code=-32602, + message="Invalid params", + data=error_data, + ) + + response_bytes = next(generator) + response_json = json.loads(response_bytes.decode("utf-8")) + + assert response_json["id"] == 456 + assert response_json["error"]["code"] == -32602 + assert response_json["error"]["message"] == "Invalid params" + assert response_json["error"]["data"] == error_data + + def test_create_error_response_without_request_id(self): + """Test creating error response without request ID.""" + generator = create_mcp_error_response(request_id=None, code=-32700, message="Parse error") + + response_bytes = next(generator) + response_json = json.loads(response_bytes.decode("utf-8")) + + # Should default to ID 1 + assert response_json["id"] == 1 + assert response_json["error"]["code"] == -32700 + assert response_json["error"]["message"] == "Parse error" + + def test_create_error_response_with_complex_data(self): + """Test creating error response with complex error data.""" + complex_data = { + "errors": [{"field": "name", "message": "Too short"}, {"field": "email", "message": "Invalid format"}], + "timestamp": "2024-01-01T00:00:00Z", + } + + generator = create_mcp_error_response( + request_id="complex-req", code=-32602, message="Validation failed", data=complex_data + ) + + response_bytes = next(generator) + response_json = json.loads(response_bytes.decode("utf-8")) + + assert response_json["error"]["data"] == complex_data + assert len(response_json["error"]["data"]["errors"]) == 2 + + def test_create_error_response_encoding(self): + """Test error response with non-ASCII characters.""" + generator = create_mcp_error_response( + request_id="unicode-req", + code=-32603, + message="内部错误", # Chinese characters + data={"details": "エラー詳細"}, # Japanese characters + ) + + response_bytes = next(generator) + + # Should be valid UTF-8 + response_str = response_bytes.decode("utf-8") + response_json = json.loads(response_str) + + assert response_json["error"]["message"] == "内部错误" + assert response_json["error"]["data"]["details"] == "エラー詳細" + + def test_create_error_response_yields_once(self): + """Test that error response generator yields exactly once.""" + generator = create_mcp_error_response(request_id="test", code=-32600, message="Test") + + # First yield should work + first_yield = next(generator) + assert isinstance(first_yield, bytes) + + # Second yield should raise StopIteration + with pytest.raises(StopIteration): + next(generator) + + # Subsequent calls should also raise + with pytest.raises(StopIteration): + next(generator) diff --git a/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py new file mode 100644 index 0000000000..3635e4dbf9 --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py @@ -0,0 +1,49 @@ +"""Primarily used for testing merged cell scenarios""" + +from docx import Document + +from core.rag.extractor.word_extractor import WordExtractor + + +def _generate_table_with_merged_cells(): + doc = Document() + + """ + The table looks like this: + +-----+-----+-----+ + | 1-1 & 1-2 | 1-3 | + +-----+-----+-----+ + | 2-1 | 2-2 | 2-3 | + | & |-----+-----+ + | 3-1 | 3-2 | 3-3 | + +-----+-----+-----+ + """ + table = doc.add_table(rows=3, cols=3) + table.style = "Table Grid" + + for i in range(3): + for j in range(3): + cell = table.cell(i, j) + cell.text = f"{i + 1}-{j + 1}" + + # Merge cells + cell_0_0 = table.cell(0, 0) + cell_0_1 = table.cell(0, 1) + merged_cell_1 = cell_0_0.merge(cell_0_1) + merged_cell_1.text = "1-1 & 1-2" + + cell_1_0 = table.cell(1, 0) + cell_2_0 = table.cell(2, 0) + merged_cell_2 = cell_1_0.merge(cell_2_0) + merged_cell_2.text = "2-1 & 3-1" + + ground_truth = [["1-1 & 1-2", "", "1-3"], ["2-1 & 3-1", "2-2", "2-3"], ["2-1 & 3-1", "3-2", "3-3"]] + + return doc.tables[0], ground_truth + + +def test_parse_row(): + table, gt = _generate_table_with_merged_cells() + extractor = object.__new__(WordExtractor) + for idx, row in enumerate(table.rows): + assert extractor._parse_row(row, {}, 3) == gt[idx] diff --git a/api/tests/unit_tests/core/rag/pipeline/test_queue.py b/api/tests/unit_tests/core/rag/pipeline/test_queue.py new file mode 100644 index 0000000000..17c5f3c6b7 --- /dev/null +++ b/api/tests/unit_tests/core/rag/pipeline/test_queue.py @@ -0,0 +1,301 @@ +""" +Unit tests for TenantIsolatedTaskQueue. + +These tests verify the Redis-based task queue functionality for tenant-specific +task management with proper serialization and deserialization. +""" + +import json +from unittest.mock import MagicMock, patch +from uuid import uuid4 + +import pytest +from pydantic import ValidationError + +from core.rag.pipeline.queue import TaskWrapper, TenantIsolatedTaskQueue + + +class TestTaskWrapper: + """Test cases for TaskWrapper serialization/deserialization.""" + + def test_serialize_simple_data(self): + """Test serialization of simple data types.""" + data = {"key": "value", "number": 42, "list": [1, 2, 3]} + wrapper = TaskWrapper(data=data) + + serialized = wrapper.serialize() + assert isinstance(serialized, str) + + # Verify it's valid JSON + parsed = json.loads(serialized) + assert parsed["data"] == data + + def test_serialize_complex_data(self): + """Test serialization of complex nested data.""" + data = { + "nested": {"deep": {"value": "test", "numbers": [1, 2, 3, 4, 5]}}, + "unicode": "测试中文", + "special_chars": "!@#$%^&*()", + } + wrapper = TaskWrapper(data=data) + + serialized = wrapper.serialize() + parsed = json.loads(serialized) + assert parsed["data"] == data + + def test_deserialize_valid_data(self): + """Test deserialization of valid JSON data.""" + original_data = {"key": "value", "number": 42} + # Serialize using TaskWrapper to get the correct format + wrapper = TaskWrapper(data=original_data) + serialized = wrapper.serialize() + + wrapper = TaskWrapper.deserialize(serialized) + assert wrapper.data == original_data + + def test_deserialize_invalid_json(self): + """Test deserialization handles invalid JSON gracefully.""" + invalid_json = "{invalid json}" + + # Pydantic will raise ValidationError for invalid JSON + with pytest.raises(ValidationError): + TaskWrapper.deserialize(invalid_json) + + def test_serialize_ensure_ascii_false(self): + """Test that serialization preserves Unicode characters.""" + data = {"chinese": "中文测试", "emoji": "🚀"} + wrapper = TaskWrapper(data=data) + + serialized = wrapper.serialize() + assert "中文测试" in serialized + assert "🚀" in serialized + + +class TestTenantIsolatedTaskQueue: + """Test cases for TenantIsolatedTaskQueue functionality.""" + + @pytest.fixture + def mock_redis_client(self): + """Mock Redis client for testing.""" + mock_redis = MagicMock() + return mock_redis + + @pytest.fixture + def sample_queue(self, mock_redis_client): + """Create a sample TenantIsolatedTaskQueue instance.""" + return TenantIsolatedTaskQueue("tenant-123", "test-key") + + def test_initialization(self, sample_queue): + """Test queue initialization with correct key generation.""" + assert sample_queue._tenant_id == "tenant-123" + assert sample_queue._unique_key == "test-key" + assert sample_queue._queue == "tenant_self_test-key_task_queue:tenant-123" + assert sample_queue._task_key == "tenant_test-key_task:tenant-123" + + @patch("core.rag.pipeline.queue.redis_client") + def test_get_task_key_exists(self, mock_redis, sample_queue): + """Test getting task key when it exists.""" + mock_redis.get.return_value = "1" + + result = sample_queue.get_task_key() + + assert result == "1" + mock_redis.get.assert_called_once_with("tenant_test-key_task:tenant-123") + + @patch("core.rag.pipeline.queue.redis_client") + def test_get_task_key_not_exists(self, mock_redis, sample_queue): + """Test getting task key when it doesn't exist.""" + mock_redis.get.return_value = None + + result = sample_queue.get_task_key() + + assert result is None + mock_redis.get.assert_called_once_with("tenant_test-key_task:tenant-123") + + @patch("core.rag.pipeline.queue.redis_client") + def test_set_task_waiting_time_default_ttl(self, mock_redis, sample_queue): + """Test setting task waiting flag with default TTL.""" + sample_queue.set_task_waiting_time() + + mock_redis.setex.assert_called_once_with( + "tenant_test-key_task:tenant-123", + 3600, # DEFAULT_TASK_TTL + 1, + ) + + @patch("core.rag.pipeline.queue.redis_client") + def test_set_task_waiting_time_custom_ttl(self, mock_redis, sample_queue): + """Test setting task waiting flag with custom TTL.""" + custom_ttl = 1800 + sample_queue.set_task_waiting_time(custom_ttl) + + mock_redis.setex.assert_called_once_with("tenant_test-key_task:tenant-123", custom_ttl, 1) + + @patch("core.rag.pipeline.queue.redis_client") + def test_delete_task_key(self, mock_redis, sample_queue): + """Test deleting task key.""" + sample_queue.delete_task_key() + + mock_redis.delete.assert_called_once_with("tenant_test-key_task:tenant-123") + + @patch("core.rag.pipeline.queue.redis_client") + def test_push_tasks_string_list(self, mock_redis, sample_queue): + """Test pushing string tasks directly.""" + tasks = ["task1", "task2", "task3"] + + sample_queue.push_tasks(tasks) + + mock_redis.lpush.assert_called_once_with( + "tenant_self_test-key_task_queue:tenant-123", "task1", "task2", "task3" + ) + + @patch("core.rag.pipeline.queue.redis_client") + def test_push_tasks_mixed_types(self, mock_redis, sample_queue): + """Test pushing mixed string and object tasks.""" + tasks = ["string_task", {"object_task": "data", "id": 123}, "another_string"] + + sample_queue.push_tasks(tasks) + + # Verify lpush was called + mock_redis.lpush.assert_called_once() + call_args = mock_redis.lpush.call_args + + # Check queue name + assert call_args[0][0] == "tenant_self_test-key_task_queue:tenant-123" + + # Check serialized tasks + serialized_tasks = call_args[0][1:] + assert len(serialized_tasks) == 3 + assert serialized_tasks[0] == "string_task" + assert serialized_tasks[2] == "another_string" + + # Check object task is serialized as TaskWrapper JSON (without prefix) + # It should be a valid JSON string that can be deserialized by TaskWrapper + wrapper = TaskWrapper.deserialize(serialized_tasks[1]) + assert wrapper.data == {"object_task": "data", "id": 123} + + @patch("core.rag.pipeline.queue.redis_client") + def test_push_tasks_empty_list(self, mock_redis, sample_queue): + """Test pushing empty task list.""" + sample_queue.push_tasks([]) + + mock_redis.lpush.assert_not_called() + + @patch("core.rag.pipeline.queue.redis_client") + def test_pull_tasks_default_count(self, mock_redis, sample_queue): + """Test pulling tasks with default count (1).""" + mock_redis.rpop.side_effect = ["task1", None] + + result = sample_queue.pull_tasks() + + assert result == ["task1"] + assert mock_redis.rpop.call_count == 1 + + @patch("core.rag.pipeline.queue.redis_client") + def test_pull_tasks_custom_count(self, mock_redis, sample_queue): + """Test pulling tasks with custom count.""" + # First test: pull 3 tasks + mock_redis.rpop.side_effect = ["task1", "task2", "task3", None] + + result = sample_queue.pull_tasks(3) + + assert result == ["task1", "task2", "task3"] + assert mock_redis.rpop.call_count == 3 + + # Reset mock for second test + mock_redis.reset_mock() + mock_redis.rpop.side_effect = ["task1", "task2", None] + + result = sample_queue.pull_tasks(3) + + assert result == ["task1", "task2"] + assert mock_redis.rpop.call_count == 3 + + @patch("core.rag.pipeline.queue.redis_client") + def test_pull_tasks_zero_count(self, mock_redis, sample_queue): + """Test pulling tasks with zero count returns empty list.""" + result = sample_queue.pull_tasks(0) + + assert result == [] + mock_redis.rpop.assert_not_called() + + @patch("core.rag.pipeline.queue.redis_client") + def test_pull_tasks_negative_count(self, mock_redis, sample_queue): + """Test pulling tasks with negative count returns empty list.""" + result = sample_queue.pull_tasks(-1) + + assert result == [] + mock_redis.rpop.assert_not_called() + + @patch("core.rag.pipeline.queue.redis_client") + def test_pull_tasks_with_wrapped_objects(self, mock_redis, sample_queue): + """Test pulling tasks that include wrapped objects.""" + # Create a wrapped task + task_data = {"task_id": 123, "data": "test"} + wrapper = TaskWrapper(data=task_data) + wrapped_task = wrapper.serialize() + + mock_redis.rpop.side_effect = [ + "string_task", + wrapped_task.encode("utf-8"), # Simulate bytes from Redis + None, + ] + + result = sample_queue.pull_tasks(2) + + assert len(result) == 2 + assert result[0] == "string_task" + assert result[1] == {"task_id": 123, "data": "test"} + + @patch("core.rag.pipeline.queue.redis_client") + def test_pull_tasks_with_invalid_wrapped_data(self, mock_redis, sample_queue): + """Test pulling tasks with invalid JSON falls back to string.""" + # Invalid JSON string that cannot be deserialized + invalid_json = "invalid json data" + mock_redis.rpop.side_effect = [invalid_json, None] + + result = sample_queue.pull_tasks(1) + + assert result == [invalid_json] + + @patch("core.rag.pipeline.queue.redis_client") + def test_pull_tasks_bytes_decoding(self, mock_redis, sample_queue): + """Test pulling tasks handles bytes from Redis correctly.""" + mock_redis.rpop.side_effect = [ + b"task1", # bytes + "task2", # string + None, + ] + + result = sample_queue.pull_tasks(2) + + assert result == ["task1", "task2"] + + @patch("core.rag.pipeline.queue.redis_client") + def test_complex_object_serialization_roundtrip(self, mock_redis, sample_queue): + """Test complex object serialization and deserialization roundtrip.""" + complex_task = { + "id": uuid4().hex, + "data": {"nested": {"deep": [1, 2, 3], "unicode": "测试中文", "special": "!@#$%^&*()"}}, + "metadata": {"created_at": "2024-01-01T00:00:00Z", "tags": ["tag1", "tag2", "tag3"]}, + } + + # Push the complex task + sample_queue.push_tasks([complex_task]) + + # Verify it was serialized as TaskWrapper JSON + call_args = mock_redis.lpush.call_args + wrapped_task = call_args[0][1] + # Verify it's a valid TaskWrapper JSON (starts with {"data":) + assert wrapped_task.startswith('{"data":') + + # Verify it can be deserialized + wrapper = TaskWrapper.deserialize(wrapped_task) + assert wrapper.data == complex_task + + # Simulate pulling it back + mock_redis.rpop.return_value = wrapped_task + result = sample_queue.pull_tasks(1) + + assert len(result) == 1 + assert result[0] == complex_task diff --git a/api/tests/unit_tests/core/test_provider_manager.py b/api/tests/unit_tests/core/test_provider_manager.py index 0c3887beab..dbbda5f74c 100644 --- a/api/tests/unit_tests/core/test_provider_manager.py +++ b/api/tests/unit_tests/core/test_provider_manager.py @@ -28,17 +28,17 @@ def mock_provider_entity(mocker: MockerFixture): def test__to_model_settings(mocker: MockerFixture, mock_provider_entity): # Mocking the inputs - provider_model_settings = [ - ProviderModelSetting( - id="id", - tenant_id="tenant_id", - provider_name="openai", - model_name="gpt-4", - model_type="text-generation", - enabled=True, - load_balancing_enabled=True, - ) - ] + ps = ProviderModelSetting( + tenant_id="tenant_id", + provider_name="openai", + model_name="gpt-4", + model_type="text-generation", + enabled=True, + load_balancing_enabled=True, + ) + ps.id = "id" + + provider_model_settings = [ps] load_balancing_model_configs = [ LoadBalancingModelConfig( id="id1", @@ -88,17 +88,17 @@ def test__to_model_settings(mocker: MockerFixture, mock_provider_entity): def test__to_model_settings_only_one_lb(mocker: MockerFixture, mock_provider_entity): # Mocking the inputs - provider_model_settings = [ - ProviderModelSetting( - id="id", - tenant_id="tenant_id", - provider_name="openai", - model_name="gpt-4", - model_type="text-generation", - enabled=True, - load_balancing_enabled=True, - ) - ] + + ps = ProviderModelSetting( + tenant_id="tenant_id", + provider_name="openai", + model_name="gpt-4", + model_type="text-generation", + enabled=True, + load_balancing_enabled=True, + ) + ps.id = "id" + provider_model_settings = [ps] load_balancing_model_configs = [ LoadBalancingModelConfig( id="id1", @@ -136,17 +136,16 @@ def test__to_model_settings_only_one_lb(mocker: MockerFixture, mock_provider_ent def test__to_model_settings_lb_disabled(mocker: MockerFixture, mock_provider_entity): # Mocking the inputs - provider_model_settings = [ - ProviderModelSetting( - id="id", - tenant_id="tenant_id", - provider_name="openai", - model_name="gpt-4", - model_type="text-generation", - enabled=True, - load_balancing_enabled=False, - ) - ] + ps = ProviderModelSetting( + tenant_id="tenant_id", + provider_name="openai", + model_name="gpt-4", + model_type="text-generation", + enabled=True, + load_balancing_enabled=False, + ) + ps.id = "id" + provider_model_settings = [ps] load_balancing_model_configs = [ LoadBalancingModelConfig( id="id1", diff --git a/api/tests/unit_tests/core/test_trigger_debug_event_selectors.py b/api/tests/unit_tests/core/test_trigger_debug_event_selectors.py new file mode 100644 index 0000000000..2b508ca654 --- /dev/null +++ b/api/tests/unit_tests/core/test_trigger_debug_event_selectors.py @@ -0,0 +1,102 @@ +import hashlib +import json +from datetime import UTC, datetime + +import pytest +import pytz + +from core.trigger.debug import event_selectors +from core.workflow.nodes.trigger_schedule.entities import ScheduleConfig + + +class _DummyRedis: + def __init__(self): + self.store: dict[str, str] = {} + + def get(self, key: str): + return self.store.get(key) + + def setex(self, name: str, time: int, value: str): + self.store[name] = value + + def expire(self, name: str, ttl: int): + # Expiration not required for these tests. + pass + + def delete(self, name: str): + self.store.pop(name, None) + + +@pytest.fixture +def dummy_schedule_config() -> ScheduleConfig: + return ScheduleConfig( + node_id="node-1", + cron_expression="* * * * *", + timezone="Asia/Shanghai", + ) + + +@pytest.fixture(autouse=True) +def patch_schedule_service(monkeypatch: pytest.MonkeyPatch, dummy_schedule_config: ScheduleConfig): + # Ensure poller always receives the deterministic config. + monkeypatch.setattr( + "services.trigger.schedule_service.ScheduleService.to_schedule_config", + staticmethod(lambda *_args, **_kwargs: dummy_schedule_config), + ) + + +def _make_poller( + monkeypatch: pytest.MonkeyPatch, redis_client: _DummyRedis +) -> event_selectors.ScheduleTriggerDebugEventPoller: + monkeypatch.setattr(event_selectors, "redis_client", redis_client) + return event_selectors.ScheduleTriggerDebugEventPoller( + tenant_id="tenant-1", + user_id="user-1", + app_id="app-1", + node_config={"id": "node-1", "data": {"mode": "cron"}}, + node_id="node-1", + ) + + +def test_schedule_poller_handles_aware_next_run(monkeypatch: pytest.MonkeyPatch): + redis_client = _DummyRedis() + poller = _make_poller(monkeypatch, redis_client) + + base_now = datetime(2025, 1, 1, 12, 0, 10) + aware_next_run = datetime(2025, 1, 1, 12, 0, 5, tzinfo=UTC) + + monkeypatch.setattr(event_selectors, "naive_utc_now", lambda: base_now) + monkeypatch.setattr(event_selectors, "calculate_next_run_at", lambda *_: aware_next_run) + + event = poller.poll() + + assert event is not None + assert event.node_id == "node-1" + assert event.workflow_args["inputs"] == {} + + +def test_schedule_runtime_cache_normalizes_timezone( + monkeypatch: pytest.MonkeyPatch, dummy_schedule_config: ScheduleConfig +): + redis_client = _DummyRedis() + poller = _make_poller(monkeypatch, redis_client) + + localized_time = pytz.timezone("Asia/Shanghai").localize(datetime(2025, 1, 1, 20, 0, 0)) + + cron_hash = hashlib.sha256(dummy_schedule_config.cron_expression.encode()).hexdigest() + cache_key = poller.schedule_debug_runtime_key(cron_hash) + + redis_client.store[cache_key] = json.dumps( + { + "cache_key": cache_key, + "timezone": dummy_schedule_config.timezone, + "cron_expression": dummy_schedule_config.cron_expression, + "next_run_at": localized_time.isoformat(), + } + ) + + runtime = poller.get_or_create_schedule_debug_runtime() + + expected = localized_time.astimezone(UTC).replace(tzinfo=None) + assert runtime.next_run_at == expected + assert runtime.next_run_at.tzinfo is None diff --git a/api/tests/unit_tests/core/tools/utils/test_encryption.py b/api/tests/unit_tests/core/tools/utils/test_encryption.py index 3b7c1f5678..94be0bb573 100644 --- a/api/tests/unit_tests/core/tools/utils/test_encryption.py +++ b/api/tests/unit_tests/core/tools/utils/test_encryption.py @@ -4,7 +4,7 @@ from unittest.mock import patch import pytest from core.entities.provider_entities import BasicProviderConfig -from core.tools.utils.encryption import ProviderConfigEncrypter +from core.helper.provider_encryption import ProviderConfigEncrypter # --------------------------- @@ -88,7 +88,7 @@ def test_encrypt_missing_secret_key_is_ok(encrypter_obj): # ============================================================ -# ProviderConfigEncrypter.mask_tool_credentials() +# ProviderConfigEncrypter.mask_plugin_credentials() # ============================================================ @@ -107,7 +107,7 @@ def test_mask_tool_credentials_long_secret(encrypter_obj, raw, prefix, suffix): data_in = {"username": "alice", "password": raw} data_copy = copy.deepcopy(data_in) - out = encrypter_obj.mask_tool_credentials(data_in) + out = encrypter_obj.mask_plugin_credentials(data_in) masked = out["password"] assert masked.startswith(prefix) @@ -122,7 +122,7 @@ def test_mask_tool_credentials_short_secret(encrypter_obj, raw): """ For length <= 6: fully mask with '*' of same length. """ - out = encrypter_obj.mask_tool_credentials({"password": raw}) + out = encrypter_obj.mask_plugin_credentials({"password": raw}) assert out["password"] == ("*" * len(raw)) @@ -131,7 +131,7 @@ def test_mask_tool_credentials_missing_key_noop(encrypter_obj): data_in = {"username": "alice"} data_copy = copy.deepcopy(data_in) - out = encrypter_obj.mask_tool_credentials(data_in) + out = encrypter_obj.mask_plugin_credentials(data_in) assert out["username"] == "alice" assert data_in == data_copy diff --git a/api/tests/unit_tests/core/variables/test_segment_type.py b/api/tests/unit_tests/core/variables/test_segment_type.py index a197b617f3..3bfc5a957f 100644 --- a/api/tests/unit_tests/core/variables/test_segment_type.py +++ b/api/tests/unit_tests/core/variables/test_segment_type.py @@ -1,3 +1,5 @@ +import pytest + from core.variables.types import ArrayValidation, SegmentType @@ -83,3 +85,81 @@ class TestSegmentTypeIsValidArrayValidation: value = [1, 2, 3] # validation is None, skip assert SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.NONE) + + +class TestSegmentTypeGetZeroValue: + """ + Test class for SegmentType.get_zero_value static method. + + Provides comprehensive coverage of all supported SegmentType values to ensure + correct zero value generation for each type. + """ + + def test_array_types_return_empty_list(self): + """Test that all array types return empty list segments.""" + array_types = [ + SegmentType.ARRAY_ANY, + SegmentType.ARRAY_STRING, + SegmentType.ARRAY_NUMBER, + SegmentType.ARRAY_OBJECT, + SegmentType.ARRAY_BOOLEAN, + ] + + for seg_type in array_types: + result = SegmentType.get_zero_value(seg_type) + assert result.value == [] + assert result.value_type == seg_type + + def test_object_returns_empty_dict(self): + """Test that OBJECT type returns empty dictionary segment.""" + result = SegmentType.get_zero_value(SegmentType.OBJECT) + assert result.value == {} + assert result.value_type == SegmentType.OBJECT + + def test_string_returns_empty_string(self): + """Test that STRING type returns empty string segment.""" + result = SegmentType.get_zero_value(SegmentType.STRING) + assert result.value == "" + assert result.value_type == SegmentType.STRING + + def test_integer_returns_zero(self): + """Test that INTEGER type returns zero segment.""" + result = SegmentType.get_zero_value(SegmentType.INTEGER) + assert result.value == 0 + assert result.value_type == SegmentType.INTEGER + + def test_float_returns_zero_point_zero(self): + """Test that FLOAT type returns 0.0 segment.""" + result = SegmentType.get_zero_value(SegmentType.FLOAT) + assert result.value == 0.0 + assert result.value_type == SegmentType.FLOAT + + def test_number_returns_zero(self): + """Test that NUMBER type returns zero segment.""" + result = SegmentType.get_zero_value(SegmentType.NUMBER) + assert result.value == 0 + # NUMBER type with integer value returns INTEGER segment type + # (NUMBER is a union type that can be INTEGER or FLOAT) + assert result.value_type == SegmentType.INTEGER + # Verify that exposed_type returns NUMBER for frontend compatibility + assert result.value_type.exposed_type() == SegmentType.NUMBER + + def test_boolean_returns_false(self): + """Test that BOOLEAN type returns False segment.""" + result = SegmentType.get_zero_value(SegmentType.BOOLEAN) + assert result.value is False + assert result.value_type == SegmentType.BOOLEAN + + def test_unsupported_types_raise_value_error(self): + """Test that unsupported types raise ValueError.""" + unsupported_types = [ + SegmentType.SECRET, + SegmentType.FILE, + SegmentType.NONE, + SegmentType.GROUP, + SegmentType.ARRAY_FILE, + ] + + for seg_type in unsupported_types: + with pytest.raises(ValueError, match="unsupported variable type"): + SegmentType.get_zero_value(seg_type) diff --git a/api/tests/unit_tests/core/variables/test_segment_type_validation.py b/api/tests/unit_tests/core/variables/test_segment_type_validation.py index e0541280d3..3a0054cd46 100644 --- a/api/tests/unit_tests/core/variables/test_segment_type_validation.py +++ b/api/tests/unit_tests/core/variables/test_segment_type_validation.py @@ -12,6 +12,16 @@ import pytest from core.file.enums import FileTransferMethod, FileType from core.file.models import File +from core.variables.segment_group import SegmentGroup +from core.variables.segments import ( + ArrayFileSegment, + BooleanSegment, + FileSegment, + IntegerSegment, + NoneSegment, + ObjectSegment, + StringSegment, +) from core.variables.types import ArrayValidation, SegmentType @@ -202,6 +212,45 @@ def get_none_cases() -> list[ValidationTestCase]: ] +def get_group_cases() -> list[ValidationTestCase]: + """Get test cases for valid group values.""" + test_file = create_test_file() + segments = [ + StringSegment(value="hello"), + IntegerSegment(value=42), + BooleanSegment(value=True), + ObjectSegment(value={"key": "value"}), + FileSegment(value=test_file), + NoneSegment(value=None), + ] + + return [ + # valid cases + ValidationTestCase( + SegmentType.GROUP, SegmentGroup(value=segments), True, "Valid SegmentGroup with mixed segments" + ), + ValidationTestCase( + SegmentType.GROUP, [StringSegment(value="test"), IntegerSegment(value=123)], True, "List of Segment objects" + ), + ValidationTestCase(SegmentType.GROUP, SegmentGroup(value=[]), True, "Empty SegmentGroup"), + ValidationTestCase(SegmentType.GROUP, [], True, "Empty list"), + # invalid cases + ValidationTestCase(SegmentType.GROUP, "not a list", False, "String value"), + ValidationTestCase(SegmentType.GROUP, 123, False, "Integer value"), + ValidationTestCase(SegmentType.GROUP, True, False, "Boolean value"), + ValidationTestCase(SegmentType.GROUP, None, False, "None value"), + ValidationTestCase(SegmentType.GROUP, {"key": "value"}, False, "Dict value"), + ValidationTestCase(SegmentType.GROUP, test_file, False, "File value"), + ValidationTestCase(SegmentType.GROUP, ["string", 123, True], False, "List with non-Segment objects"), + ValidationTestCase( + SegmentType.GROUP, + [StringSegment(value="test"), "not a segment"], + False, + "Mixed list with some non-Segment objects", + ), + ] + + def get_array_any_validation_cases() -> list[ArrayValidationTestCase]: """Get test cases for ARRAY_ANY validation.""" return [ @@ -477,11 +526,77 @@ class TestSegmentTypeIsValid: def test_none_validation_valid_cases(self, case): assert case.segment_type.is_valid(case.value) == case.expected - def test_unsupported_segment_type_raises_assertion_error(self): - """Test that unsupported SegmentType values raise AssertionError.""" - # GROUP is not handled in is_valid method - with pytest.raises(AssertionError, match="this statement should be unreachable"): - SegmentType.GROUP.is_valid("any value") + @pytest.mark.parametrize("case", get_group_cases(), ids=lambda case: case.description) + def test_group_validation(self, case): + """Test GROUP type validation with various inputs.""" + assert case.segment_type.is_valid(case.value) == case.expected + + def test_group_validation_edge_cases(self): + """Test GROUP validation edge cases.""" + test_file = create_test_file() + + # Test with nested SegmentGroups + inner_group = SegmentGroup(value=[StringSegment(value="inner"), IntegerSegment(value=42)]) + outer_group = SegmentGroup(value=[StringSegment(value="outer"), inner_group]) + assert SegmentType.GROUP.is_valid(outer_group) is True + + # Test with ArrayFileSegment (which is also a Segment) + file_segment = FileSegment(value=test_file) + array_file_segment = ArrayFileSegment(value=[test_file, test_file]) + group_with_arrays = SegmentGroup(value=[file_segment, array_file_segment, StringSegment(value="test")]) + assert SegmentType.GROUP.is_valid(group_with_arrays) is True + + # Test performance with large number of segments + large_segment_list = [StringSegment(value=f"item_{i}") for i in range(1000)] + large_group = SegmentGroup(value=large_segment_list) + assert SegmentType.GROUP.is_valid(large_group) is True + + def test_no_truly_unsupported_segment_types_exist(self): + """Test that all SegmentType enum values are properly handled in is_valid method. + + This test ensures there are no SegmentType values that would raise AssertionError. + If this test fails, it means a new SegmentType was added without proper validation support. + """ + # Test that ALL segment types are handled and don't raise AssertionError + all_segment_types = set(SegmentType) + + for segment_type in all_segment_types: + # Create a valid test value for each type + test_value: Any = None + if segment_type == SegmentType.STRING: + test_value = "test" + elif segment_type in {SegmentType.NUMBER, SegmentType.INTEGER}: + test_value = 42 + elif segment_type == SegmentType.FLOAT: + test_value = 3.14 + elif segment_type == SegmentType.BOOLEAN: + test_value = True + elif segment_type == SegmentType.OBJECT: + test_value = {"key": "value"} + elif segment_type == SegmentType.SECRET: + test_value = "secret" + elif segment_type == SegmentType.FILE: + test_value = create_test_file() + elif segment_type == SegmentType.NONE: + test_value = None + elif segment_type == SegmentType.GROUP: + test_value = SegmentGroup(value=[StringSegment(value="test")]) + elif segment_type.is_array_type(): + test_value = [] # Empty array is valid for all array types + else: + # If we get here, there's a segment type we don't know how to test + # This should prompt us to add validation logic + pytest.fail(f"Unknown segment type {segment_type} needs validation logic and test case") + + # This should NOT raise AssertionError + try: + result = segment_type.is_valid(test_value) + assert isinstance(result, bool), f"is_valid should return boolean for {segment_type}" + except AssertionError as e: + pytest.fail( + f"SegmentType.{segment_type.name}.is_valid() raised AssertionError: {e}. " + "This segment type needs to be handled in the is_valid method." + ) class TestSegmentTypeArrayValidation: @@ -611,6 +726,7 @@ class TestSegmentTypeValidationIntegration: SegmentType.SECRET, SegmentType.FILE, SegmentType.NONE, + SegmentType.GROUP, ] for segment_type in non_array_types: @@ -630,6 +746,8 @@ class TestSegmentTypeValidationIntegration: valid_value = create_test_file() elif segment_type == SegmentType.NONE: valid_value = None + elif segment_type == SegmentType.GROUP: + valid_value = SegmentGroup(value=[StringSegment(value="test")]) else: continue # Skip unsupported types @@ -656,6 +774,7 @@ class TestSegmentTypeValidationIntegration: SegmentType.SECRET, SegmentType.FILE, SegmentType.NONE, + SegmentType.GROUP, # Array types SegmentType.ARRAY_ANY, SegmentType.ARRAY_STRING, @@ -667,7 +786,6 @@ class TestSegmentTypeValidationIntegration: # Types that are not handled by is_valid (should raise AssertionError) unhandled_types = { - SegmentType.GROUP, SegmentType.INTEGER, # Handled by NUMBER validation logic SegmentType.FLOAT, # Handled by NUMBER validation logic } @@ -696,6 +814,8 @@ class TestSegmentTypeValidationIntegration: assert segment_type.is_valid(create_test_file()) is True elif segment_type == SegmentType.NONE: assert segment_type.is_valid(None) is True + elif segment_type == SegmentType.GROUP: + assert segment_type.is_valid(SegmentGroup(value=[StringSegment(value="test")])) is True def test_boolean_vs_integer_type_distinction(self): """Test the important distinction between boolean and integer types in validation.""" diff --git a/api/tests/unit_tests/core/workflow/entities/test_private_workflow_pause.py b/api/tests/unit_tests/core/workflow/entities/test_private_workflow_pause.py new file mode 100644 index 0000000000..ccb2dff85a --- /dev/null +++ b/api/tests/unit_tests/core/workflow/entities/test_private_workflow_pause.py @@ -0,0 +1,171 @@ +"""Tests for _PrivateWorkflowPauseEntity implementation.""" + +from datetime import datetime +from unittest.mock import MagicMock, patch + +from models.workflow import WorkflowPause as WorkflowPauseModel +from repositories.sqlalchemy_api_workflow_run_repository import _PrivateWorkflowPauseEntity + + +class TestPrivateWorkflowPauseEntity: + """Test _PrivateWorkflowPauseEntity implementation.""" + + def test_entity_initialization(self): + """Test entity initialization with required parameters.""" + # Create mock models + mock_pause_model = MagicMock(spec=WorkflowPauseModel) + mock_pause_model.id = "pause-123" + mock_pause_model.workflow_run_id = "execution-456" + mock_pause_model.resumed_at = None + + # Create entity + entity = _PrivateWorkflowPauseEntity( + pause_model=mock_pause_model, + ) + + # Verify initialization + assert entity._pause_model is mock_pause_model + assert entity._cached_state is None + + def test_from_models_classmethod(self): + """Test from_models class method.""" + # Create mock models + mock_pause_model = MagicMock(spec=WorkflowPauseModel) + mock_pause_model.id = "pause-123" + mock_pause_model.workflow_run_id = "execution-456" + + # Create entity using from_models + entity = _PrivateWorkflowPauseEntity.from_models( + workflow_pause_model=mock_pause_model, + ) + + # Verify entity creation + assert isinstance(entity, _PrivateWorkflowPauseEntity) + assert entity._pause_model is mock_pause_model + + def test_id_property(self): + """Test id property returns pause model ID.""" + mock_pause_model = MagicMock(spec=WorkflowPauseModel) + mock_pause_model.id = "pause-123" + + entity = _PrivateWorkflowPauseEntity( + pause_model=mock_pause_model, + ) + + assert entity.id == "pause-123" + + def test_workflow_execution_id_property(self): + """Test workflow_execution_id property returns workflow run ID.""" + mock_pause_model = MagicMock(spec=WorkflowPauseModel) + mock_pause_model.workflow_run_id = "execution-456" + + entity = _PrivateWorkflowPauseEntity( + pause_model=mock_pause_model, + ) + + assert entity.workflow_execution_id == "execution-456" + + def test_resumed_at_property(self): + """Test resumed_at property returns pause model resumed_at.""" + resumed_at = datetime(2023, 12, 25, 15, 30, 45) + + mock_pause_model = MagicMock(spec=WorkflowPauseModel) + mock_pause_model.resumed_at = resumed_at + + entity = _PrivateWorkflowPauseEntity( + pause_model=mock_pause_model, + ) + + assert entity.resumed_at == resumed_at + + def test_resumed_at_property_none(self): + """Test resumed_at property returns None when not set.""" + mock_pause_model = MagicMock(spec=WorkflowPauseModel) + mock_pause_model.resumed_at = None + + entity = _PrivateWorkflowPauseEntity( + pause_model=mock_pause_model, + ) + + assert entity.resumed_at is None + + @patch("repositories.sqlalchemy_api_workflow_run_repository.storage") + def test_get_state_first_call(self, mock_storage): + """Test get_state loads from storage on first call.""" + state_data = b'{"test": "data", "step": 5}' + mock_storage.load.return_value = state_data + + mock_pause_model = MagicMock(spec=WorkflowPauseModel) + mock_pause_model.state_object_key = "test-state-key" + + entity = _PrivateWorkflowPauseEntity( + pause_model=mock_pause_model, + ) + + # First call should load from storage + result = entity.get_state() + + assert result == state_data + mock_storage.load.assert_called_once_with("test-state-key") + assert entity._cached_state == state_data + + @patch("repositories.sqlalchemy_api_workflow_run_repository.storage") + def test_get_state_cached_call(self, mock_storage): + """Test get_state returns cached data on subsequent calls.""" + state_data = b'{"test": "data", "step": 5}' + mock_storage.load.return_value = state_data + + mock_pause_model = MagicMock(spec=WorkflowPauseModel) + mock_pause_model.state_object_key = "test-state-key" + + entity = _PrivateWorkflowPauseEntity( + pause_model=mock_pause_model, + ) + + # First call + result1 = entity.get_state() + # Second call should use cache + result2 = entity.get_state() + + assert result1 == state_data + assert result2 == state_data + # Storage should only be called once + mock_storage.load.assert_called_once_with("test-state-key") + + @patch("repositories.sqlalchemy_api_workflow_run_repository.storage") + def test_get_state_with_pre_cached_data(self, mock_storage): + """Test get_state returns pre-cached data.""" + state_data = b'{"test": "data", "step": 5}' + + mock_pause_model = MagicMock(spec=WorkflowPauseModel) + + entity = _PrivateWorkflowPauseEntity( + pause_model=mock_pause_model, + ) + + # Pre-cache data + entity._cached_state = state_data + + # Should return cached data without calling storage + result = entity.get_state() + + assert result == state_data + mock_storage.load.assert_not_called() + + def test_entity_with_binary_state_data(self): + """Test entity with binary state data.""" + # Test with binary data that's not valid JSON + binary_data = b"\x00\x01\x02\x03\x04\x05\xff\xfe" + + with patch("repositories.sqlalchemy_api_workflow_run_repository.storage") as mock_storage: + mock_storage.load.return_value = binary_data + + mock_pause_model = MagicMock(spec=WorkflowPauseModel) + + entity = _PrivateWorkflowPauseEntity( + pause_model=mock_pause_model, + ) + + result = entity.get_state() + + assert result == binary_data diff --git a/api/tests/unit_tests/core/workflow/entities/test_variable_pool.py b/api/tests/unit_tests/core/workflow/entities/test_variable_pool.py index f9de456b19..18f6753b05 100644 --- a/api/tests/unit_tests/core/workflow/entities/test_variable_pool.py +++ b/api/tests/unit_tests/core/workflow/entities/test_variable_pool.py @@ -111,3 +111,26 @@ class TestVariablePoolGetAndNestedAttribute: assert segment_false is not None assert isinstance(segment_false, BooleanSegment) assert segment_false.value is False + + +class TestVariablePoolGetNotModifyVariableDictionary: + _NODE_ID = "start" + _VAR_NAME = "name" + + def test_convert_to_template_should_not_introduce_extra_keys(self): + pool = VariablePool.empty() + pool.add([self._NODE_ID, self._VAR_NAME], 0) + pool.convert_template("The start.name is {{#start.name#}}") + assert "The start" not in pool.variable_dictionary + + def test_get_should_not_modify_variable_dictionary(self): + pool = VariablePool.empty() + pool.get([self._NODE_ID, self._VAR_NAME]) + assert len(pool.variable_dictionary) == 1 # only contains `sys` node id + assert "start" not in pool.variable_dictionary + + pool = VariablePool.empty() + pool.add([self._NODE_ID, self._VAR_NAME], "Joe") + pool.get([self._NODE_ID, "count"]) + start_subdict = pool.variable_dictionary[self._NODE_ID] + assert "count" not in start_subdict diff --git a/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py b/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py index b55d4998c4..c55c40c5b4 100644 --- a/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py +++ b/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py @@ -64,6 +64,15 @@ class _TestNode(Node): ) self.data = dict(data) + node_type_value = data.get("type") + if isinstance(node_type_value, NodeType): + self.node_type = node_type_value + elif isinstance(node_type_value, str): + try: + self.node_type = NodeType(node_type_value) + except ValueError: + pass + def _run(self): raise NotImplementedError @@ -179,3 +188,22 @@ def test_graph_promotes_fail_branch_nodes_to_branch_execution_type( graph = Graph.init(graph_config=graph_config, node_factory=node_factory) assert graph.nodes["branch"].execution_type == NodeExecutionType.BRANCH + + +def test_graph_validation_blocks_start_and_trigger_coexistence( + graph_init_dependencies: tuple[_SimpleNodeFactory, dict[str, object]], +) -> None: + node_factory, graph_config = graph_init_dependencies + graph_config["nodes"] = [ + {"id": "start", "data": {"type": NodeType.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}}, + { + "id": "trigger", + "data": {"type": NodeType.TRIGGER_WEBHOOK, "title": "Webhook", "execution_type": NodeExecutionType.ROOT}, + }, + ] + graph_config["edges"] = [] + + with pytest.raises(GraphValidationError) as exc_info: + Graph.init(graph_config=graph_config, node_factory=node_factory) + + assert any(issue.code == "TRIGGER_START_NODE_CONFLICT" for issue in exc_info.value.issues) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py b/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py new file mode 100644 index 0000000000..e6d4508fdf --- /dev/null +++ b/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py @@ -0,0 +1,189 @@ +"""Tests for dispatcher command checking behavior.""" + +from __future__ import annotations + +import queue +from datetime import datetime +from unittest import mock + +from core.workflow.entities.pause_reason import SchedulingPause +from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus +from core.workflow.graph_engine.event_management.event_handlers import EventHandler +from core.workflow.graph_engine.orchestration.dispatcher import Dispatcher +from core.workflow.graph_engine.orchestration.execution_coordinator import ExecutionCoordinator +from core.workflow.graph_events import ( + GraphNodeEventBase, + NodeRunPauseRequestedEvent, + NodeRunStartedEvent, + NodeRunSucceededEvent, +) +from core.workflow.node_events import NodeRunResult + + +def test_dispatcher_should_consume_remains_events_after_pause(): + event_queue = queue.Queue() + event_queue.put( + GraphNodeEventBase( + id="test", + node_id="test", + node_type=NodeType.START, + ) + ) + event_handler = mock.Mock(spec=EventHandler) + execution_coordinator = mock.Mock(spec=ExecutionCoordinator) + execution_coordinator.paused.return_value = True + dispatcher = Dispatcher( + event_queue=event_queue, + event_handler=event_handler, + execution_coordinator=execution_coordinator, + ) + dispatcher._dispatcher_loop() + assert event_queue.empty() + + +class _StubExecutionCoordinator: + """Stub execution coordinator that tracks command checks.""" + + def __init__(self) -> None: + self.command_checks = 0 + self.scaling_checks = 0 + self.execution_complete = False + self.failed = False + self._paused = False + + def process_commands(self) -> None: + self.command_checks += 1 + + def check_scaling(self) -> None: + self.scaling_checks += 1 + + @property + def paused(self) -> bool: + return self._paused + + @property + def aborted(self) -> bool: + return False + + def mark_complete(self) -> None: + self.execution_complete = True + + def mark_failed(self, error: Exception) -> None: # pragma: no cover - defensive, not triggered in tests + self.failed = True + + +class _StubEventHandler: + """Minimal event handler that marks execution complete after handling an event.""" + + def __init__(self, coordinator: _StubExecutionCoordinator) -> None: + self._coordinator = coordinator + self.events = [] + + def dispatch(self, event) -> None: + self.events.append(event) + self._coordinator.mark_complete() + + +def _run_dispatcher_for_event(event) -> int: + """Run the dispatcher loop for a single event and return command check count.""" + event_queue: queue.Queue = queue.Queue() + event_queue.put(event) + + coordinator = _StubExecutionCoordinator() + event_handler = _StubEventHandler(coordinator) + + dispatcher = Dispatcher( + event_queue=event_queue, + event_handler=event_handler, + execution_coordinator=coordinator, + ) + + dispatcher._dispatcher_loop() + + return coordinator.command_checks + + +def _make_started_event() -> NodeRunStartedEvent: + return NodeRunStartedEvent( + id="start-event", + node_id="node-1", + node_type=NodeType.CODE, + node_title="Test Node", + start_at=datetime.utcnow(), + ) + + +def _make_succeeded_event() -> NodeRunSucceededEvent: + return NodeRunSucceededEvent( + id="success-event", + node_id="node-1", + node_type=NodeType.CODE, + node_title="Test Node", + start_at=datetime.utcnow(), + node_run_result=NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED), + ) + + +def test_dispatcher_checks_commands_during_idle_and_on_completion() -> None: + """Dispatcher polls commands when idle and after completion events.""" + started_checks = _run_dispatcher_for_event(_make_started_event()) + succeeded_checks = _run_dispatcher_for_event(_make_succeeded_event()) + + assert started_checks == 2 + assert succeeded_checks == 3 + + +class _PauseStubEventHandler: + """Minimal event handler that marks execution complete after handling an event.""" + + def __init__(self, coordinator: _StubExecutionCoordinator) -> None: + self._coordinator = coordinator + self.events = [] + + def dispatch(self, event) -> None: + self.events.append(event) + if isinstance(event, NodeRunPauseRequestedEvent): + self._coordinator.mark_complete() + + +def test_dispatcher_drain_event_queue(): + events = [ + NodeRunStartedEvent( + id="start-event", + node_id="node-1", + node_type=NodeType.CODE, + node_title="Code", + start_at=datetime.utcnow(), + ), + NodeRunPauseRequestedEvent( + id="pause-event", + node_id="node-1", + node_type=NodeType.CODE, + reason=SchedulingPause(message="test pause"), + ), + NodeRunSucceededEvent( + id="success-event", + node_id="node-1", + node_type=NodeType.CODE, + start_at=datetime.utcnow(), + node_run_result=NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED), + ), + ] + + event_queue: queue.Queue = queue.Queue() + for e in events: + event_queue.put(e) + + coordinator = _StubExecutionCoordinator() + event_handler = _PauseStubEventHandler(coordinator) + + dispatcher = Dispatcher( + event_queue=event_queue, + event_handler=event_handler, + execution_coordinator=coordinator, + ) + + dispatcher._dispatcher_loop() + + # ensure all events are drained. + assert event_queue.empty() diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_command_system.py b/api/tests/unit_tests/core/workflow/graph_engine/test_command_system.py index d451e7e608..868edf9832 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_command_system.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_command_system.py @@ -3,12 +3,17 @@ import time from unittest.mock import MagicMock +from core.app.entities.app_invoke_entities import InvokeFrom +from core.workflow.entities.graph_init_params import GraphInitParams +from core.workflow.entities.pause_reason import SchedulingPause from core.workflow.graph import Graph from core.workflow.graph_engine import GraphEngine from core.workflow.graph_engine.command_channels import InMemoryChannel from core.workflow.graph_engine.entities.commands import AbortCommand, CommandType, PauseCommand from core.workflow.graph_events import GraphRunAbortedEvent, GraphRunPausedEvent, GraphRunStartedEvent +from core.workflow.nodes.start.start_node import StartNode from core.workflow.runtime import GraphRuntimeState, VariablePool +from models.enums import UserFrom def test_abort_command(): @@ -25,11 +30,23 @@ def test_abort_command(): mock_graph.root_node.id = "start" # Create mock nodes with required attributes - using shared runtime state - mock_start_node = MagicMock() - mock_start_node.state = None - mock_start_node.id = "start" - mock_start_node.graph_runtime_state = shared_runtime_state # Use shared instance - mock_graph.nodes["start"] = mock_start_node + start_node = StartNode( + id="start", + config={"id": "start"}, + graph_init_params=GraphInitParams( + tenant_id="test_tenant", + app_id="test_app", + workflow_id="test_workflow", + graph_config={}, + user_id="test_user", + user_from=UserFrom.ACCOUNT, + invoke_from=InvokeFrom.DEBUGGER, + call_depth=0, + ), + graph_runtime_state=shared_runtime_state, + ) + start_node.init_node_data({"title": "start", "variables": []}) + mock_graph.nodes["start"] = start_node # Mock graph methods mock_graph.get_outgoing_edges = MagicMock(return_value=[]) @@ -123,11 +140,23 @@ def test_pause_command(): mock_graph.root_node = MagicMock() mock_graph.root_node.id = "start" - mock_start_node = MagicMock() - mock_start_node.state = None - mock_start_node.id = "start" - mock_start_node.graph_runtime_state = shared_runtime_state - mock_graph.nodes["start"] = mock_start_node + start_node = StartNode( + id="start", + config={"id": "start"}, + graph_init_params=GraphInitParams( + tenant_id="test_tenant", + app_id="test_app", + workflow_id="test_workflow", + graph_config={}, + user_id="test_user", + user_from=UserFrom.ACCOUNT, + invoke_from=InvokeFrom.DEBUGGER, + call_depth=0, + ), + graph_runtime_state=shared_runtime_state, + ) + start_node.init_node_data({"title": "start", "variables": []}) + mock_graph.nodes["start"] = start_node mock_graph.get_outgoing_edges = MagicMock(return_value=[]) mock_graph.get_incoming_edges = MagicMock(return_value=[]) @@ -149,8 +178,8 @@ def test_pause_command(): assert any(isinstance(e, GraphRunStartedEvent) for e in events) pause_events = [e for e in events if isinstance(e, GraphRunPausedEvent)] assert len(pause_events) == 1 - assert pause_events[0].reason == "User requested pause" + assert pause_events[0].reason == SchedulingPause(message="User requested pause") graph_execution = engine.graph_runtime_state.graph_execution - assert graph_execution.is_paused - assert graph_execution.pause_reason == "User requested pause" + assert graph_execution.paused + assert graph_execution.pause_reason == SchedulingPause(message="User requested pause") diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_database_utils.py b/api/tests/unit_tests/core/workflow/graph_engine/test_database_utils.py new file mode 100644 index 0000000000..ae7dd48bb1 --- /dev/null +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_database_utils.py @@ -0,0 +1,46 @@ +""" +Utilities for detecting if database service is available for workflow tests. +""" + +import psycopg2 +import pytest + +from configs import dify_config + + +def is_database_available() -> bool: + """ + Check if the database service is available by attempting to connect to it. + + Returns: + True if database is available, False otherwise. + """ + try: + # Try to establish a database connection using a context manager + with psycopg2.connect( + host=dify_config.DB_HOST, + port=dify_config.DB_PORT, + database=dify_config.DB_DATABASE, + user=dify_config.DB_USERNAME, + password=dify_config.DB_PASSWORD, + connect_timeout=2, # 2 second timeout + ) as conn: + pass # Connection established and will be closed automatically + return True + except (psycopg2.OperationalError, psycopg2.Error): + return False + + +def skip_if_database_unavailable(): + """ + Pytest skip decorator that skips tests when database service is unavailable. + + Usage: + @skip_if_database_unavailable() + def test_my_workflow(): + ... + """ + return pytest.mark.skipif( + not is_database_available(), + reason="Database service is not available (connection refused or authentication failed)", + ) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher.py b/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher.py deleted file mode 100644 index 3fe4ce3400..0000000000 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher.py +++ /dev/null @@ -1,109 +0,0 @@ -"""Tests for dispatcher command checking behavior.""" - -from __future__ import annotations - -import queue -from datetime import datetime - -from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus -from core.workflow.graph_engine.event_management.event_manager import EventManager -from core.workflow.graph_engine.orchestration.dispatcher import Dispatcher -from core.workflow.graph_events import NodeRunStartedEvent, NodeRunSucceededEvent -from core.workflow.node_events import NodeRunResult - - -class _StubExecutionCoordinator: - """Stub execution coordinator that tracks command checks.""" - - def __init__(self) -> None: - self.command_checks = 0 - self.scaling_checks = 0 - self._execution_complete = False - self.mark_complete_called = False - self.failed = False - self._paused = False - - def check_commands(self) -> None: - self.command_checks += 1 - - def check_scaling(self) -> None: - self.scaling_checks += 1 - - @property - def is_paused(self) -> bool: - return self._paused - - def is_execution_complete(self) -> bool: - return self._execution_complete - - def mark_complete(self) -> None: - self.mark_complete_called = True - - def mark_failed(self, error: Exception) -> None: # pragma: no cover - defensive, not triggered in tests - self.failed = True - - def set_execution_complete(self) -> None: - self._execution_complete = True - - -class _StubEventHandler: - """Minimal event handler that marks execution complete after handling an event.""" - - def __init__(self, coordinator: _StubExecutionCoordinator) -> None: - self._coordinator = coordinator - self.events = [] - - def dispatch(self, event) -> None: - self.events.append(event) - self._coordinator.set_execution_complete() - - -def _run_dispatcher_for_event(event) -> int: - """Run the dispatcher loop for a single event and return command check count.""" - event_queue: queue.Queue = queue.Queue() - event_queue.put(event) - - coordinator = _StubExecutionCoordinator() - event_handler = _StubEventHandler(coordinator) - event_manager = EventManager() - - dispatcher = Dispatcher( - event_queue=event_queue, - event_handler=event_handler, - event_collector=event_manager, - execution_coordinator=coordinator, - ) - - dispatcher._dispatcher_loop() - - return coordinator.command_checks - - -def _make_started_event() -> NodeRunStartedEvent: - return NodeRunStartedEvent( - id="start-event", - node_id="node-1", - node_type=NodeType.CODE, - node_title="Test Node", - start_at=datetime.utcnow(), - ) - - -def _make_succeeded_event() -> NodeRunSucceededEvent: - return NodeRunSucceededEvent( - id="success-event", - node_id="node-1", - node_type=NodeType.CODE, - node_title="Test Node", - start_at=datetime.utcnow(), - node_run_result=NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED), - ) - - -def test_dispatcher_checks_commands_during_idle_and_on_completion() -> None: - """Dispatcher polls commands when idle and after completion events.""" - started_checks = _run_dispatcher_for_event(_make_started_event()) - succeeded_checks = _run_dispatcher_for_event(_make_succeeded_event()) - - assert started_checks == 1 - assert succeeded_checks == 2 diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_execution_coordinator.py b/api/tests/unit_tests/core/workflow/graph_engine/test_execution_coordinator.py index 025393e435..0d67a76169 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_execution_coordinator.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_execution_coordinator.py @@ -48,15 +48,3 @@ def test_handle_pause_noop_when_execution_running() -> None: worker_pool.stop.assert_not_called() state_manager.clear_executing.assert_not_called() - - -def test_is_execution_complete_when_paused() -> None: - """Paused execution should be treated as complete.""" - graph_execution = GraphExecution(workflow_id="workflow") - graph_execution.start() - graph_execution.pause("Awaiting input") - - coordinator, state_manager, _worker_pool = _build_coordinator(graph_execution) - state_manager.is_execution_complete.return_value = False - - assert coordinator.is_execution_complete() diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_iteration_flatten_output.py b/api/tests/unit_tests/core/workflow/graph_engine/test_iteration_flatten_output.py new file mode 100644 index 0000000000..98f344babf --- /dev/null +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_iteration_flatten_output.py @@ -0,0 +1,100 @@ +""" +Test cases for the Iteration node's flatten_output functionality. + +This module tests the iteration node's ability to: +1. Flatten array outputs when flatten_output=True (default) +2. Preserve nested array structure when flatten_output=False +""" + +from .test_database_utils import skip_if_database_unavailable +from .test_table_runner import TableTestRunner, WorkflowTestCase + + +@skip_if_database_unavailable() +def test_iteration_with_flatten_output_enabled(): + """ + Test iteration node with flatten_output=True (default behavior). + + The fixture implements an iteration that: + 1. Iterates over [1, 2, 3] + 2. For each item, outputs [item, item*2] + 3. With flatten_output=True, should output [1, 2, 2, 4, 3, 6] + """ + runner = TableTestRunner() + + test_case = WorkflowTestCase( + fixture_path="iteration_flatten_output_enabled_workflow", + inputs={}, + expected_outputs={"output": [1, 2, 2, 4, 3, 6]}, + description="Iteration with flatten_output=True flattens nested arrays", + use_auto_mock=False, # Run code nodes directly + ) + + result = runner.run_test_case(test_case) + + assert result.success, f"Test failed: {result.error}" + assert result.actual_outputs is not None, "Should have outputs" + assert result.actual_outputs == {"output": [1, 2, 2, 4, 3, 6]}, ( + f"Expected flattened output [1, 2, 2, 4, 3, 6], got {result.actual_outputs}" + ) + + +@skip_if_database_unavailable() +def test_iteration_with_flatten_output_disabled(): + """ + Test iteration node with flatten_output=False. + + The fixture implements an iteration that: + 1. Iterates over [1, 2, 3] + 2. For each item, outputs [item, item*2] + 3. With flatten_output=False, should output [[1, 2], [2, 4], [3, 6]] + """ + runner = TableTestRunner() + + test_case = WorkflowTestCase( + fixture_path="iteration_flatten_output_disabled_workflow", + inputs={}, + expected_outputs={"output": [[1, 2], [2, 4], [3, 6]]}, + description="Iteration with flatten_output=False preserves nested structure", + use_auto_mock=False, # Run code nodes directly + ) + + result = runner.run_test_case(test_case) + + assert result.success, f"Test failed: {result.error}" + assert result.actual_outputs is not None, "Should have outputs" + assert result.actual_outputs == {"output": [[1, 2], [2, 4], [3, 6]]}, ( + f"Expected nested output [[1, 2], [2, 4], [3, 6]], got {result.actual_outputs}" + ) + + +@skip_if_database_unavailable() +def test_iteration_flatten_output_comparison(): + """ + Run both flatten_output configurations in parallel to verify the difference. + """ + runner = TableTestRunner() + + test_cases = [ + WorkflowTestCase( + fixture_path="iteration_flatten_output_enabled_workflow", + inputs={}, + expected_outputs={"output": [1, 2, 2, 4, 3, 6]}, + description="flatten_output=True: Flattened output", + use_auto_mock=False, # Run code nodes directly + ), + WorkflowTestCase( + fixture_path="iteration_flatten_output_disabled_workflow", + inputs={}, + expected_outputs={"output": [[1, 2], [2, 4], [3, 6]]}, + description="flatten_output=False: Nested output", + use_auto_mock=False, # Run code nodes directly + ), + ] + + suite_result = runner.run_table_tests(test_cases, parallel=True) + + # Assert all tests passed + assert suite_result.passed_tests == 2, f"Expected 2 passed tests, got {suite_result.passed_tests}" + assert suite_result.failed_tests == 0, f"Expected 0 failed tests, got {suite_result.failed_tests}" + assert suite_result.success_rate == 100.0, f"Expected 100% success rate, got {suite_result.success_rate}" diff --git a/api/tests/unit_tests/core/workflow/nodes/webhook/test_entities.py b/api/tests/unit_tests/core/workflow/nodes/webhook/test_entities.py index 97e2a59578..4fa9a01b61 100644 --- a/api/tests/unit_tests/core/workflow/nodes/webhook/test_entities.py +++ b/api/tests/unit_tests/core/workflow/nodes/webhook/test_entities.py @@ -30,7 +30,7 @@ def test_content_type_enum(): assert ContentType.FORM_DATA == "multipart/form-data" assert ContentType.FORM_URLENCODED == "application/x-www-form-urlencoded" assert ContentType.TEXT == "text/plain" - assert ContentType.FORM == "form" + assert ContentType.BINARY == "application/octet-stream" # Test all enum values are strings for content_type in ContentType: @@ -79,7 +79,17 @@ def test_webhook_body_parameter_creation(): def test_webhook_body_parameter_types(): """Test WebhookBodyParameter type validation.""" - valid_types = ["string", "number", "boolean", "object", "array", "file"] + valid_types = [ + "string", + "number", + "boolean", + "object", + "array[string]", + "array[number]", + "array[boolean]", + "array[object]", + "file", + ] for param_type in valid_types: param = WebhookBodyParameter(name="test", type=param_type) @@ -127,7 +137,7 @@ def test_webhook_data_creation_full(): title="Full Webhook Test", desc="A comprehensive webhook test", method=Method.POST, - **{"content-type": ContentType.FORM_DATA}, + content_type=ContentType.FORM_DATA, headers=headers, params=params, body=body, @@ -151,19 +161,13 @@ def test_webhook_data_creation_full(): def test_webhook_data_content_type_alias(): - """Test WebhookData content_type field alias.""" - # Test using the alias "content-type" - data1 = WebhookData(title="Test", **{"content-type": "application/json"}) + """Test WebhookData content_type accepts both strings and enum values.""" + data1 = WebhookData(title="Test", content_type="application/json") assert data1.content_type == ContentType.JSON - # Test using the alias with enum value - data2 = WebhookData(title="Test", **{"content-type": ContentType.FORM_DATA}) + data2 = WebhookData(title="Test", content_type=ContentType.FORM_DATA) assert data2.content_type == ContentType.FORM_DATA - # Test both approaches result in same field - assert hasattr(data1, "content_type") - assert hasattr(data2, "content_type") - def test_webhook_data_model_dump(): """Test WebhookData model serialization.""" @@ -196,12 +200,12 @@ def test_webhook_data_model_dump_with_alias(): """Test WebhookData model serialization includes alias.""" data = WebhookData( title="Test Webhook", - **{"content-type": ContentType.FORM_DATA}, + content_type=ContentType.FORM_DATA, ) dumped = data.model_dump(by_alias=True) - assert "content-type" in dumped - assert dumped["content-type"] == "multipart/form-data" + assert "content_type" in dumped + assert dumped["content_type"] == "multipart/form-data" def test_webhook_data_validation_errors(): @@ -214,9 +218,9 @@ def test_webhook_data_validation_errors(): with pytest.raises(ValidationError): WebhookData(title="Test", method="invalid_method") - # Invalid content_type via alias + # Invalid content_type with pytest.raises(ValidationError): - WebhookData(title="Test", **{"content-type": "invalid/type"}) + WebhookData(title="Test", content_type="invalid/type") # Invalid status_code (should be int) - use non-numeric string with pytest.raises(ValidationError): @@ -276,7 +280,17 @@ def test_webhook_body_parameter_edge_cases(): assert file_param.required is True # Test all valid types - for param_type in ["string", "number", "boolean", "object", "array", "file"]: + for param_type in [ + "string", + "number", + "boolean", + "object", + "array[string]", + "array[number]", + "array[boolean]", + "array[object]", + "file", + ]: param = WebhookBodyParameter(name=f"test_{param_type}", type=param_type) assert param.type == param_type diff --git a/api/tests/unit_tests/core/workflow/nodes/webhook/test_exceptions.py b/api/tests/unit_tests/core/workflow/nodes/webhook/test_exceptions.py index f59b6bd1ba..374d5183c8 100644 --- a/api/tests/unit_tests/core/workflow/nodes/webhook/test_exceptions.py +++ b/api/tests/unit_tests/core/workflow/nodes/webhook/test_exceptions.py @@ -149,7 +149,7 @@ def test_webhook_error_attributes(): assert WebhookConfigError.__name__ == "WebhookConfigError" # Test that all error classes have proper __module__ - expected_module = "core.workflow.nodes.webhook.exc" + expected_module = "core.workflow.nodes.trigger_webhook.exc" assert WebhookNodeError.__module__ == expected_module assert WebhookTimeoutError.__module__ == expected_module assert WebhookNotFoundError.__module__ == expected_module diff --git a/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py b/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py index 4a085fd316..d7094ae5f2 100644 --- a/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py @@ -5,8 +5,6 @@ from core.file import File, FileTransferMethod, FileType from core.variables import StringVariable from core.workflow.entities.graph_init_params import GraphInitParams from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus -from core.workflow.nodes.answer.entities import AnswerStreamGenerateRoute -from core.workflow.nodes.end.entities import EndStreamParam from core.workflow.nodes.trigger_webhook.entities import ( ContentType, Method, @@ -43,17 +41,6 @@ def create_webhook_node(webhook_data: WebhookData, variable_pool: VariablePool) invoke_from=InvokeFrom.SERVICE_API, call_depth=0, ), - graph=Graph( - root_node_id="1", - answer_stream_generate_routes=AnswerStreamGenerateRoute( - answer_dependencies={}, - answer_generate_route={}, - ), - end_stream_param=EndStreamParam( - end_dependencies={}, - end_stream_variable_selector_mapping={}, - ), - ), graph_runtime_state=GraphRuntimeState( variable_pool=variable_pool, start_at=0, @@ -85,7 +72,7 @@ def test_webhook_node_basic_initialization(): node = create_webhook_node(data, variable_pool) - assert node.node_type.value == "webhook" + assert node.node_type.value == "trigger-webhook" assert node.version() == "1" assert node._get_title() == "Test Webhook" assert node._node_data.method == Method.POST @@ -101,7 +88,7 @@ def test_webhook_node_default_config(): assert config["type"] == "webhook" assert config["config"]["method"] == "get" - assert config["config"]["content-type"] == "application/json" + assert config["config"]["content_type"] == "application/json" assert config["config"]["headers"] == [] assert config["config"]["params"] == [] assert config["config"]["body"] == [] @@ -142,7 +129,7 @@ def test_webhook_node_run_with_headers(): assert result.status == WorkflowNodeExecutionStatus.SUCCEEDED assert result.outputs["Authorization"] == "Bearer token123" - assert result.outputs["Content-Type"] == "application/json" # Case-insensitive match + assert result.outputs["Content_Type"] == "application/json" # Case-insensitive match assert "_webhook_raw" in result.outputs @@ -376,8 +363,8 @@ def test_webhook_node_run_case_insensitive_headers(): result = node._run() assert result.status == WorkflowNodeExecutionStatus.SUCCEEDED - assert result.outputs["Content-Type"] == "application/json" - assert result.outputs["X-API-KEY"] == "key123" + assert result.outputs["Content_Type"] == "application/json" + assert result.outputs["X_API_KEY"] == "key123" assert result.outputs["authorization"] == "Bearer token" @@ -436,13 +423,12 @@ def test_webhook_node_different_methods(method): assert node._node_data.method == method -def test_webhook_data_alias_content_type(): - """Test that content-type field alias works correctly.""" - # Test both ways of setting content_type - data1 = WebhookData(title="Test", **{"content-type": "application/json"}) +def test_webhook_data_content_type_field(): + """Test that content_type accepts both raw strings and enum values.""" + data1 = WebhookData(title="Test", content_type="application/json") assert data1.content_type == ContentType.JSON - data2 = WebhookData(title="Test", **{"content-type": ContentType.FORM_DATA}) + data2 = WebhookData(title="Test", content_type=ContentType.FORM_DATA) assert data2.content_type == ContentType.FORM_DATA diff --git a/api/tests/unit_tests/core/workflow/test_enums.py b/api/tests/unit_tests/core/workflow/test_enums.py new file mode 100644 index 0000000000..7cdb2328f2 --- /dev/null +++ b/api/tests/unit_tests/core/workflow/test_enums.py @@ -0,0 +1,32 @@ +"""Tests for workflow pause related enums and constants.""" + +from core.workflow.enums import ( + WorkflowExecutionStatus, +) + + +class TestWorkflowExecutionStatus: + """Test WorkflowExecutionStatus enum.""" + + def test_is_ended_method(self): + """Test is_ended method for different statuses.""" + # Test ended statuses + ended_statuses = [ + WorkflowExecutionStatus.SUCCEEDED, + WorkflowExecutionStatus.FAILED, + WorkflowExecutionStatus.PARTIAL_SUCCEEDED, + WorkflowExecutionStatus.STOPPED, + ] + + for status in ended_statuses: + assert status.is_ended(), f"{status} should be considered ended" + + # Test non-ended statuses + non_ended_statuses = [ + WorkflowExecutionStatus.SCHEDULED, + WorkflowExecutionStatus.RUNNING, + WorkflowExecutionStatus.PAUSED, + ] + + for status in non_ended_statuses: + assert not status.is_ended(), f"{status} should not be considered ended" diff --git a/api/tests/unit_tests/core/workflow/test_system_variable_read_only_view.py b/api/tests/unit_tests/core/workflow/test_system_variable_read_only_view.py new file mode 100644 index 0000000000..57bc96fe71 --- /dev/null +++ b/api/tests/unit_tests/core/workflow/test_system_variable_read_only_view.py @@ -0,0 +1,202 @@ +from typing import cast + +import pytest + +from core.file.models import File, FileTransferMethod, FileType +from core.workflow.system_variable import SystemVariable, SystemVariableReadOnlyView + + +class TestSystemVariableReadOnlyView: + """Test cases for SystemVariableReadOnlyView class.""" + + def test_read_only_property_access(self): + """Test that all properties return correct values from wrapped instance.""" + # Create test data + test_file = File( + id="file-123", + tenant_id="tenant-123", + type=FileType.IMAGE, + transfer_method=FileTransferMethod.LOCAL_FILE, + related_id="related-123", + ) + + datasource_info = {"key": "value", "nested": {"data": 42}} + + # Create SystemVariable with all fields + system_var = SystemVariable( + user_id="user-123", + app_id="app-123", + workflow_id="workflow-123", + files=[test_file], + workflow_execution_id="exec-123", + query="test query", + conversation_id="conv-123", + dialogue_count=5, + document_id="doc-123", + original_document_id="orig-doc-123", + dataset_id="dataset-123", + batch="batch-123", + datasource_type="type-123", + datasource_info=datasource_info, + invoke_from="invoke-123", + ) + + # Create read-only view + read_only_view = SystemVariableReadOnlyView(system_var) + + # Test all properties + assert read_only_view.user_id == "user-123" + assert read_only_view.app_id == "app-123" + assert read_only_view.workflow_id == "workflow-123" + assert read_only_view.workflow_execution_id == "exec-123" + assert read_only_view.query == "test query" + assert read_only_view.conversation_id == "conv-123" + assert read_only_view.dialogue_count == 5 + assert read_only_view.document_id == "doc-123" + assert read_only_view.original_document_id == "orig-doc-123" + assert read_only_view.dataset_id == "dataset-123" + assert read_only_view.batch == "batch-123" + assert read_only_view.datasource_type == "type-123" + assert read_only_view.invoke_from == "invoke-123" + + def test_defensive_copying_of_mutable_objects(self): + """Test that mutable objects are defensively copied.""" + # Create test data + test_file = File( + id="file-123", + tenant_id="tenant-123", + type=FileType.IMAGE, + transfer_method=FileTransferMethod.LOCAL_FILE, + related_id="related-123", + ) + + datasource_info = {"key": "original_value"} + + # Create SystemVariable + system_var = SystemVariable( + files=[test_file], datasource_info=datasource_info, workflow_execution_id="exec-123" + ) + + # Create read-only view + read_only_view = SystemVariableReadOnlyView(system_var) + + # Test files defensive copying + files_copy = read_only_view.files + assert isinstance(files_copy, tuple) # Should be immutable tuple + assert len(files_copy) == 1 + assert files_copy[0].id == "file-123" + + # Verify it's a copy (can't modify original through view) + assert isinstance(files_copy, tuple) + # tuples don't have append method, so they're immutable + + # Test datasource_info defensive copying + datasource_copy = read_only_view.datasource_info + assert datasource_copy is not None + assert datasource_copy["key"] == "original_value" + + datasource_copy = cast(dict, datasource_copy) + with pytest.raises(TypeError): + datasource_copy["key"] = "modified value" + + # Verify original is unchanged + assert system_var.datasource_info is not None + assert system_var.datasource_info["key"] == "original_value" + assert read_only_view.datasource_info is not None + assert read_only_view.datasource_info["key"] == "original_value" + + def test_always_accesses_latest_data(self): + """Test that properties always return the latest data from wrapped instance.""" + # Create SystemVariable + system_var = SystemVariable(user_id="original-user", workflow_execution_id="exec-123") + + # Create read-only view + read_only_view = SystemVariableReadOnlyView(system_var) + + # Verify initial value + assert read_only_view.user_id == "original-user" + + # Modify the wrapped instance + system_var.user_id = "modified-user" + + # Verify view returns the new value + assert read_only_view.user_id == "modified-user" + + def test_repr_method(self): + """Test the __repr__ method.""" + # Create SystemVariable + system_var = SystemVariable(workflow_execution_id="exec-123") + + # Create read-only view + read_only_view = SystemVariableReadOnlyView(system_var) + + # Test repr + repr_str = repr(read_only_view) + assert "SystemVariableReadOnlyView" in repr_str + assert "system_variable=" in repr_str + + def test_none_value_handling(self): + """Test that None values are properly handled.""" + # Create SystemVariable with all None values except workflow_execution_id + system_var = SystemVariable( + user_id=None, + app_id=None, + workflow_id=None, + workflow_execution_id="exec-123", + query=None, + conversation_id=None, + dialogue_count=None, + document_id=None, + original_document_id=None, + dataset_id=None, + batch=None, + datasource_type=None, + datasource_info=None, + invoke_from=None, + ) + + # Create read-only view + read_only_view = SystemVariableReadOnlyView(system_var) + + # Test all None values + assert read_only_view.user_id is None + assert read_only_view.app_id is None + assert read_only_view.workflow_id is None + assert read_only_view.query is None + assert read_only_view.conversation_id is None + assert read_only_view.dialogue_count is None + assert read_only_view.document_id is None + assert read_only_view.original_document_id is None + assert read_only_view.dataset_id is None + assert read_only_view.batch is None + assert read_only_view.datasource_type is None + assert read_only_view.datasource_info is None + assert read_only_view.invoke_from is None + + # files should be empty tuple even when default list is empty + assert read_only_view.files == () + + def test_empty_files_handling(self): + """Test that empty files list is handled correctly.""" + # Create SystemVariable with empty files + system_var = SystemVariable(files=[], workflow_execution_id="exec-123") + + # Create read-only view + read_only_view = SystemVariableReadOnlyView(system_var) + + # Test files handling + assert read_only_view.files == () + assert isinstance(read_only_view.files, tuple) + + def test_empty_datasource_info_handling(self): + """Test that empty datasource_info is handled correctly.""" + # Create SystemVariable with empty datasource_info + system_var = SystemVariable(datasource_info={}, workflow_execution_id="exec-123") + + # Create read-only view + read_only_view = SystemVariableReadOnlyView(system_var) + + # Test datasource_info handling + assert read_only_view.datasource_info == {} + # Should be a copy, not the same object + assert read_only_view.datasource_info is not system_var.datasource_info diff --git a/api/tests/unit_tests/core/workflow/utils/test_condition.py b/api/tests/unit_tests/core/workflow/utils/test_condition.py new file mode 100644 index 0000000000..efedf88726 --- /dev/null +++ b/api/tests/unit_tests/core/workflow/utils/test_condition.py @@ -0,0 +1,52 @@ +from core.workflow.runtime import VariablePool +from core.workflow.utils.condition.entities import Condition +from core.workflow.utils.condition.processor import ConditionProcessor + + +def test_number_formatting(): + condition_processor = ConditionProcessor() + variable_pool = VariablePool() + variable_pool.add(["test_node_id", "zone"], 0) + variable_pool.add(["test_node_id", "one"], 1) + variable_pool.add(["test_node_id", "one_one"], 1.1) + # 0 <= 0.95 + assert ( + condition_processor.process_conditions( + variable_pool=variable_pool, + conditions=[Condition(variable_selector=["test_node_id", "zone"], comparison_operator="≤", value="0.95")], + operator="or", + ).final_result + == True + ) + + # 1 >= 0.95 + assert ( + condition_processor.process_conditions( + variable_pool=variable_pool, + conditions=[Condition(variable_selector=["test_node_id", "one"], comparison_operator="≥", value="0.95")], + operator="or", + ).final_result + == True + ) + + # 1.1 >= 0.95 + assert ( + condition_processor.process_conditions( + variable_pool=variable_pool, + conditions=[ + Condition(variable_selector=["test_node_id", "one_one"], comparison_operator="≥", value="0.95") + ], + operator="or", + ).final_result + == True + ) + + # 1.1 > 0 + assert ( + condition_processor.process_conditions( + variable_pool=variable_pool, + conditions=[Condition(variable_selector=["test_node_id", "one_one"], comparison_operator=">", value="0")], + operator="or", + ).final_result + == True + ) diff --git a/api/tests/unit_tests/extensions/test_celery_ssl.py b/api/tests/unit_tests/extensions/test_celery_ssl.py index d33b7eaf23..fc7a090ef9 100644 --- a/api/tests/unit_tests/extensions/test_celery_ssl.py +++ b/api/tests/unit_tests/extensions/test_celery_ssl.py @@ -135,6 +135,8 @@ class TestCelerySSLConfiguration: mock_config.WORKFLOW_SCHEDULE_POLLER_INTERVAL = 1 mock_config.WORKFLOW_SCHEDULE_POLLER_BATCH_SIZE = 100 mock_config.WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK = 0 + mock_config.ENABLE_TRIGGER_PROVIDER_REFRESH_TASK = False + mock_config.TRIGGER_PROVIDER_REFRESH_INTERVAL = 15 with patch("extensions.ext_celery.dify_config", mock_config): from dify_app import DifyApp diff --git a/api/tests/unit_tests/libs/broadcast_channel/redis/test_channel_unit_tests.py b/api/tests/unit_tests/libs/broadcast_channel/redis/test_channel_unit_tests.py new file mode 100644 index 0000000000..ccba075fdf --- /dev/null +++ b/api/tests/unit_tests/libs/broadcast_channel/redis/test_channel_unit_tests.py @@ -0,0 +1,1403 @@ +""" +Comprehensive unit tests for Redis broadcast channel implementation. + +This test suite covers all aspects of the Redis broadcast channel including: +- Basic functionality and contract compliance +- Error handling and edge cases +- Thread safety and concurrency +- Resource management and cleanup +- Performance and reliability scenarios +""" + +import dataclasses +import threading +import time +from collections.abc import Generator +from unittest.mock import MagicMock, patch + +import pytest + +from libs.broadcast_channel.exc import BroadcastChannelError, SubscriptionClosedError +from libs.broadcast_channel.redis.channel import ( + BroadcastChannel as RedisBroadcastChannel, +) +from libs.broadcast_channel.redis.channel import ( + Topic, + _RedisSubscription, +) +from libs.broadcast_channel.redis.sharded_channel import ( + ShardedRedisBroadcastChannel, + ShardedTopic, + _RedisShardedSubscription, +) + + +class TestBroadcastChannel: + """Test cases for the main BroadcastChannel class.""" + + @pytest.fixture + def mock_redis_client(self) -> MagicMock: + """Create a mock Redis client for testing.""" + client = MagicMock() + client.pubsub.return_value = MagicMock() + return client + + @pytest.fixture + def broadcast_channel(self, mock_redis_client: MagicMock) -> RedisBroadcastChannel: + """Create a BroadcastChannel instance with mock Redis client (regular).""" + return RedisBroadcastChannel(mock_redis_client) + + @pytest.fixture + def sharded_broadcast_channel(self, mock_redis_client: MagicMock) -> ShardedRedisBroadcastChannel: + """Create a ShardedRedisBroadcastChannel instance with mock Redis client.""" + return ShardedRedisBroadcastChannel(mock_redis_client) + + def test_topic_creation(self, broadcast_channel: RedisBroadcastChannel, mock_redis_client: MagicMock): + """Test that topic() method returns a Topic instance with correct parameters.""" + topic_name = "test-topic" + topic = broadcast_channel.topic(topic_name) + + assert isinstance(topic, Topic) + assert topic._client == mock_redis_client + assert topic._topic == topic_name + + def test_topic_isolation(self, broadcast_channel: RedisBroadcastChannel): + """Test that different topic names create isolated Topic instances.""" + topic1 = broadcast_channel.topic("topic1") + topic2 = broadcast_channel.topic("topic2") + + assert topic1 is not topic2 + assert topic1._topic == "topic1" + assert topic2._topic == "topic2" + + def test_sharded_topic_creation( + self, sharded_broadcast_channel: ShardedRedisBroadcastChannel, mock_redis_client: MagicMock + ): + """Test that topic() on ShardedRedisBroadcastChannel returns a ShardedTopic instance with correct parameters.""" + topic_name = "test-sharded-topic" + sharded_topic = sharded_broadcast_channel.topic(topic_name) + + assert isinstance(sharded_topic, ShardedTopic) + assert sharded_topic._client == mock_redis_client + assert sharded_topic._topic == topic_name + + def test_sharded_topic_isolation(self, sharded_broadcast_channel: ShardedRedisBroadcastChannel): + """Test that different sharded topic names create isolated ShardedTopic instances.""" + topic1 = sharded_broadcast_channel.topic("sharded-topic1") + topic2 = sharded_broadcast_channel.topic("sharded-topic2") + + assert topic1 is not topic2 + assert topic1._topic == "sharded-topic1" + assert topic2._topic == "sharded-topic2" + + def test_regular_and_sharded_topic_isolation( + self, broadcast_channel: RedisBroadcastChannel, sharded_broadcast_channel: ShardedRedisBroadcastChannel + ): + """Test that regular topics and sharded topics from different channels are separate instances.""" + regular_topic = broadcast_channel.topic("test-topic") + sharded_topic = sharded_broadcast_channel.topic("test-topic") + + assert isinstance(regular_topic, Topic) + assert isinstance(sharded_topic, ShardedTopic) + assert regular_topic is not sharded_topic + assert regular_topic._topic == sharded_topic._topic + + +class TestTopic: + """Test cases for the Topic class.""" + + @pytest.fixture + def mock_redis_client(self) -> MagicMock: + """Create a mock Redis client for testing.""" + client = MagicMock() + client.pubsub.return_value = MagicMock() + return client + + @pytest.fixture + def topic(self, mock_redis_client: MagicMock) -> Topic: + """Create a Topic instance for testing.""" + return Topic(mock_redis_client, "test-topic") + + def test_as_producer_returns_self(self, topic: Topic): + """Test that as_producer() returns self as Producer interface.""" + producer = topic.as_producer() + assert producer is topic + # Producer is a Protocol, check duck typing instead + assert hasattr(producer, "publish") + + def test_as_subscriber_returns_self(self, topic: Topic): + """Test that as_subscriber() returns self as Subscriber interface.""" + subscriber = topic.as_subscriber() + assert subscriber is topic + # Subscriber is a Protocol, check duck typing instead + assert hasattr(subscriber, "subscribe") + + def test_publish_calls_redis_publish(self, topic: Topic, mock_redis_client: MagicMock): + """Test that publish() calls Redis PUBLISH with correct parameters.""" + payload = b"test message" + topic.publish(payload) + + mock_redis_client.publish.assert_called_once_with("test-topic", payload) + + +class TestShardedTopic: + """Test cases for the ShardedTopic class.""" + + @pytest.fixture + def mock_redis_client(self) -> MagicMock: + """Create a mock Redis client for testing.""" + client = MagicMock() + client.pubsub.return_value = MagicMock() + return client + + @pytest.fixture + def sharded_topic(self, mock_redis_client: MagicMock) -> ShardedTopic: + """Create a ShardedTopic instance for testing.""" + return ShardedTopic(mock_redis_client, "test-sharded-topic") + + def test_as_producer_returns_self(self, sharded_topic: ShardedTopic): + """Test that as_producer() returns self as Producer interface.""" + producer = sharded_topic.as_producer() + assert producer is sharded_topic + # Producer is a Protocol, check duck typing instead + assert hasattr(producer, "publish") + + def test_as_subscriber_returns_self(self, sharded_topic: ShardedTopic): + """Test that as_subscriber() returns self as Subscriber interface.""" + subscriber = sharded_topic.as_subscriber() + assert subscriber is sharded_topic + # Subscriber is a Protocol, check duck typing instead + assert hasattr(subscriber, "subscribe") + + def test_publish_calls_redis_spublish(self, sharded_topic: ShardedTopic, mock_redis_client: MagicMock): + """Test that publish() calls Redis SPUBLISH with correct parameters.""" + payload = b"test sharded message" + sharded_topic.publish(payload) + + mock_redis_client.spublish.assert_called_once_with("test-sharded-topic", payload) + + def test_subscribe_returns_sharded_subscription(self, sharded_topic: ShardedTopic, mock_redis_client: MagicMock): + """Test that subscribe() returns a _RedisShardedSubscription instance.""" + subscription = sharded_topic.subscribe() + + assert isinstance(subscription, _RedisShardedSubscription) + assert subscription._pubsub is mock_redis_client.pubsub.return_value + assert subscription._topic == "test-sharded-topic" + + +@dataclasses.dataclass(frozen=True) +class SubscriptionTestCase: + """Test case data for subscription tests.""" + + name: str + buffer_size: int + payload: bytes + expected_messages: list[bytes] + should_drop: bool = False + description: str = "" + + +class TestRedisSubscription: + """Test cases for the _RedisSubscription class.""" + + @pytest.fixture + def mock_pubsub(self) -> MagicMock: + """Create a mock PubSub instance for testing.""" + pubsub = MagicMock() + pubsub.subscribe = MagicMock() + pubsub.unsubscribe = MagicMock() + pubsub.close = MagicMock() + pubsub.get_message = MagicMock() + return pubsub + + @pytest.fixture + def subscription(self, mock_pubsub: MagicMock) -> Generator[_RedisSubscription, None, None]: + """Create a _RedisSubscription instance for testing.""" + subscription = _RedisSubscription( + pubsub=mock_pubsub, + topic="test-topic", + ) + yield subscription + subscription.close() + + @pytest.fixture + def started_subscription(self, subscription: _RedisSubscription) -> _RedisSubscription: + """Create a subscription that has been started.""" + subscription._start_if_needed() + return subscription + + # ==================== Lifecycle Tests ==================== + + def test_subscription_initialization(self, mock_pubsub: MagicMock): + """Test that subscription is properly initialized.""" + subscription = _RedisSubscription( + pubsub=mock_pubsub, + topic="test-topic", + ) + + assert subscription._pubsub is mock_pubsub + assert subscription._topic == "test-topic" + assert not subscription._closed.is_set() + assert subscription._dropped_count == 0 + assert subscription._listener_thread is None + assert not subscription._started + + def test_start_if_needed_first_call(self, subscription: _RedisSubscription, mock_pubsub: MagicMock): + """Test that _start_if_needed() properly starts subscription on first call.""" + subscription._start_if_needed() + + mock_pubsub.subscribe.assert_called_once_with("test-topic") + assert subscription._started is True + assert subscription._listener_thread is not None + + def test_start_if_needed_subsequent_calls(self, started_subscription: _RedisSubscription): + """Test that _start_if_needed() doesn't start subscription on subsequent calls.""" + original_thread = started_subscription._listener_thread + started_subscription._start_if_needed() + + # Should not create new thread or generator + assert started_subscription._listener_thread is original_thread + + def test_start_if_needed_when_closed(self, subscription: _RedisSubscription): + """Test that _start_if_needed() raises error when subscription is closed.""" + subscription.close() + + with pytest.raises(SubscriptionClosedError, match="The Redis regular subscription is closed"): + subscription._start_if_needed() + + def test_start_if_needed_when_cleaned_up(self, subscription: _RedisSubscription): + """Test that _start_if_needed() raises error when pubsub is None.""" + subscription._pubsub = None + + with pytest.raises(SubscriptionClosedError, match="The Redis regular subscription has been cleaned up"): + subscription._start_if_needed() + + def test_context_manager_usage(self, subscription: _RedisSubscription, mock_pubsub: MagicMock): + """Test that subscription works as context manager.""" + with subscription as sub: + assert sub is subscription + assert subscription._started is True + mock_pubsub.subscribe.assert_called_once_with("test-topic") + + def test_close_idempotent(self, subscription: _RedisSubscription, mock_pubsub: MagicMock): + """Test that close() is idempotent and can be called multiple times.""" + subscription._start_if_needed() + + # Close multiple times + subscription.close() + subscription.close() + subscription.close() + + # Should only cleanup once + mock_pubsub.unsubscribe.assert_called_once_with("test-topic") + mock_pubsub.close.assert_called_once() + assert subscription._pubsub is None + assert subscription._closed.is_set() + + def test_close_cleanup(self, subscription: _RedisSubscription, mock_pubsub: MagicMock): + """Test that close() properly cleans up all resources.""" + subscription._start_if_needed() + thread = subscription._listener_thread + + subscription.close() + + # Verify cleanup + mock_pubsub.unsubscribe.assert_called_once_with("test-topic") + mock_pubsub.close.assert_called_once() + assert subscription._pubsub is None + assert subscription._listener_thread is None + + # Wait for thread to finish (with timeout) + if thread and thread.is_alive(): + thread.join(timeout=1.0) + assert not thread.is_alive() + + # ==================== Message Processing Tests ==================== + + def test_message_iterator_with_messages(self, started_subscription: _RedisSubscription): + """Test message iterator behavior with messages in queue.""" + test_messages = [b"msg1", b"msg2", b"msg3"] + + # Add messages to queue + for msg in test_messages: + started_subscription._queue.put_nowait(msg) + + # Iterate through messages + iterator = iter(started_subscription) + received_messages = [] + + for msg in iterator: + received_messages.append(msg) + if len(received_messages) >= len(test_messages): + break + + assert received_messages == test_messages + + def test_message_iterator_when_closed(self, subscription: _RedisSubscription): + """Test that iterator raises error when subscription is closed.""" + subscription.close() + + with pytest.raises(BroadcastChannelError, match="The Redis regular subscription is closed"): + iter(subscription) + + # ==================== Message Enqueue Tests ==================== + + def test_enqueue_message_success(self, started_subscription: _RedisSubscription): + """Test successful message enqueue.""" + payload = b"test message" + + started_subscription._enqueue_message(payload) + + assert started_subscription._queue.qsize() == 1 + assert started_subscription._queue.get_nowait() == payload + + def test_enqueue_message_when_closed(self, subscription: _RedisSubscription): + """Test message enqueue when subscription is closed.""" + subscription.close() + payload = b"test message" + + # Should not raise exception, but should not enqueue + subscription._enqueue_message(payload) + + assert subscription._queue.empty() + + def test_enqueue_message_with_full_queue(self, started_subscription: _RedisSubscription): + """Test message enqueue with full queue (dropping behavior).""" + # Fill the queue + for i in range(started_subscription._queue.maxsize): + started_subscription._queue.put_nowait(f"old_msg_{i}".encode()) + + # Try to enqueue new message (should drop oldest) + new_message = b"new_message" + started_subscription._enqueue_message(new_message) + + # Should have dropped one message and added new one + assert started_subscription._dropped_count == 1 + + # New message should be in queue + messages = [] + while not started_subscription._queue.empty(): + messages.append(started_subscription._queue.get_nowait()) + + assert new_message in messages + + # ==================== Listener Thread Tests ==================== + + @patch("time.sleep", side_effect=lambda x: None) # Speed up test + def test_listener_thread_normal_operation( + self, mock_sleep, subscription: _RedisSubscription, mock_pubsub: MagicMock + ): + """Test listener thread normal operation.""" + # Mock message from Redis + mock_message = {"type": "message", "channel": "test-topic", "data": b"test payload"} + mock_pubsub.get_message.return_value = mock_message + + # Start listener + subscription._start_if_needed() + + # Wait a bit for processing + time.sleep(0.1) + + # Verify message was processed + assert not subscription._queue.empty() + assert subscription._queue.get_nowait() == b"test payload" + + def test_listener_thread_ignores_subscribe_messages(self, subscription: _RedisSubscription, mock_pubsub: MagicMock): + """Test that listener thread ignores subscribe/unsubscribe messages.""" + mock_message = {"type": "subscribe", "channel": "test-topic", "data": 1} + mock_pubsub.get_message.return_value = mock_message + + subscription._start_if_needed() + time.sleep(0.1) + + # Should not enqueue subscribe messages + assert subscription._queue.empty() + + def test_listener_thread_ignores_wrong_channel(self, subscription: _RedisSubscription, mock_pubsub: MagicMock): + """Test that listener thread ignores messages from wrong channels.""" + mock_message = {"type": "message", "channel": "wrong-topic", "data": b"test payload"} + mock_pubsub.get_message.return_value = mock_message + + subscription._start_if_needed() + time.sleep(0.1) + + # Should not enqueue messages from wrong channels + assert subscription._queue.empty() + + def test_listener_thread_handles_redis_exceptions(self, subscription: _RedisSubscription, mock_pubsub: MagicMock): + """Test that listener thread handles Redis exceptions gracefully.""" + mock_pubsub.get_message.side_effect = Exception("Redis error") + + subscription._start_if_needed() + + # Wait for thread to handle exception + time.sleep(0.2) + + # Thread should still be alive but not processing + assert subscription._listener_thread is not None + assert not subscription._listener_thread.is_alive() + + def test_listener_thread_stops_when_closed(self, subscription: _RedisSubscription, mock_pubsub: MagicMock): + """Test that listener thread stops when subscription is closed.""" + subscription._start_if_needed() + thread = subscription._listener_thread + + # Close subscription + subscription.close() + + # Wait for thread to finish + if thread is not None and thread.is_alive(): + thread.join(timeout=1.0) + + assert thread is None or not thread.is_alive() + + # ==================== Table-driven Tests ==================== + + @pytest.mark.parametrize( + "test_case", + [ + SubscriptionTestCase( + name="basic_message", + buffer_size=5, + payload=b"hello world", + expected_messages=[b"hello world"], + description="Basic message publishing and receiving", + ), + SubscriptionTestCase( + name="empty_message", + buffer_size=5, + payload=b"", + expected_messages=[b""], + description="Empty message handling", + ), + SubscriptionTestCase( + name="large_message", + buffer_size=5, + payload=b"x" * 10000, + expected_messages=[b"x" * 10000], + description="Large message handling", + ), + SubscriptionTestCase( + name="unicode_message", + buffer_size=5, + payload="你好世界".encode(), + expected_messages=["你好世界".encode()], + description="Unicode message handling", + ), + ], + ) + def test_subscription_scenarios(self, test_case: SubscriptionTestCase, mock_pubsub: MagicMock): + """Test various subscription scenarios using table-driven approach.""" + subscription = _RedisSubscription( + pubsub=mock_pubsub, + topic="test-topic", + ) + + # Simulate receiving message + mock_message = {"type": "message", "channel": "test-topic", "data": test_case.payload} + mock_pubsub.get_message.return_value = mock_message + + try: + with subscription: + # Wait for message processing + time.sleep(0.1) + + # Collect received messages + received = [] + for msg in subscription: + received.append(msg) + if len(received) >= len(test_case.expected_messages): + break + + assert received == test_case.expected_messages, f"Failed: {test_case.description}" + finally: + subscription.close() + + def test_concurrent_close_and_enqueue(self, started_subscription: _RedisSubscription): + """Test concurrent close and enqueue operations.""" + errors = [] + + def close_subscription(): + try: + time.sleep(0.05) # Small delay + started_subscription.close() + except Exception as e: + errors.append(e) + + def enqueue_messages(): + try: + for i in range(50): + started_subscription._enqueue_message(f"msg_{i}".encode()) + time.sleep(0.001) + except Exception as e: + errors.append(e) + + # Start threads + close_thread = threading.Thread(target=close_subscription) + enqueue_thread = threading.Thread(target=enqueue_messages) + + close_thread.start() + enqueue_thread.start() + + # Wait for completion + close_thread.join(timeout=2.0) + enqueue_thread.join(timeout=2.0) + + # Should not have any errors (operations should be safe) + assert len(errors) == 0 + + # ==================== Error Handling Tests ==================== + + def test_iterator_after_close(self, subscription: _RedisSubscription): + """Test iterator behavior after close.""" + subscription.close() + + with pytest.raises(SubscriptionClosedError, match="The Redis regular subscription is closed"): + iter(subscription) + + def test_start_after_close(self, subscription: _RedisSubscription): + """Test start attempts after close.""" + subscription.close() + + with pytest.raises(SubscriptionClosedError, match="The Redis regular subscription is closed"): + subscription._start_if_needed() + + def test_pubsub_none_operations(self, subscription: _RedisSubscription): + """Test operations when pubsub is None.""" + subscription._pubsub = None + + with pytest.raises(SubscriptionClosedError, match="The Redis regular subscription has been cleaned up"): + subscription._start_if_needed() + + # Close should still work + subscription.close() # Should not raise + + def test_channel_name_variations(self, mock_pubsub: MagicMock): + """Test various channel name formats.""" + channel_names = [ + "simple", + "with-dashes", + "with_underscores", + "with.numbers", + "WITH.UPPERCASE", + "mixed-CASE_name", + "very.long.channel.name.with.multiple.parts", + ] + + for channel_name in channel_names: + subscription = _RedisSubscription( + pubsub=mock_pubsub, + topic=channel_name, + ) + + subscription._start_if_needed() + mock_pubsub.subscribe.assert_called_with(channel_name) + subscription.close() + + def test_received_on_closed_subscription(self, subscription: _RedisSubscription): + subscription.close() + + with pytest.raises(SubscriptionClosedError): + subscription.receive() + + +class TestRedisShardedSubscription: + """Test cases for the _RedisShardedSubscription class.""" + + @pytest.fixture + def mock_pubsub(self) -> MagicMock: + """Create a mock PubSub instance for testing.""" + pubsub = MagicMock() + pubsub.ssubscribe = MagicMock() + pubsub.sunsubscribe = MagicMock() + pubsub.close = MagicMock() + pubsub.get_sharded_message = MagicMock() + return pubsub + + @pytest.fixture + def sharded_subscription(self, mock_pubsub: MagicMock) -> Generator[_RedisShardedSubscription, None, None]: + """Create a _RedisShardedSubscription instance for testing.""" + subscription = _RedisShardedSubscription( + pubsub=mock_pubsub, + topic="test-sharded-topic", + ) + yield subscription + subscription.close() + + @pytest.fixture + def started_sharded_subscription( + self, sharded_subscription: _RedisShardedSubscription + ) -> _RedisShardedSubscription: + """Create a sharded subscription that has been started.""" + sharded_subscription._start_if_needed() + return sharded_subscription + + # ==================== Lifecycle Tests ==================== + + def test_sharded_subscription_initialization(self, mock_pubsub: MagicMock): + """Test that sharded subscription is properly initialized.""" + subscription = _RedisShardedSubscription( + pubsub=mock_pubsub, + topic="test-sharded-topic", + ) + + assert subscription._pubsub is mock_pubsub + assert subscription._topic == "test-sharded-topic" + assert not subscription._closed.is_set() + assert subscription._dropped_count == 0 + assert subscription._listener_thread is None + assert not subscription._started + + def test_start_if_needed_first_call(self, sharded_subscription: _RedisShardedSubscription, mock_pubsub: MagicMock): + """Test that _start_if_needed() properly starts sharded subscription on first call.""" + sharded_subscription._start_if_needed() + + mock_pubsub.ssubscribe.assert_called_once_with("test-sharded-topic") + assert sharded_subscription._started is True + assert sharded_subscription._listener_thread is not None + + def test_start_if_needed_subsequent_calls(self, started_sharded_subscription: _RedisShardedSubscription): + """Test that _start_if_needed() doesn't start sharded subscription on subsequent calls.""" + original_thread = started_sharded_subscription._listener_thread + started_sharded_subscription._start_if_needed() + + # Should not create new thread or generator + assert started_sharded_subscription._listener_thread is original_thread + + def test_start_if_needed_when_closed(self, sharded_subscription: _RedisShardedSubscription): + """Test that _start_if_needed() raises error when sharded subscription is closed.""" + sharded_subscription.close() + + with pytest.raises(SubscriptionClosedError, match="The Redis sharded subscription is closed"): + sharded_subscription._start_if_needed() + + def test_start_if_needed_when_cleaned_up(self, sharded_subscription: _RedisShardedSubscription): + """Test that _start_if_needed() raises error when pubsub is None.""" + sharded_subscription._pubsub = None + + with pytest.raises(SubscriptionClosedError, match="The Redis sharded subscription has been cleaned up"): + sharded_subscription._start_if_needed() + + def test_context_manager_usage(self, sharded_subscription: _RedisShardedSubscription, mock_pubsub: MagicMock): + """Test that sharded subscription works as context manager.""" + with sharded_subscription as sub: + assert sub is sharded_subscription + assert sharded_subscription._started is True + mock_pubsub.ssubscribe.assert_called_once_with("test-sharded-topic") + + def test_close_idempotent(self, sharded_subscription: _RedisShardedSubscription, mock_pubsub: MagicMock): + """Test that close() is idempotent and can be called multiple times.""" + sharded_subscription._start_if_needed() + + # Close multiple times + sharded_subscription.close() + sharded_subscription.close() + sharded_subscription.close() + + # Should only cleanup once + mock_pubsub.sunsubscribe.assert_called_once_with("test-sharded-topic") + mock_pubsub.close.assert_called_once() + assert sharded_subscription._pubsub is None + assert sharded_subscription._closed.is_set() + + def test_close_cleanup(self, sharded_subscription: _RedisShardedSubscription, mock_pubsub: MagicMock): + """Test that close() properly cleans up all resources.""" + sharded_subscription._start_if_needed() + thread = sharded_subscription._listener_thread + + sharded_subscription.close() + + # Verify cleanup + mock_pubsub.sunsubscribe.assert_called_once_with("test-sharded-topic") + mock_pubsub.close.assert_called_once() + assert sharded_subscription._pubsub is None + assert sharded_subscription._listener_thread is None + + # Wait for thread to finish (with timeout) + if thread and thread.is_alive(): + thread.join(timeout=1.0) + assert not thread.is_alive() + + # ==================== Message Processing Tests ==================== + + def test_message_iterator_with_messages(self, started_sharded_subscription: _RedisShardedSubscription): + """Test message iterator behavior with messages in queue.""" + test_messages = [b"sharded_msg1", b"sharded_msg2", b"sharded_msg3"] + + # Add messages to queue + for msg in test_messages: + started_sharded_subscription._queue.put_nowait(msg) + + # Iterate through messages + iterator = iter(started_sharded_subscription) + received_messages = [] + + for msg in iterator: + received_messages.append(msg) + if len(received_messages) >= len(test_messages): + break + + assert received_messages == test_messages + + def test_message_iterator_when_closed(self, sharded_subscription: _RedisShardedSubscription): + """Test that iterator raises error when sharded subscription is closed.""" + sharded_subscription.close() + + with pytest.raises(SubscriptionClosedError, match="The Redis sharded subscription is closed"): + iter(sharded_subscription) + + # ==================== Message Enqueue Tests ==================== + + def test_enqueue_message_success(self, started_sharded_subscription: _RedisShardedSubscription): + """Test successful message enqueue.""" + payload = b"test sharded message" + + started_sharded_subscription._enqueue_message(payload) + + assert started_sharded_subscription._queue.qsize() == 1 + assert started_sharded_subscription._queue.get_nowait() == payload + + def test_enqueue_message_when_closed(self, sharded_subscription: _RedisShardedSubscription): + """Test message enqueue when sharded subscription is closed.""" + sharded_subscription.close() + payload = b"test sharded message" + + # Should not raise exception, but should not enqueue + sharded_subscription._enqueue_message(payload) + + assert sharded_subscription._queue.empty() + + def test_enqueue_message_with_full_queue(self, started_sharded_subscription: _RedisShardedSubscription): + """Test message enqueue with full queue (dropping behavior).""" + # Fill the queue + for i in range(started_sharded_subscription._queue.maxsize): + started_sharded_subscription._queue.put_nowait(f"old_msg_{i}".encode()) + + # Try to enqueue new message (should drop oldest) + new_message = b"new_sharded_message" + started_sharded_subscription._enqueue_message(new_message) + + # Should have dropped one message and added new one + assert started_sharded_subscription._dropped_count == 1 + + # New message should be in queue + messages = [] + while not started_sharded_subscription._queue.empty(): + messages.append(started_sharded_subscription._queue.get_nowait()) + + assert new_message in messages + + # ==================== Listener Thread Tests ==================== + + @patch("time.sleep", side_effect=lambda x: None) # Speed up test + def test_listener_thread_normal_operation( + self, mock_sleep, sharded_subscription: _RedisShardedSubscription, mock_pubsub: MagicMock + ): + """Test sharded listener thread normal operation.""" + # Mock sharded message from Redis + mock_message = {"type": "smessage", "channel": "test-sharded-topic", "data": b"test sharded payload"} + mock_pubsub.get_sharded_message.return_value = mock_message + + # Start listener + sharded_subscription._start_if_needed() + + # Wait a bit for processing + time.sleep(0.1) + + # Verify message was processed + assert not sharded_subscription._queue.empty() + assert sharded_subscription._queue.get_nowait() == b"test sharded payload" + + def test_listener_thread_ignores_subscribe_messages( + self, sharded_subscription: _RedisShardedSubscription, mock_pubsub: MagicMock + ): + """Test that listener thread ignores ssubscribe/sunsubscribe messages.""" + mock_message = {"type": "ssubscribe", "channel": "test-sharded-topic", "data": 1} + mock_pubsub.get_sharded_message.return_value = mock_message + + sharded_subscription._start_if_needed() + time.sleep(0.1) + + # Should not enqueue ssubscribe messages + assert sharded_subscription._queue.empty() + + def test_listener_thread_ignores_wrong_channel( + self, sharded_subscription: _RedisShardedSubscription, mock_pubsub: MagicMock + ): + """Test that listener thread ignores messages from wrong channels.""" + mock_message = {"type": "smessage", "channel": "wrong-sharded-topic", "data": b"test payload"} + mock_pubsub.get_sharded_message.return_value = mock_message + + sharded_subscription._start_if_needed() + time.sleep(0.1) + + # Should not enqueue messages from wrong channels + assert sharded_subscription._queue.empty() + + def test_listener_thread_ignores_regular_messages( + self, sharded_subscription: _RedisShardedSubscription, mock_pubsub: MagicMock + ): + """Test that listener thread ignores regular (non-sharded) messages.""" + mock_message = {"type": "message", "channel": "test-sharded-topic", "data": b"test payload"} + mock_pubsub.get_sharded_message.return_value = mock_message + + sharded_subscription._start_if_needed() + time.sleep(0.1) + + # Should not enqueue regular messages in sharded subscription + assert sharded_subscription._queue.empty() + + def test_listener_thread_handles_redis_exceptions( + self, sharded_subscription: _RedisShardedSubscription, mock_pubsub: MagicMock + ): + """Test that listener thread handles Redis exceptions gracefully.""" + mock_pubsub.get_sharded_message.side_effect = Exception("Redis error") + + sharded_subscription._start_if_needed() + + # Wait for thread to handle exception + time.sleep(0.2) + + # Thread should still be alive but not processing + assert sharded_subscription._listener_thread is not None + assert not sharded_subscription._listener_thread.is_alive() + + def test_listener_thread_stops_when_closed( + self, sharded_subscription: _RedisShardedSubscription, mock_pubsub: MagicMock + ): + """Test that listener thread stops when sharded subscription is closed.""" + sharded_subscription._start_if_needed() + thread = sharded_subscription._listener_thread + + # Close subscription + sharded_subscription.close() + + # Wait for thread to finish + if thread is not None and thread.is_alive(): + thread.join(timeout=1.0) + + assert thread is None or not thread.is_alive() + + # ==================== Table-driven Tests ==================== + + @pytest.mark.parametrize( + "test_case", + [ + SubscriptionTestCase( + name="basic_sharded_message", + buffer_size=5, + payload=b"hello sharded world", + expected_messages=[b"hello sharded world"], + description="Basic sharded message publishing and receiving", + ), + SubscriptionTestCase( + name="empty_sharded_message", + buffer_size=5, + payload=b"", + expected_messages=[b""], + description="Empty sharded message handling", + ), + SubscriptionTestCase( + name="large_sharded_message", + buffer_size=5, + payload=b"x" * 10000, + expected_messages=[b"x" * 10000], + description="Large sharded message handling", + ), + SubscriptionTestCase( + name="unicode_sharded_message", + buffer_size=5, + payload="你好世界".encode(), + expected_messages=["你好世界".encode()], + description="Unicode sharded message handling", + ), + ], + ) + def test_sharded_subscription_scenarios(self, test_case: SubscriptionTestCase, mock_pubsub: MagicMock): + """Test various sharded subscription scenarios using table-driven approach.""" + subscription = _RedisShardedSubscription( + pubsub=mock_pubsub, + topic="test-sharded-topic", + ) + + # Simulate receiving sharded message + mock_message = {"type": "smessage", "channel": "test-sharded-topic", "data": test_case.payload} + mock_pubsub.get_sharded_message.return_value = mock_message + + try: + with subscription: + # Wait for message processing + time.sleep(0.1) + + # Collect received messages + received = [] + for msg in subscription: + received.append(msg) + if len(received) >= len(test_case.expected_messages): + break + + assert received == test_case.expected_messages, f"Failed: {test_case.description}" + finally: + subscription.close() + + def test_concurrent_close_and_enqueue(self, started_sharded_subscription: _RedisShardedSubscription): + """Test concurrent close and enqueue operations for sharded subscription.""" + errors = [] + + def close_subscription(): + try: + time.sleep(0.05) # Small delay + started_sharded_subscription.close() + except Exception as e: + errors.append(e) + + def enqueue_messages(): + try: + for i in range(50): + started_sharded_subscription._enqueue_message(f"sharded_msg_{i}".encode()) + time.sleep(0.001) + except Exception as e: + errors.append(e) + + # Start threads + close_thread = threading.Thread(target=close_subscription) + enqueue_thread = threading.Thread(target=enqueue_messages) + + close_thread.start() + enqueue_thread.start() + + # Wait for completion + close_thread.join(timeout=2.0) + enqueue_thread.join(timeout=2.0) + + # Should not have any errors (operations should be safe) + assert len(errors) == 0 + + # ==================== Error Handling Tests ==================== + + def test_iterator_after_close(self, sharded_subscription: _RedisShardedSubscription): + """Test iterator behavior after close for sharded subscription.""" + sharded_subscription.close() + + with pytest.raises(SubscriptionClosedError, match="The Redis sharded subscription is closed"): + iter(sharded_subscription) + + def test_start_after_close(self, sharded_subscription: _RedisShardedSubscription): + """Test start attempts after close for sharded subscription.""" + sharded_subscription.close() + + with pytest.raises(SubscriptionClosedError, match="The Redis sharded subscription is closed"): + sharded_subscription._start_if_needed() + + def test_pubsub_none_operations(self, sharded_subscription: _RedisShardedSubscription): + """Test operations when pubsub is None for sharded subscription.""" + sharded_subscription._pubsub = None + + with pytest.raises(SubscriptionClosedError, match="The Redis sharded subscription has been cleaned up"): + sharded_subscription._start_if_needed() + + # Close should still work + sharded_subscription.close() # Should not raise + + def test_channel_name_variations(self, mock_pubsub: MagicMock): + """Test various sharded channel name formats.""" + channel_names = [ + "simple", + "with-dashes", + "with_underscores", + "with.numbers", + "WITH.UPPERCASE", + "mixed-CASE_name", + "very.long.sharded.channel.name.with.multiple.parts", + ] + + for channel_name in channel_names: + subscription = _RedisShardedSubscription( + pubsub=mock_pubsub, + topic=channel_name, + ) + + subscription._start_if_needed() + mock_pubsub.ssubscribe.assert_called_with(channel_name) + subscription.close() + + def test_receive_on_closed_sharded_subscription(self, sharded_subscription: _RedisShardedSubscription): + """Test receive method on closed sharded subscription.""" + sharded_subscription.close() + + with pytest.raises(SubscriptionClosedError): + sharded_subscription.receive() + + def test_receive_with_timeout(self, started_sharded_subscription: _RedisShardedSubscription): + """Test receive method with timeout for sharded subscription.""" + # Should return None when no message available and timeout expires + result = started_sharded_subscription.receive(timeout=0.01) + assert result is None + + def test_receive_with_message(self, started_sharded_subscription: _RedisShardedSubscription): + """Test receive method when message is available for sharded subscription.""" + test_message = b"test sharded receive" + started_sharded_subscription._queue.put_nowait(test_message) + + result = started_sharded_subscription.receive(timeout=1.0) + assert result == test_message + + +class TestRedisSubscriptionCommon: + """Parameterized tests for common Redis subscription functionality. + + This test suite eliminates duplication by running the same tests against + both regular and sharded subscriptions using pytest.mark.parametrize. + """ + + @pytest.fixture( + params=[ + ("regular", _RedisSubscription), + ("sharded", _RedisShardedSubscription), + ] + ) + def subscription_params(self, request): + """Parameterized fixture providing subscription type and class.""" + return request.param + + @pytest.fixture + def mock_pubsub(self) -> MagicMock: + """Create a mock PubSub instance for testing.""" + pubsub = MagicMock() + # Set up mock methods for both regular and sharded subscriptions + pubsub.subscribe = MagicMock() + pubsub.unsubscribe = MagicMock() + pubsub.ssubscribe = MagicMock() # type: ignore[attr-defined] + pubsub.sunsubscribe = MagicMock() # type: ignore[attr-defined] + pubsub.get_message = MagicMock() + pubsub.get_sharded_message = MagicMock() # type: ignore[attr-defined] + pubsub.close = MagicMock() + return pubsub + + @pytest.fixture + def subscription(self, subscription_params, mock_pubsub: MagicMock): + """Create a subscription instance based on parameterized type.""" + subscription_type, subscription_class = subscription_params + topic_name = f"test-{subscription_type}-topic" + subscription = subscription_class( + pubsub=mock_pubsub, + topic=topic_name, + ) + yield subscription + subscription.close() + + @pytest.fixture + def started_subscription(self, subscription): + """Create a subscription that has been started.""" + subscription._start_if_needed() + return subscription + + # ==================== Initialization Tests ==================== + + def test_subscription_initialization(self, subscription, subscription_params): + """Test that subscription is properly initialized.""" + subscription_type, _ = subscription_params + expected_topic = f"test-{subscription_type}-topic" + + assert subscription._pubsub is not None + assert subscription._topic == expected_topic + assert not subscription._closed.is_set() + assert subscription._dropped_count == 0 + assert subscription._listener_thread is None + assert not subscription._started + + def test_subscription_type(self, subscription, subscription_params): + """Test that subscription returns correct type.""" + subscription_type, _ = subscription_params + assert subscription._get_subscription_type() == subscription_type + + # ==================== Lifecycle Tests ==================== + + def test_start_if_needed_first_call(self, subscription, subscription_params, mock_pubsub: MagicMock): + """Test that _start_if_needed() properly starts subscription on first call.""" + subscription_type, _ = subscription_params + subscription._start_if_needed() + + if subscription_type == "regular": + mock_pubsub.subscribe.assert_called_once() + else: + mock_pubsub.ssubscribe.assert_called_once() + + assert subscription._started is True + assert subscription._listener_thread is not None + + def test_start_if_needed_subsequent_calls(self, started_subscription): + """Test that _start_if_needed() doesn't start subscription on subsequent calls.""" + original_thread = started_subscription._listener_thread + started_subscription._start_if_needed() + + # Should not create new thread + assert started_subscription._listener_thread is original_thread + + def test_context_manager_usage(self, subscription, subscription_params, mock_pubsub: MagicMock): + """Test that subscription works as context manager.""" + subscription_type, _ = subscription_params + expected_topic = f"test-{subscription_type}-topic" + + with subscription as sub: + assert sub is subscription + assert subscription._started is True + if subscription_type == "regular": + mock_pubsub.subscribe.assert_called_with(expected_topic) + else: + mock_pubsub.ssubscribe.assert_called_with(expected_topic) + + def test_close_idempotent(self, subscription, subscription_params, mock_pubsub: MagicMock): + """Test that close() is idempotent and can be called multiple times.""" + subscription_type, _ = subscription_params + subscription._start_if_needed() + + # Close multiple times + subscription.close() + subscription.close() + subscription.close() + + # Should only cleanup once + if subscription_type == "regular": + mock_pubsub.unsubscribe.assert_called_once() + else: + mock_pubsub.sunsubscribe.assert_called_once() + mock_pubsub.close.assert_called_once() + assert subscription._pubsub is None + assert subscription._closed.is_set() + + # ==================== Message Processing Tests ==================== + + def test_message_iterator_with_messages(self, started_subscription): + """Test message iterator behavior with messages in queue.""" + test_messages = [b"msg1", b"msg2", b"msg3"] + + # Add messages to queue + for msg in test_messages: + started_subscription._queue.put_nowait(msg) + + # Iterate through messages + iterator = iter(started_subscription) + received_messages = [] + + for msg in iterator: + received_messages.append(msg) + if len(received_messages) >= len(test_messages): + break + + assert received_messages == test_messages + + def test_message_iterator_when_closed(self, subscription, subscription_params): + """Test that iterator raises error when subscription is closed.""" + subscription_type, _ = subscription_params + subscription.close() + + with pytest.raises(SubscriptionClosedError, match=f"The Redis {subscription_type} subscription is closed"): + iter(subscription) + + # ==================== Message Enqueue Tests ==================== + + def test_enqueue_message_success(self, started_subscription): + """Test successful message enqueue.""" + payload = b"test message" + + started_subscription._enqueue_message(payload) + + assert started_subscription._queue.qsize() == 1 + assert started_subscription._queue.get_nowait() == payload + + def test_enqueue_message_when_closed(self, subscription): + """Test message enqueue when subscription is closed.""" + subscription.close() + payload = b"test message" + + # Should not raise exception, but should not enqueue + subscription._enqueue_message(payload) + + assert subscription._queue.empty() + + def test_enqueue_message_with_full_queue(self, started_subscription): + """Test message enqueue with full queue (dropping behavior).""" + # Fill the queue + for i in range(started_subscription._queue.maxsize): + started_subscription._queue.put_nowait(f"old_msg_{i}".encode()) + + # Try to enqueue new message (should drop oldest) + new_message = b"new_message" + started_subscription._enqueue_message(new_message) + + # Should have dropped one message and added new one + assert started_subscription._dropped_count == 1 + + # New message should be in queue + messages = [] + while not started_subscription._queue.empty(): + messages.append(started_subscription._queue.get_nowait()) + + assert new_message in messages + + # ==================== Message Type Tests ==================== + + def test_get_message_type(self, subscription, subscription_params): + """Test that subscription returns correct message type.""" + subscription_type, _ = subscription_params + expected_type = "message" if subscription_type == "regular" else "smessage" + assert subscription._get_message_type() == expected_type + + # ==================== Error Handling Tests ==================== + + def test_start_if_needed_when_closed(self, subscription, subscription_params): + """Test that _start_if_needed() raises error when subscription is closed.""" + subscription_type, _ = subscription_params + subscription.close() + + with pytest.raises(SubscriptionClosedError, match=f"The Redis {subscription_type} subscription is closed"): + subscription._start_if_needed() + + def test_start_if_needed_when_cleaned_up(self, subscription, subscription_params): + """Test that _start_if_needed() raises error when pubsub is None.""" + subscription_type, _ = subscription_params + subscription._pubsub = None + + with pytest.raises( + SubscriptionClosedError, match=f"The Redis {subscription_type} subscription has been cleaned up" + ): + subscription._start_if_needed() + + def test_iterator_after_close(self, subscription, subscription_params): + """Test iterator behavior after close.""" + subscription_type, _ = subscription_params + subscription.close() + + with pytest.raises(SubscriptionClosedError, match=f"The Redis {subscription_type} subscription is closed"): + iter(subscription) + + def test_start_after_close(self, subscription, subscription_params): + """Test start attempts after close.""" + subscription_type, _ = subscription_params + subscription.close() + + with pytest.raises(SubscriptionClosedError, match=f"The Redis {subscription_type} subscription is closed"): + subscription._start_if_needed() + + def test_pubsub_none_operations(self, subscription, subscription_params): + """Test operations when pubsub is None.""" + subscription_type, _ = subscription_params + subscription._pubsub = None + + with pytest.raises( + SubscriptionClosedError, match=f"The Redis {subscription_type} subscription has been cleaned up" + ): + subscription._start_if_needed() + + # Close should still work + subscription.close() # Should not raise + + def test_receive_on_closed_subscription(self, subscription, subscription_params): + """Test receive method on closed subscription.""" + subscription.close() + + with pytest.raises(SubscriptionClosedError): + subscription.receive() + + # ==================== Table-driven Tests ==================== + + @pytest.mark.parametrize( + "test_case", + [ + SubscriptionTestCase( + name="basic_message", + buffer_size=5, + payload=b"hello world", + expected_messages=[b"hello world"], + description="Basic message publishing and receiving", + ), + SubscriptionTestCase( + name="empty_message", + buffer_size=5, + payload=b"", + expected_messages=[b""], + description="Empty message handling", + ), + SubscriptionTestCase( + name="large_message", + buffer_size=5, + payload=b"x" * 10000, + expected_messages=[b"x" * 10000], + description="Large message handling", + ), + SubscriptionTestCase( + name="unicode_message", + buffer_size=5, + payload="你好世界".encode(), + expected_messages=["你好世界".encode()], + description="Unicode message handling", + ), + ], + ) + def test_subscription_scenarios( + self, test_case: SubscriptionTestCase, subscription, subscription_params, mock_pubsub: MagicMock + ): + """Test various subscription scenarios using table-driven approach.""" + subscription_type, _ = subscription_params + expected_topic = f"test-{subscription_type}-topic" + expected_message_type = "message" if subscription_type == "regular" else "smessage" + + # Simulate receiving message + mock_message = {"type": expected_message_type, "channel": expected_topic, "data": test_case.payload} + + if subscription_type == "regular": + mock_pubsub.get_message.return_value = mock_message + else: + mock_pubsub.get_sharded_message.return_value = mock_message + + try: + with subscription: + # Wait for message processing + time.sleep(0.1) + + # Collect received messages + received = [] + for msg in subscription: + received.append(msg) + if len(received) >= len(test_case.expected_messages): + break + + assert received == test_case.expected_messages, f"Failed: {test_case.description}" + finally: + subscription.close() + + # ==================== Concurrency Tests ==================== + + def test_concurrent_close_and_enqueue(self, started_subscription): + """Test concurrent close and enqueue operations.""" + errors = [] + + def close_subscription(): + try: + time.sleep(0.05) # Small delay + started_subscription.close() + except Exception as e: + errors.append(e) + + def enqueue_messages(): + try: + for i in range(50): + started_subscription._enqueue_message(f"msg_{i}".encode()) + time.sleep(0.001) + except Exception as e: + errors.append(e) + + # Start threads + close_thread = threading.Thread(target=close_subscription) + enqueue_thread = threading.Thread(target=enqueue_messages) + + close_thread.start() + enqueue_thread.start() + + # Wait for completion + close_thread.join(timeout=2.0) + enqueue_thread.join(timeout=2.0) + + # Should not have any errors (operations should be safe) + assert len(errors) == 0 diff --git a/api/tests/unit_tests/libs/test_datetime_utils.py b/api/tests/unit_tests/libs/test_datetime_utils.py index e914ca4816..84f5b63fbf 100644 --- a/api/tests/unit_tests/libs/test_datetime_utils.py +++ b/api/tests/unit_tests/libs/test_datetime_utils.py @@ -1,8 +1,10 @@ import datetime +from unittest.mock import patch import pytest +import pytz -from libs.datetime_utils import naive_utc_now +from libs.datetime_utils import naive_utc_now, parse_time_range def test_naive_utc_now(monkeypatch: pytest.MonkeyPatch): @@ -20,3 +22,247 @@ def test_naive_utc_now(monkeypatch: pytest.MonkeyPatch): naive_time = naive_datetime.time() utc_time = tz_aware_utc_now.time() assert naive_time == utc_time + + +class TestParseTimeRange: + """Test cases for parse_time_range function.""" + + def test_parse_time_range_basic(self): + """Test basic time range parsing.""" + start, end = parse_time_range("2024-01-01 10:00", "2024-01-01 18:00", "UTC") + + assert start is not None + assert end is not None + assert start < end + assert start.tzinfo == pytz.UTC + assert end.tzinfo == pytz.UTC + + def test_parse_time_range_start_only(self): + """Test parsing with only start time.""" + start, end = parse_time_range("2024-01-01 10:00", None, "UTC") + + assert start is not None + assert end is None + assert start.tzinfo == pytz.UTC + + def test_parse_time_range_end_only(self): + """Test parsing with only end time.""" + start, end = parse_time_range(None, "2024-01-01 18:00", "UTC") + + assert start is None + assert end is not None + assert end.tzinfo == pytz.UTC + + def test_parse_time_range_both_none(self): + """Test parsing with both times None.""" + start, end = parse_time_range(None, None, "UTC") + + assert start is None + assert end is None + + def test_parse_time_range_different_timezones(self): + """Test parsing with different timezones.""" + # Test with US/Eastern timezone + start, end = parse_time_range("2024-01-01 10:00", "2024-01-01 18:00", "US/Eastern") + + assert start is not None + assert end is not None + assert start.tzinfo == pytz.UTC + assert end.tzinfo == pytz.UTC + # Verify the times are correctly converted to UTC + assert start.hour == 15 # 10 AM EST = 3 PM UTC (in January) + assert end.hour == 23 # 6 PM EST = 11 PM UTC (in January) + + def test_parse_time_range_invalid_start_format(self): + """Test parsing with invalid start time format.""" + with pytest.raises(ValueError, match="time data.*does not match format"): + parse_time_range("invalid-date", "2024-01-01 18:00", "UTC") + + def test_parse_time_range_invalid_end_format(self): + """Test parsing with invalid end time format.""" + with pytest.raises(ValueError, match="time data.*does not match format"): + parse_time_range("2024-01-01 10:00", "invalid-date", "UTC") + + def test_parse_time_range_invalid_timezone(self): + """Test parsing with invalid timezone.""" + with pytest.raises(pytz.exceptions.UnknownTimeZoneError): + parse_time_range("2024-01-01 10:00", "2024-01-01 18:00", "Invalid/Timezone") + + def test_parse_time_range_start_after_end(self): + """Test parsing with start time after end time.""" + with pytest.raises(ValueError, match="start must be earlier than or equal to end"): + parse_time_range("2024-01-01 18:00", "2024-01-01 10:00", "UTC") + + def test_parse_time_range_start_equals_end(self): + """Test parsing with start time equal to end time.""" + start, end = parse_time_range("2024-01-01 10:00", "2024-01-01 10:00", "UTC") + + assert start is not None + assert end is not None + assert start == end + + def test_parse_time_range_dst_ambiguous_time(self): + """Test parsing during DST ambiguous time (fall back).""" + # This test simulates DST fall back where 2:30 AM occurs twice + with patch("pytz.timezone") as mock_timezone: + # Mock timezone that raises AmbiguousTimeError + mock_tz = mock_timezone.return_value + + # Create a mock datetime object for the return value + mock_dt = datetime.datetime(2024, 1, 1, 10, 0, 0) + mock_utc_dt = mock_dt.replace(tzinfo=pytz.UTC) + + # Create a proper mock for the localized datetime + from unittest.mock import MagicMock + + mock_localized_dt = MagicMock() + mock_localized_dt.astimezone.return_value = mock_utc_dt + + # Set up side effects: first call raises exception, second call succeeds + mock_tz.localize.side_effect = [ + pytz.AmbiguousTimeError("Ambiguous time"), # First call for start + mock_localized_dt, # Second call for start (with is_dst=False) + pytz.AmbiguousTimeError("Ambiguous time"), # First call for end + mock_localized_dt, # Second call for end (with is_dst=False) + ] + + start, end = parse_time_range("2024-01-01 10:00", "2024-01-01 18:00", "US/Eastern") + + # Should use is_dst=False for ambiguous times + assert mock_tz.localize.call_count == 4 # 2 calls per time (first fails, second succeeds) + assert start is not None + assert end is not None + + def test_parse_time_range_dst_nonexistent_time(self): + """Test parsing during DST nonexistent time (spring forward).""" + with patch("pytz.timezone") as mock_timezone: + # Mock timezone that raises NonExistentTimeError + mock_tz = mock_timezone.return_value + + # Create a mock datetime object for the return value + mock_dt = datetime.datetime(2024, 1, 1, 10, 0, 0) + mock_utc_dt = mock_dt.replace(tzinfo=pytz.UTC) + + # Create a proper mock for the localized datetime + from unittest.mock import MagicMock + + mock_localized_dt = MagicMock() + mock_localized_dt.astimezone.return_value = mock_utc_dt + + # Set up side effects: first call raises exception, second call succeeds + mock_tz.localize.side_effect = [ + pytz.NonExistentTimeError("Non-existent time"), # First call for start + mock_localized_dt, # Second call for start (with adjusted time) + pytz.NonExistentTimeError("Non-existent time"), # First call for end + mock_localized_dt, # Second call for end (with adjusted time) + ] + + start, end = parse_time_range("2024-01-01 10:00", "2024-01-01 18:00", "US/Eastern") + + # Should adjust time forward by 1 hour for nonexistent times + assert mock_tz.localize.call_count == 4 # 2 calls per time (first fails, second succeeds) + assert start is not None + assert end is not None + + def test_parse_time_range_edge_cases(self): + """Test edge cases for time parsing.""" + # Test with midnight times + start, end = parse_time_range("2024-01-01 00:00", "2024-01-01 23:59", "UTC") + assert start is not None + assert end is not None + assert start.hour == 0 + assert start.minute == 0 + assert end.hour == 23 + assert end.minute == 59 + + def test_parse_time_range_different_dates(self): + """Test parsing with different dates.""" + start, end = parse_time_range("2024-01-01 10:00", "2024-01-02 10:00", "UTC") + assert start is not None + assert end is not None + assert start.date() != end.date() + assert (end - start).days == 1 + + def test_parse_time_range_seconds_handling(self): + """Test that seconds are properly set to 0.""" + start, end = parse_time_range("2024-01-01 10:30", "2024-01-01 18:45", "UTC") + assert start is not None + assert end is not None + assert start.second == 0 + assert end.second == 0 + + def test_parse_time_range_timezone_conversion_accuracy(self): + """Test accurate timezone conversion.""" + # Test with a known timezone conversion + start, end = parse_time_range("2024-01-01 12:00", "2024-01-01 12:00", "Asia/Tokyo") + + assert start is not None + assert end is not None + assert start.tzinfo == pytz.UTC + assert end.tzinfo == pytz.UTC + # Tokyo is UTC+9, so 12:00 JST = 03:00 UTC + assert start.hour == 3 + assert end.hour == 3 + + def test_parse_time_range_summer_time(self): + """Test parsing during summer time (DST).""" + # Test with US/Eastern during summer (EDT = UTC-4) + start, end = parse_time_range("2024-07-01 12:00", "2024-07-01 12:00", "US/Eastern") + + assert start is not None + assert end is not None + assert start.tzinfo == pytz.UTC + assert end.tzinfo == pytz.UTC + # 12:00 EDT = 16:00 UTC + assert start.hour == 16 + assert end.hour == 16 + + def test_parse_time_range_winter_time(self): + """Test parsing during winter time (standard time).""" + # Test with US/Eastern during winter (EST = UTC-5) + start, end = parse_time_range("2024-01-01 12:00", "2024-01-01 12:00", "US/Eastern") + + assert start is not None + assert end is not None + assert start.tzinfo == pytz.UTC + assert end.tzinfo == pytz.UTC + # 12:00 EST = 17:00 UTC + assert start.hour == 17 + assert end.hour == 17 + + def test_parse_time_range_empty_strings(self): + """Test parsing with empty strings.""" + # Empty strings are treated as None, so they should not raise errors + start, end = parse_time_range("", "2024-01-01 18:00", "UTC") + assert start is None + assert end is not None + + start, end = parse_time_range("2024-01-01 10:00", "", "UTC") + assert start is not None + assert end is None + + def test_parse_time_range_malformed_datetime(self): + """Test parsing with malformed datetime strings.""" + with pytest.raises(ValueError, match="time data.*does not match format"): + parse_time_range("2024-13-01 10:00", "2024-01-01 18:00", "UTC") + + with pytest.raises(ValueError, match="time data.*does not match format"): + parse_time_range("2024-01-01 10:00", "2024-01-32 18:00", "UTC") + + def test_parse_time_range_very_long_time_range(self): + """Test parsing with very long time range.""" + start, end = parse_time_range("2020-01-01 00:00", "2030-12-31 23:59", "UTC") + + assert start is not None + assert end is not None + assert start < end + assert (end - start).days > 3000 # More than 8 years + + def test_parse_time_range_negative_timezone(self): + """Test parsing with negative timezone offset.""" + start, end = parse_time_range("2024-01-01 12:00", "2024-01-01 12:00", "America/New_York") + + assert start is not None + assert end is not None + assert start.tzinfo == pytz.UTC + assert end.tzinfo == pytz.UTC diff --git a/api/tests/unit_tests/libs/test_token.py b/api/tests/unit_tests/libs/test_token.py index a611d3eb0e..6a65b5faa0 100644 --- a/api/tests/unit_tests/libs/test_token.py +++ b/api/tests/unit_tests/libs/test_token.py @@ -1,5 +1,10 @@ +from unittest.mock import MagicMock + +from werkzeug.wrappers import Response + from constants import COOKIE_NAME_ACCESS_TOKEN, COOKIE_NAME_WEBAPP_ACCESS_TOKEN -from libs.token import extract_access_token, extract_webapp_access_token +from libs import token +from libs.token import extract_access_token, extract_webapp_access_token, set_csrf_token_to_cookie class MockRequest: @@ -23,3 +28,35 @@ def test_extract_access_token(): for request, expected_console, expected_webapp in test_cases: assert extract_access_token(request) == expected_console # pyright: ignore[reportArgumentType] assert extract_webapp_access_token(request) == expected_webapp # pyright: ignore[reportArgumentType] + + +def test_real_cookie_name_uses_host_prefix_without_domain(monkeypatch): + monkeypatch.setattr(token.dify_config, "CONSOLE_WEB_URL", "https://console.example.com", raising=False) + monkeypatch.setattr(token.dify_config, "CONSOLE_API_URL", "https://api.example.com", raising=False) + monkeypatch.setattr(token.dify_config, "COOKIE_DOMAIN", "", raising=False) + + assert token._real_cookie_name("csrf_token") == "__Host-csrf_token" + + +def test_real_cookie_name_without_host_prefix_when_domain_present(monkeypatch): + monkeypatch.setattr(token.dify_config, "CONSOLE_WEB_URL", "https://console.example.com", raising=False) + monkeypatch.setattr(token.dify_config, "CONSOLE_API_URL", "https://api.example.com", raising=False) + monkeypatch.setattr(token.dify_config, "COOKIE_DOMAIN", ".example.com", raising=False) + + assert token._real_cookie_name("csrf_token") == "csrf_token" + + +def test_set_csrf_cookie_includes_domain_when_configured(monkeypatch): + monkeypatch.setattr(token.dify_config, "CONSOLE_WEB_URL", "https://console.example.com", raising=False) + monkeypatch.setattr(token.dify_config, "CONSOLE_API_URL", "https://api.example.com", raising=False) + monkeypatch.setattr(token.dify_config, "COOKIE_DOMAIN", ".example.com", raising=False) + + response = Response() + request = MagicMock() + + set_csrf_token_to_cookie(request, response, "abc123") + + cookies = response.headers.getlist("Set-Cookie") + assert any("csrf_token=abc123" in c for c in cookies) + assert any("Domain=example.com" in c for c in cookies) + assert all("__Host-" not in c for c in cookies) diff --git a/api/tests/unit_tests/models/test_account_models.py b/api/tests/unit_tests/models/test_account_models.py new file mode 100644 index 0000000000..cc311d447f --- /dev/null +++ b/api/tests/unit_tests/models/test_account_models.py @@ -0,0 +1,886 @@ +""" +Comprehensive unit tests for Account model. + +This test suite covers: +- Account model validation +- Password hashing/verification +- Account status transitions +- Tenant relationship integrity +- Email uniqueness constraints +""" + +import base64 +import secrets +from datetime import UTC, datetime +from unittest.mock import MagicMock, patch +from uuid import uuid4 + +import pytest + +from libs.password import compare_password, hash_password, valid_password +from models.account import Account, AccountStatus, Tenant, TenantAccountJoin, TenantAccountRole + + +class TestAccountModelValidation: + """Test suite for Account model validation and basic operations.""" + + def test_account_creation_with_required_fields(self): + """Test creating an account with all required fields.""" + # Arrange & Act + account = Account( + name="Test User", + email="test@example.com", + password="hashed_password", + password_salt="salt_value", + ) + + # Assert + assert account.name == "Test User" + assert account.email == "test@example.com" + assert account.password == "hashed_password" + assert account.password_salt == "salt_value" + assert account.status == "active" # Default value + + def test_account_creation_with_optional_fields(self): + """Test creating an account with optional fields.""" + # Arrange & Act + account = Account( + name="Test User", + email="test@example.com", + avatar="https://example.com/avatar.png", + interface_language="en-US", + interface_theme="dark", + timezone="America/New_York", + ) + + # Assert + assert account.avatar == "https://example.com/avatar.png" + assert account.interface_language == "en-US" + assert account.interface_theme == "dark" + assert account.timezone == "America/New_York" + + def test_account_creation_without_password(self): + """Test creating an account without password (for invite-based registration).""" + # Arrange & Act + account = Account( + name="Invited User", + email="invited@example.com", + ) + + # Assert + assert account.password is None + assert account.password_salt is None + assert not account.is_password_set + + def test_account_is_password_set_property(self): + """Test the is_password_set property.""" + # Arrange + account_with_password = Account( + name="User With Password", + email="withpass@example.com", + password="hashed_password", + ) + account_without_password = Account( + name="User Without Password", + email="nopass@example.com", + ) + + # Assert + assert account_with_password.is_password_set + assert not account_without_password.is_password_set + + def test_account_default_status(self): + """Test that account has default status of 'active'.""" + # Arrange & Act + account = Account( + name="Test User", + email="test@example.com", + ) + + # Assert + assert account.status == "active" + + def test_account_get_status_method(self): + """Test the get_status method returns AccountStatus enum.""" + # Arrange + account = Account( + name="Test User", + email="test@example.com", + status="pending", + ) + + # Act + status = account.get_status() + + # Assert + assert status == AccountStatus.PENDING + assert isinstance(status, AccountStatus) + + +class TestPasswordHashingAndVerification: + """Test suite for password hashing and verification functionality.""" + + def test_password_hashing_produces_consistent_result(self): + """Test that hashing the same password with the same salt produces the same result.""" + # Arrange + password = "TestPassword123" + salt = secrets.token_bytes(16) + + # Act + hash1 = hash_password(password, salt) + hash2 = hash_password(password, salt) + + # Assert + assert hash1 == hash2 + + def test_password_hashing_different_salts_produce_different_hashes(self): + """Test that different salts produce different hashes for the same password.""" + # Arrange + password = "TestPassword123" + salt1 = secrets.token_bytes(16) + salt2 = secrets.token_bytes(16) + + # Act + hash1 = hash_password(password, salt1) + hash2 = hash_password(password, salt2) + + # Assert + assert hash1 != hash2 + + def test_password_comparison_success(self): + """Test successful password comparison.""" + # Arrange + password = "TestPassword123" + salt = secrets.token_bytes(16) + password_hashed = hash_password(password, salt) + + # Encode to base64 as done in the application + base64_salt = base64.b64encode(salt).decode() + base64_password_hashed = base64.b64encode(password_hashed).decode() + + # Act + result = compare_password(password, base64_password_hashed, base64_salt) + + # Assert + assert result is True + + def test_password_comparison_failure(self): + """Test password comparison with wrong password.""" + # Arrange + correct_password = "TestPassword123" + wrong_password = "WrongPassword456" + salt = secrets.token_bytes(16) + password_hashed = hash_password(correct_password, salt) + + # Encode to base64 + base64_salt = base64.b64encode(salt).decode() + base64_password_hashed = base64.b64encode(password_hashed).decode() + + # Act + result = compare_password(wrong_password, base64_password_hashed, base64_salt) + + # Assert + assert result is False + + def test_valid_password_with_correct_format(self): + """Test password validation with correct format.""" + # Arrange + valid_passwords = [ + "Password123", + "Test1234", + "MySecure1Pass", + "abcdefgh1", + ] + + # Act & Assert + for password in valid_passwords: + result = valid_password(password) + assert result == password + + def test_valid_password_with_incorrect_format(self): + """Test password validation with incorrect format.""" + # Arrange + invalid_passwords = [ + "short1", # Too short + "NoNumbers", # No numbers + "12345678", # No letters + "Pass1", # Too short + ] + + # Act & Assert + for password in invalid_passwords: + with pytest.raises(ValueError, match="Password must contain letters and numbers"): + valid_password(password) + + def test_password_hashing_integration_with_account(self): + """Test password hashing integration with Account model.""" + # Arrange + password = "SecurePass123" + salt = secrets.token_bytes(16) + base64_salt = base64.b64encode(salt).decode() + password_hashed = hash_password(password, salt) + base64_password_hashed = base64.b64encode(password_hashed).decode() + + # Act + account = Account( + name="Test User", + email="test@example.com", + password=base64_password_hashed, + password_salt=base64_salt, + ) + + # Assert + assert account.is_password_set + assert compare_password(password, account.password, account.password_salt) + + +class TestAccountStatusTransitions: + """Test suite for account status transitions.""" + + def test_account_status_enum_values(self): + """Test that AccountStatus enum has all expected values.""" + # Assert + assert AccountStatus.PENDING == "pending" + assert AccountStatus.UNINITIALIZED == "uninitialized" + assert AccountStatus.ACTIVE == "active" + assert AccountStatus.BANNED == "banned" + assert AccountStatus.CLOSED == "closed" + + def test_account_status_transition_pending_to_active(self): + """Test transitioning account status from pending to active.""" + # Arrange + account = Account( + name="Test User", + email="test@example.com", + status=AccountStatus.PENDING, + ) + + # Act + account.status = AccountStatus.ACTIVE + account.initialized_at = datetime.now(UTC) + + # Assert + assert account.get_status() == AccountStatus.ACTIVE + assert account.initialized_at is not None + + def test_account_status_transition_active_to_banned(self): + """Test transitioning account status from active to banned.""" + # Arrange + account = Account( + name="Test User", + email="test@example.com", + status=AccountStatus.ACTIVE, + ) + + # Act + account.status = AccountStatus.BANNED + + # Assert + assert account.get_status() == AccountStatus.BANNED + + def test_account_status_transition_active_to_closed(self): + """Test transitioning account status from active to closed.""" + # Arrange + account = Account( + name="Test User", + email="test@example.com", + status=AccountStatus.ACTIVE, + ) + + # Act + account.status = AccountStatus.CLOSED + + # Assert + assert account.get_status() == AccountStatus.CLOSED + + def test_account_status_uninitialized(self): + """Test account with uninitialized status.""" + # Arrange & Act + account = Account( + name="Test User", + email="test@example.com", + status=AccountStatus.UNINITIALIZED, + ) + + # Assert + assert account.get_status() == AccountStatus.UNINITIALIZED + assert account.initialized_at is None + + +class TestTenantRelationshipIntegrity: + """Test suite for tenant relationship integrity.""" + + @patch("models.account.db") + def test_account_current_tenant_property(self, mock_db): + """Test the current_tenant property getter.""" + # Arrange + account = Account( + name="Test User", + email="test@example.com", + ) + account.id = str(uuid4()) + + tenant = Tenant(name="Test Tenant") + tenant.id = str(uuid4()) + + account._current_tenant = tenant + + # Act + result = account.current_tenant + + # Assert + assert result == tenant + + @patch("models.account.Session") + @patch("models.account.db") + def test_account_current_tenant_setter_with_valid_tenant(self, mock_db, mock_session_class): + """Test setting current_tenant with a valid tenant relationship.""" + # Arrange + account = Account( + name="Test User", + email="test@example.com", + ) + account.id = str(uuid4()) + + tenant = Tenant(name="Test Tenant") + tenant.id = str(uuid4()) + + # Mock the session and queries + mock_session = MagicMock() + mock_session_class.return_value.__enter__.return_value = mock_session + + # Mock TenantAccountJoin query result + tenant_join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=TenantAccountRole.OWNER, + ) + mock_session.scalar.return_value = tenant_join + + # Mock Tenant query result + mock_session.scalars.return_value.one.return_value = tenant + + # Act + account.current_tenant = tenant + + # Assert + assert account._current_tenant == tenant + assert account.role == TenantAccountRole.OWNER + + @patch("models.account.Session") + @patch("models.account.db") + def test_account_current_tenant_setter_without_relationship(self, mock_db, mock_session_class): + """Test setting current_tenant when no relationship exists.""" + # Arrange + account = Account( + name="Test User", + email="test@example.com", + ) + account.id = str(uuid4()) + + tenant = Tenant(name="Test Tenant") + tenant.id = str(uuid4()) + + # Mock the session and queries + mock_session = MagicMock() + mock_session_class.return_value.__enter__.return_value = mock_session + + # Mock no TenantAccountJoin found + mock_session.scalar.return_value = None + + # Act + account.current_tenant = tenant + + # Assert + assert account._current_tenant is None + + def test_account_current_tenant_id_property(self): + """Test the current_tenant_id property.""" + # Arrange + account = Account( + name="Test User", + email="test@example.com", + ) + tenant = Tenant(name="Test Tenant") + tenant.id = str(uuid4()) + + # Act - with tenant + account._current_tenant = tenant + tenant_id = account.current_tenant_id + + # Assert + assert tenant_id == tenant.id + + # Act - without tenant + account._current_tenant = None + tenant_id_none = account.current_tenant_id + + # Assert + assert tenant_id_none is None + + @patch("models.account.Session") + @patch("models.account.db") + def test_account_set_tenant_id_method(self, mock_db, mock_session_class): + """Test the set_tenant_id method.""" + # Arrange + account = Account( + name="Test User", + email="test@example.com", + ) + account.id = str(uuid4()) + + tenant = Tenant(name="Test Tenant") + tenant.id = str(uuid4()) + + tenant_join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=TenantAccountRole.ADMIN, + ) + + # Mock the session and queries + mock_session = MagicMock() + mock_session_class.return_value.__enter__.return_value = mock_session + mock_session.execute.return_value.first.return_value = (tenant, tenant_join) + + # Act + account.set_tenant_id(tenant.id) + + # Assert + assert account._current_tenant == tenant + assert account.role == TenantAccountRole.ADMIN + + @patch("models.account.Session") + @patch("models.account.db") + def test_account_set_tenant_id_with_no_relationship(self, mock_db, mock_session_class): + """Test set_tenant_id when no relationship exists.""" + # Arrange + account = Account( + name="Test User", + email="test@example.com", + ) + account.id = str(uuid4()) + tenant_id = str(uuid4()) + + # Mock the session and queries + mock_session = MagicMock() + mock_session_class.return_value.__enter__.return_value = mock_session + mock_session.execute.return_value.first.return_value = None + + # Act + account.set_tenant_id(tenant_id) + + # Assert - should not set tenant when no relationship exists + # The method returns early without setting _current_tenant + + +class TestAccountRolePermissions: + """Test suite for account role permissions.""" + + def test_is_admin_or_owner_with_admin_role(self): + """Test is_admin_or_owner property with admin role.""" + # Arrange + account = Account( + name="Test User", + email="test@example.com", + ) + account.role = TenantAccountRole.ADMIN + + # Act & Assert + assert account.is_admin_or_owner + + def test_is_admin_or_owner_with_owner_role(self): + """Test is_admin_or_owner property with owner role.""" + # Arrange + account = Account( + name="Test User", + email="test@example.com", + ) + account.role = TenantAccountRole.OWNER + + # Act & Assert + assert account.is_admin_or_owner + + def test_is_admin_or_owner_with_normal_role(self): + """Test is_admin_or_owner property with normal role.""" + # Arrange + account = Account( + name="Test User", + email="test@example.com", + ) + account.role = TenantAccountRole.NORMAL + + # Act & Assert + assert not account.is_admin_or_owner + + def test_is_admin_property(self): + """Test is_admin property.""" + # Arrange + admin_account = Account(name="Admin", email="admin@example.com") + admin_account.role = TenantAccountRole.ADMIN + + owner_account = Account(name="Owner", email="owner@example.com") + owner_account.role = TenantAccountRole.OWNER + + # Act & Assert + assert admin_account.is_admin + assert not owner_account.is_admin + + def test_has_edit_permission_with_editing_roles(self): + """Test has_edit_permission property with roles that have edit permission.""" + # Arrange + roles_with_edit = [ + TenantAccountRole.OWNER, + TenantAccountRole.ADMIN, + TenantAccountRole.EDITOR, + ] + + for role in roles_with_edit: + account = Account(name="Test User", email=f"test_{role}@example.com") + account.role = role + + # Act & Assert + assert account.has_edit_permission, f"Role {role} should have edit permission" + + def test_has_edit_permission_without_editing_roles(self): + """Test has_edit_permission property with roles that don't have edit permission.""" + # Arrange + roles_without_edit = [ + TenantAccountRole.NORMAL, + TenantAccountRole.DATASET_OPERATOR, + ] + + for role in roles_without_edit: + account = Account(name="Test User", email=f"test_{role}@example.com") + account.role = role + + # Act & Assert + assert not account.has_edit_permission, f"Role {role} should not have edit permission" + + def test_is_dataset_editor_property(self): + """Test is_dataset_editor property.""" + # Arrange + dataset_roles = [ + TenantAccountRole.OWNER, + TenantAccountRole.ADMIN, + TenantAccountRole.EDITOR, + TenantAccountRole.DATASET_OPERATOR, + ] + + for role in dataset_roles: + account = Account(name="Test User", email=f"test_{role}@example.com") + account.role = role + + # Act & Assert + assert account.is_dataset_editor, f"Role {role} should have dataset edit permission" + + # Test normal role doesn't have dataset edit permission + normal_account = Account(name="Normal User", email="normal@example.com") + normal_account.role = TenantAccountRole.NORMAL + assert not normal_account.is_dataset_editor + + def test_is_dataset_operator_property(self): + """Test is_dataset_operator property.""" + # Arrange + dataset_operator = Account(name="Dataset Operator", email="operator@example.com") + dataset_operator.role = TenantAccountRole.DATASET_OPERATOR + + normal_account = Account(name="Normal User", email="normal@example.com") + normal_account.role = TenantAccountRole.NORMAL + + # Act & Assert + assert dataset_operator.is_dataset_operator + assert not normal_account.is_dataset_operator + + def test_current_role_property(self): + """Test current_role property.""" + # Arrange + account = Account(name="Test User", email="test@example.com") + account.role = TenantAccountRole.EDITOR + + # Act + current_role = account.current_role + + # Assert + assert current_role == TenantAccountRole.EDITOR + + +class TestAccountGetByOpenId: + """Test suite for get_by_openid class method.""" + + @patch("models.account.db") + def test_get_by_openid_success(self, mock_db): + """Test successful retrieval of account by OpenID.""" + # Arrange + provider = "google" + open_id = "google_user_123" + account_id = str(uuid4()) + + mock_account_integrate = MagicMock() + mock_account_integrate.account_id = account_id + + mock_account = Account(name="Test User", email="test@example.com") + mock_account.id = account_id + + # Mock the query chain + mock_query = MagicMock() + mock_where = MagicMock() + mock_where.one_or_none.return_value = mock_account_integrate + mock_query.where.return_value = mock_where + mock_db.session.query.return_value = mock_query + + # Mock the second query for account + mock_account_query = MagicMock() + mock_account_where = MagicMock() + mock_account_where.one_or_none.return_value = mock_account + mock_account_query.where.return_value = mock_account_where + + # Setup query to return different results based on model + def query_side_effect(model): + if model.__name__ == "AccountIntegrate": + return mock_query + elif model.__name__ == "Account": + return mock_account_query + return MagicMock() + + mock_db.session.query.side_effect = query_side_effect + + # Act + result = Account.get_by_openid(provider, open_id) + + # Assert + assert result == mock_account + + @patch("models.account.db") + def test_get_by_openid_not_found(self, mock_db): + """Test get_by_openid when account integrate doesn't exist.""" + # Arrange + provider = "github" + open_id = "github_user_456" + + # Mock the query chain to return None + mock_query = MagicMock() + mock_where = MagicMock() + mock_where.one_or_none.return_value = None + mock_query.where.return_value = mock_where + mock_db.session.query.return_value = mock_query + + # Act + result = Account.get_by_openid(provider, open_id) + + # Assert + assert result is None + + +class TestTenantAccountJoinModel: + """Test suite for TenantAccountJoin model.""" + + def test_tenant_account_join_creation(self): + """Test creating a TenantAccountJoin record.""" + # Arrange + tenant_id = str(uuid4()) + account_id = str(uuid4()) + + # Act + join = TenantAccountJoin( + tenant_id=tenant_id, + account_id=account_id, + role=TenantAccountRole.NORMAL, + current=True, + ) + + # Assert + assert join.tenant_id == tenant_id + assert join.account_id == account_id + assert join.role == TenantAccountRole.NORMAL + assert join.current is True + + def test_tenant_account_join_default_values(self): + """Test default values for TenantAccountJoin.""" + # Arrange + tenant_id = str(uuid4()) + account_id = str(uuid4()) + + # Act + join = TenantAccountJoin( + tenant_id=tenant_id, + account_id=account_id, + ) + + # Assert + assert join.current is False # Default value + assert join.role == "normal" # Default value + assert join.invited_by is None # Default value + + def test_tenant_account_join_with_invited_by(self): + """Test TenantAccountJoin with invited_by field.""" + # Arrange + tenant_id = str(uuid4()) + account_id = str(uuid4()) + inviter_id = str(uuid4()) + + # Act + join = TenantAccountJoin( + tenant_id=tenant_id, + account_id=account_id, + role=TenantAccountRole.EDITOR, + invited_by=inviter_id, + ) + + # Assert + assert join.invited_by == inviter_id + + +class TestTenantModel: + """Test suite for Tenant model.""" + + def test_tenant_creation(self): + """Test creating a Tenant.""" + # Arrange & Act + tenant = Tenant(name="Test Workspace") + + # Assert + assert tenant.name == "Test Workspace" + assert tenant.status == "normal" # Default value + assert tenant.plan == "basic" # Default value + + def test_tenant_custom_config_dict_property(self): + """Test custom_config_dict property getter.""" + # Arrange + tenant = Tenant(name="Test Workspace") + config = {"feature1": True, "feature2": "value"} + tenant.custom_config = '{"feature1": true, "feature2": "value"}' + + # Act + result = tenant.custom_config_dict + + # Assert + assert result["feature1"] is True + assert result["feature2"] == "value" + + def test_tenant_custom_config_dict_property_empty(self): + """Test custom_config_dict property with empty config.""" + # Arrange + tenant = Tenant(name="Test Workspace") + tenant.custom_config = None + + # Act + result = tenant.custom_config_dict + + # Assert + assert result == {} + + def test_tenant_custom_config_dict_setter(self): + """Test custom_config_dict property setter.""" + # Arrange + tenant = Tenant(name="Test Workspace") + config = {"feature1": True, "feature2": "value"} + + # Act + tenant.custom_config_dict = config + + # Assert + assert tenant.custom_config == '{"feature1": true, "feature2": "value"}' + + @patch("models.account.db") + def test_tenant_get_accounts(self, mock_db): + """Test getting accounts associated with a tenant.""" + # Arrange + tenant = Tenant(name="Test Workspace") + tenant.id = str(uuid4()) + + account1 = Account(name="User 1", email="user1@example.com") + account1.id = str(uuid4()) + account2 = Account(name="User 2", email="user2@example.com") + account2.id = str(uuid4()) + + # Mock the query chain + mock_scalars = MagicMock() + mock_scalars.all.return_value = [account1, account2] + mock_db.session.scalars.return_value = mock_scalars + + # Act + accounts = tenant.get_accounts() + + # Assert + assert len(accounts) == 2 + assert account1 in accounts + assert account2 in accounts + + +class TestTenantStatusEnum: + """Test suite for TenantStatus enum.""" + + def test_tenant_status_enum_values(self): + """Test TenantStatus enum values.""" + # Arrange & Act + from models.account import TenantStatus + + # Assert + assert TenantStatus.NORMAL == "normal" + assert TenantStatus.ARCHIVE == "archive" + + +class TestAccountIntegration: + """Integration tests for Account model with related models.""" + + def test_account_with_multiple_tenants(self): + """Test account associated with multiple tenants.""" + # Arrange + account = Account(name="Multi-Tenant User", email="multi@example.com") + account.id = str(uuid4()) + + tenant1_id = str(uuid4()) + tenant2_id = str(uuid4()) + + join1 = TenantAccountJoin( + tenant_id=tenant1_id, + account_id=account.id, + role=TenantAccountRole.OWNER, + current=True, + ) + + join2 = TenantAccountJoin( + tenant_id=tenant2_id, + account_id=account.id, + role=TenantAccountRole.NORMAL, + current=False, + ) + + # Assert - verify the joins are created correctly + assert join1.account_id == account.id + assert join2.account_id == account.id + assert join1.current is True + assert join2.current is False + + def test_account_last_login_tracking(self): + """Test account last login tracking.""" + # Arrange + account = Account(name="Test User", email="test@example.com") + login_time = datetime.now(UTC) + login_ip = "192.168.1.1" + + # Act + account.last_login_at = login_time + account.last_login_ip = login_ip + + # Assert + assert account.last_login_at == login_time + assert account.last_login_ip == login_ip + + def test_account_initialization_tracking(self): + """Test account initialization tracking.""" + # Arrange + account = Account( + name="Test User", + email="test@example.com", + status=AccountStatus.PENDING, + ) + + # Act - simulate initialization + account.status = AccountStatus.ACTIVE + account.initialized_at = datetime.now(UTC) + + # Assert + assert account.get_status() == AccountStatus.ACTIVE + assert account.initialized_at is not None diff --git a/api/tests/unit_tests/models/test_base.py b/api/tests/unit_tests/models/test_base.py new file mode 100644 index 0000000000..e0dda3c1dd --- /dev/null +++ b/api/tests/unit_tests/models/test_base.py @@ -0,0 +1,11 @@ +from models.base import DefaultFieldsMixin + + +class FooModel(DefaultFieldsMixin): + def __init__(self, id: str): + self.id = id + + +def test_repr(): + foo_model = FooModel(id="test-id") + assert repr(foo_model) == "" diff --git a/api/tests/unit_tests/models/test_dataset_models.py b/api/tests/unit_tests/models/test_dataset_models.py new file mode 100644 index 0000000000..2322c556e2 --- /dev/null +++ b/api/tests/unit_tests/models/test_dataset_models.py @@ -0,0 +1,1341 @@ +""" +Comprehensive unit tests for Dataset models. + +This test suite covers: +- Dataset model validation +- Document model relationships +- Segment model indexing +- Dataset-Document cascade deletes +- Embedding storage validation +""" + +import json +import pickle +from datetime import UTC, datetime +from unittest.mock import MagicMock, patch +from uuid import uuid4 + +from models.dataset import ( + AppDatasetJoin, + ChildChunk, + Dataset, + DatasetKeywordTable, + DatasetProcessRule, + Document, + DocumentSegment, + Embedding, +) + + +class TestDatasetModelValidation: + """Test suite for Dataset model validation and basic operations.""" + + def test_dataset_creation_with_required_fields(self): + """Test creating a dataset with all required fields.""" + # Arrange + tenant_id = str(uuid4()) + created_by = str(uuid4()) + + # Act + dataset = Dataset( + tenant_id=tenant_id, + name="Test Dataset", + data_source_type="upload_file", + created_by=created_by, + ) + + # Assert + assert dataset.name == "Test Dataset" + assert dataset.tenant_id == tenant_id + assert dataset.data_source_type == "upload_file" + assert dataset.created_by == created_by + # Note: Default values are set by database, not by model instantiation + + def test_dataset_creation_with_optional_fields(self): + """Test creating a dataset with optional fields.""" + # Arrange & Act + dataset = Dataset( + tenant_id=str(uuid4()), + name="Test Dataset", + data_source_type="upload_file", + created_by=str(uuid4()), + description="Test description", + indexing_technique="high_quality", + embedding_model="text-embedding-ada-002", + embedding_model_provider="openai", + ) + + # Assert + assert dataset.description == "Test description" + assert dataset.indexing_technique == "high_quality" + assert dataset.embedding_model == "text-embedding-ada-002" + assert dataset.embedding_model_provider == "openai" + + def test_dataset_indexing_technique_validation(self): + """Test dataset indexing technique values.""" + # Arrange & Act + dataset_high_quality = Dataset( + tenant_id=str(uuid4()), + name="High Quality Dataset", + data_source_type="upload_file", + created_by=str(uuid4()), + indexing_technique="high_quality", + ) + dataset_economy = Dataset( + tenant_id=str(uuid4()), + name="Economy Dataset", + data_source_type="upload_file", + created_by=str(uuid4()), + indexing_technique="economy", + ) + + # Assert + assert dataset_high_quality.indexing_technique == "high_quality" + assert dataset_economy.indexing_technique == "economy" + assert "high_quality" in Dataset.INDEXING_TECHNIQUE_LIST + assert "economy" in Dataset.INDEXING_TECHNIQUE_LIST + + def test_dataset_provider_validation(self): + """Test dataset provider values.""" + # Arrange & Act + dataset_vendor = Dataset( + tenant_id=str(uuid4()), + name="Vendor Dataset", + data_source_type="upload_file", + created_by=str(uuid4()), + provider="vendor", + ) + dataset_external = Dataset( + tenant_id=str(uuid4()), + name="External Dataset", + data_source_type="upload_file", + created_by=str(uuid4()), + provider="external", + ) + + # Assert + assert dataset_vendor.provider == "vendor" + assert dataset_external.provider == "external" + assert "vendor" in Dataset.PROVIDER_LIST + assert "external" in Dataset.PROVIDER_LIST + + def test_dataset_index_struct_dict_property(self): + """Test index_struct_dict property parsing.""" + # Arrange + index_struct_data = {"type": "vector", "dimension": 1536} + dataset = Dataset( + tenant_id=str(uuid4()), + name="Test Dataset", + data_source_type="upload_file", + created_by=str(uuid4()), + index_struct=json.dumps(index_struct_data), + ) + + # Act + result = dataset.index_struct_dict + + # Assert + assert result == index_struct_data + assert result["type"] == "vector" + assert result["dimension"] == 1536 + + def test_dataset_index_struct_dict_property_none(self): + """Test index_struct_dict property when index_struct is None.""" + # Arrange + dataset = Dataset( + tenant_id=str(uuid4()), + name="Test Dataset", + data_source_type="upload_file", + created_by=str(uuid4()), + ) + + # Act + result = dataset.index_struct_dict + + # Assert + assert result is None + + def test_dataset_external_retrieval_model_property(self): + """Test external_retrieval_model property with default values.""" + # Arrange + dataset = Dataset( + tenant_id=str(uuid4()), + name="Test Dataset", + data_source_type="upload_file", + created_by=str(uuid4()), + ) + + # Act + result = dataset.external_retrieval_model + + # Assert + assert result["top_k"] == 2 + assert result["score_threshold"] == 0.0 + + def test_dataset_retrieval_model_dict_property(self): + """Test retrieval_model_dict property with default values.""" + # Arrange + dataset = Dataset( + tenant_id=str(uuid4()), + name="Test Dataset", + data_source_type="upload_file", + created_by=str(uuid4()), + ) + + # Act + result = dataset.retrieval_model_dict + + # Assert + assert result["top_k"] == 2 + assert result["reranking_enable"] is False + assert result["score_threshold_enabled"] is False + + def test_dataset_gen_collection_name_by_id(self): + """Test static method for generating collection name.""" + # Arrange + dataset_id = "12345678-1234-1234-1234-123456789abc" + + # Act + collection_name = Dataset.gen_collection_name_by_id(dataset_id) + + # Assert + assert "12345678_1234_1234_1234_123456789abc" in collection_name + assert "-" not in collection_name.split("_")[-1] + + +class TestDocumentModelRelationships: + """Test suite for Document model relationships and properties.""" + + def test_document_creation_with_required_fields(self): + """Test creating a document with all required fields.""" + # Arrange + tenant_id = str(uuid4()) + dataset_id = str(uuid4()) + created_by = str(uuid4()) + + # Act + document = Document( + tenant_id=tenant_id, + dataset_id=dataset_id, + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test_document.pdf", + created_from="web", + created_by=created_by, + ) + + # Assert + assert document.tenant_id == tenant_id + assert document.dataset_id == dataset_id + assert document.position == 1 + assert document.data_source_type == "upload_file" + assert document.batch == "batch_001" + assert document.name == "test_document.pdf" + assert document.created_from == "web" + assert document.created_by == created_by + # Note: Default values are set by database, not by model instantiation + + def test_document_data_source_types(self): + """Test document data source type validation.""" + # Assert + assert "upload_file" in Document.DATA_SOURCES + assert "notion_import" in Document.DATA_SOURCES + assert "website_crawl" in Document.DATA_SOURCES + + def test_document_display_status_queuing(self): + """Test document display_status property for queuing state.""" + # Arrange + document = Document( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=str(uuid4()), + indexing_status="waiting", + ) + + # Act + status = document.display_status + + # Assert + assert status == "queuing" + + def test_document_display_status_paused(self): + """Test document display_status property for paused state.""" + # Arrange + document = Document( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=str(uuid4()), + indexing_status="parsing", + is_paused=True, + ) + + # Act + status = document.display_status + + # Assert + assert status == "paused" + + def test_document_display_status_indexing(self): + """Test document display_status property for indexing state.""" + # Arrange + for indexing_status in ["parsing", "cleaning", "splitting", "indexing"]: + document = Document( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=str(uuid4()), + indexing_status=indexing_status, + ) + + # Act + status = document.display_status + + # Assert + assert status == "indexing" + + def test_document_display_status_error(self): + """Test document display_status property for error state.""" + # Arrange + document = Document( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=str(uuid4()), + indexing_status="error", + ) + + # Act + status = document.display_status + + # Assert + assert status == "error" + + def test_document_display_status_available(self): + """Test document display_status property for available state.""" + # Arrange + document = Document( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=str(uuid4()), + indexing_status="completed", + enabled=True, + archived=False, + ) + + # Act + status = document.display_status + + # Assert + assert status == "available" + + def test_document_display_status_disabled(self): + """Test document display_status property for disabled state.""" + # Arrange + document = Document( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=str(uuid4()), + indexing_status="completed", + enabled=False, + archived=False, + ) + + # Act + status = document.display_status + + # Assert + assert status == "disabled" + + def test_document_display_status_archived(self): + """Test document display_status property for archived state.""" + # Arrange + document = Document( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=str(uuid4()), + indexing_status="completed", + archived=True, + ) + + # Act + status = document.display_status + + # Assert + assert status == "archived" + + def test_document_data_source_info_dict_property(self): + """Test data_source_info_dict property parsing.""" + # Arrange + data_source_info = {"upload_file_id": str(uuid4()), "file_name": "test.pdf"} + document = Document( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=str(uuid4()), + data_source_info=json.dumps(data_source_info), + ) + + # Act + result = document.data_source_info_dict + + # Assert + assert result == data_source_info + assert "upload_file_id" in result + assert "file_name" in result + + def test_document_data_source_info_dict_property_empty(self): + """Test data_source_info_dict property when data_source_info is None.""" + # Arrange + document = Document( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=str(uuid4()), + ) + + # Act + result = document.data_source_info_dict + + # Assert + assert result == {} + + def test_document_average_segment_length(self): + """Test average_segment_length property calculation.""" + # Arrange + document = Document( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=str(uuid4()), + word_count=1000, + ) + + # Mock segment_count property + with patch.object(Document, "segment_count", new_callable=lambda: property(lambda self: 10)): + # Act + result = document.average_segment_length + + # Assert + assert result == 100 + + def test_document_average_segment_length_zero(self): + """Test average_segment_length property when word_count is zero.""" + # Arrange + document = Document( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=str(uuid4()), + word_count=0, + ) + + # Act + result = document.average_segment_length + + # Assert + assert result == 0 + + +class TestDocumentSegmentIndexing: + """Test suite for DocumentSegment model indexing and operations.""" + + def test_document_segment_creation_with_required_fields(self): + """Test creating a document segment with all required fields.""" + # Arrange + tenant_id = str(uuid4()) + dataset_id = str(uuid4()) + document_id = str(uuid4()) + created_by = str(uuid4()) + + # Act + segment = DocumentSegment( + tenant_id=tenant_id, + dataset_id=dataset_id, + document_id=document_id, + position=1, + content="This is a test segment content.", + word_count=6, + tokens=10, + created_by=created_by, + ) + + # Assert + assert segment.tenant_id == tenant_id + assert segment.dataset_id == dataset_id + assert segment.document_id == document_id + assert segment.position == 1 + assert segment.content == "This is a test segment content." + assert segment.word_count == 6 + assert segment.tokens == 10 + assert segment.created_by == created_by + # Note: Default values are set by database, not by model instantiation + + def test_document_segment_with_indexing_fields(self): + """Test creating a document segment with indexing fields.""" + # Arrange + index_node_id = str(uuid4()) + index_node_hash = "abc123hash" + keywords = ["test", "segment", "indexing"] + + # Act + segment = DocumentSegment( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + document_id=str(uuid4()), + position=1, + content="Test content", + word_count=2, + tokens=5, + created_by=str(uuid4()), + index_node_id=index_node_id, + index_node_hash=index_node_hash, + keywords=keywords, + ) + + # Assert + assert segment.index_node_id == index_node_id + assert segment.index_node_hash == index_node_hash + assert segment.keywords == keywords + + def test_document_segment_with_answer_field(self): + """Test creating a document segment with answer field for QA model.""" + # Arrange + content = "What is AI?" + answer = "AI stands for Artificial Intelligence." + + # Act + segment = DocumentSegment( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + document_id=str(uuid4()), + position=1, + content=content, + answer=answer, + word_count=3, + tokens=8, + created_by=str(uuid4()), + ) + + # Assert + assert segment.content == content + assert segment.answer == answer + + def test_document_segment_status_transitions(self): + """Test document segment status field values.""" + # Arrange & Act + segment_waiting = DocumentSegment( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + document_id=str(uuid4()), + position=1, + content="Test", + word_count=1, + tokens=2, + created_by=str(uuid4()), + status="waiting", + ) + segment_completed = DocumentSegment( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + document_id=str(uuid4()), + position=1, + content="Test", + word_count=1, + tokens=2, + created_by=str(uuid4()), + status="completed", + ) + + # Assert + assert segment_waiting.status == "waiting" + assert segment_completed.status == "completed" + + def test_document_segment_enabled_disabled_tracking(self): + """Test document segment enabled/disabled state tracking.""" + # Arrange + disabled_by = str(uuid4()) + disabled_at = datetime.now(UTC) + + # Act + segment = DocumentSegment( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + document_id=str(uuid4()), + position=1, + content="Test", + word_count=1, + tokens=2, + created_by=str(uuid4()), + enabled=False, + disabled_by=disabled_by, + disabled_at=disabled_at, + ) + + # Assert + assert segment.enabled is False + assert segment.disabled_by == disabled_by + assert segment.disabled_at == disabled_at + + def test_document_segment_hit_count_tracking(self): + """Test document segment hit count tracking.""" + # Arrange & Act + segment = DocumentSegment( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + document_id=str(uuid4()), + position=1, + content="Test", + word_count=1, + tokens=2, + created_by=str(uuid4()), + hit_count=5, + ) + + # Assert + assert segment.hit_count == 5 + + def test_document_segment_error_tracking(self): + """Test document segment error tracking.""" + # Arrange + error_message = "Indexing failed due to timeout" + stopped_at = datetime.now(UTC) + + # Act + segment = DocumentSegment( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + document_id=str(uuid4()), + position=1, + content="Test", + word_count=1, + tokens=2, + created_by=str(uuid4()), + error=error_message, + stopped_at=stopped_at, + ) + + # Assert + assert segment.error == error_message + assert segment.stopped_at == stopped_at + + +class TestEmbeddingStorage: + """Test suite for Embedding model storage and retrieval.""" + + def test_embedding_creation_with_required_fields(self): + """Test creating an embedding with required fields.""" + # Arrange + model_name = "text-embedding-ada-002" + hash_value = "abc123hash" + provider_name = "openai" + + # Act + embedding = Embedding( + model_name=model_name, + hash=hash_value, + provider_name=provider_name, + embedding=b"binary_data", + ) + + # Assert + assert embedding.model_name == model_name + assert embedding.hash == hash_value + assert embedding.provider_name == provider_name + assert embedding.embedding == b"binary_data" + + def test_embedding_set_and_get_embedding(self): + """Test setting and getting embedding data.""" + # Arrange + embedding_data = [0.1, 0.2, 0.3, 0.4, 0.5] + embedding = Embedding( + model_name="text-embedding-ada-002", + hash="test_hash", + provider_name="openai", + embedding=b"", + ) + + # Act + embedding.set_embedding(embedding_data) + retrieved_data = embedding.get_embedding() + + # Assert + assert retrieved_data == embedding_data + assert len(retrieved_data) == 5 + assert retrieved_data[0] == 0.1 + assert retrieved_data[4] == 0.5 + + def test_embedding_pickle_serialization(self): + """Test embedding data is properly pickled.""" + # Arrange + embedding_data = [0.1, 0.2, 0.3] + embedding = Embedding( + model_name="text-embedding-ada-002", + hash="test_hash", + provider_name="openai", + embedding=b"", + ) + + # Act + embedding.set_embedding(embedding_data) + + # Assert + # Verify the embedding is stored as pickled binary data + assert isinstance(embedding.embedding, bytes) + # Verify we can unpickle it + unpickled_data = pickle.loads(embedding.embedding) # noqa: S301 + assert unpickled_data == embedding_data + + def test_embedding_with_large_vector(self): + """Test embedding with large dimension vector.""" + # Arrange + # Simulate a 1536-dimension vector (OpenAI ada-002 size) + large_embedding_data = [0.001 * i for i in range(1536)] + embedding = Embedding( + model_name="text-embedding-ada-002", + hash="large_vector_hash", + provider_name="openai", + embedding=b"", + ) + + # Act + embedding.set_embedding(large_embedding_data) + retrieved_data = embedding.get_embedding() + + # Assert + assert len(retrieved_data) == 1536 + assert retrieved_data[0] == 0.0 + assert abs(retrieved_data[1535] - 1.535) < 0.0001 # Float comparison with tolerance + + +class TestDatasetProcessRule: + """Test suite for DatasetProcessRule model.""" + + def test_dataset_process_rule_creation(self): + """Test creating a dataset process rule.""" + # Arrange + dataset_id = str(uuid4()) + created_by = str(uuid4()) + + # Act + process_rule = DatasetProcessRule( + dataset_id=dataset_id, + mode="automatic", + created_by=created_by, + ) + + # Assert + assert process_rule.dataset_id == dataset_id + assert process_rule.mode == "automatic" + assert process_rule.created_by == created_by + + def test_dataset_process_rule_modes(self): + """Test dataset process rule mode validation.""" + # Assert + assert "automatic" in DatasetProcessRule.MODES + assert "custom" in DatasetProcessRule.MODES + assert "hierarchical" in DatasetProcessRule.MODES + + def test_dataset_process_rule_with_rules_dict(self): + """Test dataset process rule with rules dictionary.""" + # Arrange + rules_data = { + "pre_processing_rules": [ + {"id": "remove_extra_spaces", "enabled": True}, + {"id": "remove_urls_emails", "enabled": False}, + ], + "segmentation": {"delimiter": "\n", "max_tokens": 500, "chunk_overlap": 50}, + } + process_rule = DatasetProcessRule( + dataset_id=str(uuid4()), + mode="custom", + created_by=str(uuid4()), + rules=json.dumps(rules_data), + ) + + # Act + result = process_rule.rules_dict + + # Assert + assert result == rules_data + assert "pre_processing_rules" in result + assert "segmentation" in result + + def test_dataset_process_rule_to_dict(self): + """Test dataset process rule to_dict method.""" + # Arrange + dataset_id = str(uuid4()) + rules_data = {"test": "data"} + process_rule = DatasetProcessRule( + dataset_id=dataset_id, + mode="automatic", + created_by=str(uuid4()), + rules=json.dumps(rules_data), + ) + + # Act + result = process_rule.to_dict() + + # Assert + assert result["dataset_id"] == dataset_id + assert result["mode"] == "automatic" + assert result["rules"] == rules_data + + def test_dataset_process_rule_automatic_rules(self): + """Test dataset process rule automatic rules constant.""" + # Act + automatic_rules = DatasetProcessRule.AUTOMATIC_RULES + + # Assert + assert "pre_processing_rules" in automatic_rules + assert "segmentation" in automatic_rules + assert automatic_rules["segmentation"]["max_tokens"] == 500 + + +class TestDatasetKeywordTable: + """Test suite for DatasetKeywordTable model.""" + + def test_dataset_keyword_table_creation(self): + """Test creating a dataset keyword table.""" + # Arrange + dataset_id = str(uuid4()) + keyword_data = {"test": ["node1", "node2"], "keyword": ["node3"]} + + # Act + keyword_table = DatasetKeywordTable( + dataset_id=dataset_id, + keyword_table=json.dumps(keyword_data), + ) + + # Assert + assert keyword_table.dataset_id == dataset_id + assert keyword_table.data_source_type == "database" # Default value + + def test_dataset_keyword_table_data_source_type(self): + """Test dataset keyword table data source type.""" + # Arrange & Act + keyword_table = DatasetKeywordTable( + dataset_id=str(uuid4()), + keyword_table="{}", + data_source_type="file", + ) + + # Assert + assert keyword_table.data_source_type == "file" + + +class TestAppDatasetJoin: + """Test suite for AppDatasetJoin model.""" + + def test_app_dataset_join_creation(self): + """Test creating an app-dataset join relationship.""" + # Arrange + app_id = str(uuid4()) + dataset_id = str(uuid4()) + + # Act + join = AppDatasetJoin( + app_id=app_id, + dataset_id=dataset_id, + ) + + # Assert + assert join.app_id == app_id + assert join.dataset_id == dataset_id + # Note: ID is auto-generated when saved to database + + +class TestChildChunk: + """Test suite for ChildChunk model.""" + + def test_child_chunk_creation(self): + """Test creating a child chunk.""" + # Arrange + tenant_id = str(uuid4()) + dataset_id = str(uuid4()) + document_id = str(uuid4()) + segment_id = str(uuid4()) + created_by = str(uuid4()) + + # Act + child_chunk = ChildChunk( + tenant_id=tenant_id, + dataset_id=dataset_id, + document_id=document_id, + segment_id=segment_id, + position=1, + content="Child chunk content", + word_count=3, + created_by=created_by, + ) + + # Assert + assert child_chunk.tenant_id == tenant_id + assert child_chunk.dataset_id == dataset_id + assert child_chunk.document_id == document_id + assert child_chunk.segment_id == segment_id + assert child_chunk.position == 1 + assert child_chunk.content == "Child chunk content" + assert child_chunk.word_count == 3 + assert child_chunk.created_by == created_by + # Note: Default values are set by database, not by model instantiation + + def test_child_chunk_with_indexing_fields(self): + """Test creating a child chunk with indexing fields.""" + # Arrange + index_node_id = str(uuid4()) + index_node_hash = "child_hash_123" + + # Act + child_chunk = ChildChunk( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + document_id=str(uuid4()), + segment_id=str(uuid4()), + position=1, + content="Test content", + word_count=2, + created_by=str(uuid4()), + index_node_id=index_node_id, + index_node_hash=index_node_hash, + ) + + # Assert + assert child_chunk.index_node_id == index_node_id + assert child_chunk.index_node_hash == index_node_hash + + +class TestDatasetDocumentCascadeDeletes: + """Test suite for Dataset-Document cascade delete operations.""" + + def test_dataset_with_documents_relationship(self): + """Test dataset can track its documents.""" + # Arrange + dataset_id = str(uuid4()) + dataset = Dataset( + tenant_id=str(uuid4()), + name="Test Dataset", + data_source_type="upload_file", + created_by=str(uuid4()), + ) + dataset.id = dataset_id + + # Mock the database session query + mock_query = MagicMock() + mock_query.where.return_value.scalar.return_value = 3 + + with patch("models.dataset.db.session.query", return_value=mock_query): + # Act + total_docs = dataset.total_documents + + # Assert + assert total_docs == 3 + + def test_dataset_available_documents_count(self): + """Test dataset can count available documents.""" + # Arrange + dataset_id = str(uuid4()) + dataset = Dataset( + tenant_id=str(uuid4()), + name="Test Dataset", + data_source_type="upload_file", + created_by=str(uuid4()), + ) + dataset.id = dataset_id + + # Mock the database session query + mock_query = MagicMock() + mock_query.where.return_value.scalar.return_value = 2 + + with patch("models.dataset.db.session.query", return_value=mock_query): + # Act + available_docs = dataset.total_available_documents + + # Assert + assert available_docs == 2 + + def test_dataset_word_count_aggregation(self): + """Test dataset can aggregate word count from documents.""" + # Arrange + dataset_id = str(uuid4()) + dataset = Dataset( + tenant_id=str(uuid4()), + name="Test Dataset", + data_source_type="upload_file", + created_by=str(uuid4()), + ) + dataset.id = dataset_id + + # Mock the database session query + mock_query = MagicMock() + mock_query.with_entities.return_value.where.return_value.scalar.return_value = 5000 + + with patch("models.dataset.db.session.query", return_value=mock_query): + # Act + total_words = dataset.word_count + + # Assert + assert total_words == 5000 + + def test_dataset_available_segment_count(self): + """Test dataset can count available segments.""" + # Arrange + dataset_id = str(uuid4()) + dataset = Dataset( + tenant_id=str(uuid4()), + name="Test Dataset", + data_source_type="upload_file", + created_by=str(uuid4()), + ) + dataset.id = dataset_id + + # Mock the database session query + mock_query = MagicMock() + mock_query.where.return_value.scalar.return_value = 15 + + with patch("models.dataset.db.session.query", return_value=mock_query): + # Act + segment_count = dataset.available_segment_count + + # Assert + assert segment_count == 15 + + def test_document_segment_count_property(self): + """Test document can count its segments.""" + # Arrange + document_id = str(uuid4()) + document = Document( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=str(uuid4()), + ) + document.id = document_id + + # Mock the database session query + mock_query = MagicMock() + mock_query.where.return_value.count.return_value = 10 + + with patch("models.dataset.db.session.query", return_value=mock_query): + # Act + segment_count = document.segment_count + + # Assert + assert segment_count == 10 + + def test_document_hit_count_aggregation(self): + """Test document can aggregate hit count from segments.""" + # Arrange + document_id = str(uuid4()) + document = Document( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=str(uuid4()), + ) + document.id = document_id + + # Mock the database session query + mock_query = MagicMock() + mock_query.with_entities.return_value.where.return_value.scalar.return_value = 25 + + with patch("models.dataset.db.session.query", return_value=mock_query): + # Act + hit_count = document.hit_count + + # Assert + assert hit_count == 25 + + +class TestDocumentSegmentNavigation: + """Test suite for DocumentSegment navigation properties.""" + + def test_document_segment_dataset_property(self): + """Test segment can access its parent dataset.""" + # Arrange + dataset_id = str(uuid4()) + segment = DocumentSegment( + tenant_id=str(uuid4()), + dataset_id=dataset_id, + document_id=str(uuid4()), + position=1, + content="Test", + word_count=1, + tokens=2, + created_by=str(uuid4()), + ) + + mock_dataset = Dataset( + tenant_id=str(uuid4()), + name="Test Dataset", + data_source_type="upload_file", + created_by=str(uuid4()), + ) + mock_dataset.id = dataset_id + + # Mock the database session scalar + with patch("models.dataset.db.session.scalar", return_value=mock_dataset): + # Act + dataset = segment.dataset + + # Assert + assert dataset is not None + assert dataset.id == dataset_id + + def test_document_segment_document_property(self): + """Test segment can access its parent document.""" + # Arrange + document_id = str(uuid4()) + segment = DocumentSegment( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + document_id=document_id, + position=1, + content="Test", + word_count=1, + tokens=2, + created_by=str(uuid4()), + ) + + mock_document = Document( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=str(uuid4()), + ) + mock_document.id = document_id + + # Mock the database session scalar + with patch("models.dataset.db.session.scalar", return_value=mock_document): + # Act + document = segment.document + + # Assert + assert document is not None + assert document.id == document_id + + def test_document_segment_previous_segment(self): + """Test segment can access previous segment.""" + # Arrange + document_id = str(uuid4()) + segment = DocumentSegment( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + document_id=document_id, + position=2, + content="Test", + word_count=1, + tokens=2, + created_by=str(uuid4()), + ) + + previous_segment = DocumentSegment( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + document_id=document_id, + position=1, + content="Previous", + word_count=1, + tokens=2, + created_by=str(uuid4()), + ) + + # Mock the database session scalar + with patch("models.dataset.db.session.scalar", return_value=previous_segment): + # Act + prev_seg = segment.previous_segment + + # Assert + assert prev_seg is not None + assert prev_seg.position == 1 + + def test_document_segment_next_segment(self): + """Test segment can access next segment.""" + # Arrange + document_id = str(uuid4()) + segment = DocumentSegment( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + document_id=document_id, + position=1, + content="Test", + word_count=1, + tokens=2, + created_by=str(uuid4()), + ) + + next_segment = DocumentSegment( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + document_id=document_id, + position=2, + content="Next", + word_count=1, + tokens=2, + created_by=str(uuid4()), + ) + + # Mock the database session scalar + with patch("models.dataset.db.session.scalar", return_value=next_segment): + # Act + next_seg = segment.next_segment + + # Assert + assert next_seg is not None + assert next_seg.position == 2 + + +class TestModelIntegration: + """Test suite for model integration scenarios.""" + + def test_complete_dataset_document_segment_hierarchy(self): + """Test complete hierarchy from dataset to segment.""" + # Arrange + tenant_id = str(uuid4()) + dataset_id = str(uuid4()) + document_id = str(uuid4()) + created_by = str(uuid4()) + + # Create dataset + dataset = Dataset( + tenant_id=tenant_id, + name="Test Dataset", + data_source_type="upload_file", + created_by=created_by, + indexing_technique="high_quality", + ) + dataset.id = dataset_id + + # Create document + document = Document( + tenant_id=tenant_id, + dataset_id=dataset_id, + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=created_by, + word_count=100, + ) + document.id = document_id + + # Create segment + segment = DocumentSegment( + tenant_id=tenant_id, + dataset_id=dataset_id, + document_id=document_id, + position=1, + content="Test segment content", + word_count=3, + tokens=5, + created_by=created_by, + status="completed", + ) + + # Assert + assert dataset.id == dataset_id + assert document.dataset_id == dataset_id + assert segment.dataset_id == dataset_id + assert segment.document_id == document_id + assert dataset.indexing_technique == "high_quality" + assert document.word_count == 100 + assert segment.status == "completed" + + def test_document_to_dict_serialization(self): + """Test document to_dict method for serialization.""" + # Arrange + tenant_id = str(uuid4()) + dataset_id = str(uuid4()) + created_by = str(uuid4()) + + document = Document( + tenant_id=tenant_id, + dataset_id=dataset_id, + position=1, + data_source_type="upload_file", + batch="batch_001", + name="test.pdf", + created_from="web", + created_by=created_by, + word_count=100, + indexing_status="completed", + ) + + # Mock segment_count and hit_count + with ( + patch.object(Document, "segment_count", new_callable=lambda: property(lambda self: 5)), + patch.object(Document, "hit_count", new_callable=lambda: property(lambda self: 10)), + ): + # Act + result = document.to_dict() + + # Assert + assert result["tenant_id"] == tenant_id + assert result["dataset_id"] == dataset_id + assert result["name"] == "test.pdf" + assert result["word_count"] == 100 + assert result["indexing_status"] == "completed" + assert result["segment_count"] == 5 + assert result["hit_count"] == 10 diff --git a/api/tests/unit_tests/repositories/test_sqlalchemy_api_workflow_run_repository.py b/api/tests/unit_tests/repositories/test_sqlalchemy_api_workflow_run_repository.py new file mode 100644 index 0000000000..73b35b8e63 --- /dev/null +++ b/api/tests/unit_tests/repositories/test_sqlalchemy_api_workflow_run_repository.py @@ -0,0 +1,370 @@ +"""Unit tests for DifyAPISQLAlchemyWorkflowRunRepository implementation.""" + +from datetime import UTC, datetime +from unittest.mock import Mock, patch + +import pytest +from sqlalchemy.orm import Session, sessionmaker + +from core.workflow.entities.workflow_pause import WorkflowPauseEntity +from core.workflow.enums import WorkflowExecutionStatus +from models.workflow import WorkflowPause as WorkflowPauseModel +from models.workflow import WorkflowRun +from repositories.sqlalchemy_api_workflow_run_repository import ( + DifyAPISQLAlchemyWorkflowRunRepository, + _PrivateWorkflowPauseEntity, + _WorkflowRunError, +) + + +class TestDifyAPISQLAlchemyWorkflowRunRepository: + """Test DifyAPISQLAlchemyWorkflowRunRepository implementation.""" + + @pytest.fixture + def mock_session(self): + """Create a mock session.""" + return Mock(spec=Session) + + @pytest.fixture + def mock_session_maker(self, mock_session): + """Create a mock sessionmaker.""" + session_maker = Mock(spec=sessionmaker) + + # Create a context manager mock + context_manager = Mock() + context_manager.__enter__ = Mock(return_value=mock_session) + context_manager.__exit__ = Mock(return_value=None) + session_maker.return_value = context_manager + + # Mock session.begin() context manager + begin_context_manager = Mock() + begin_context_manager.__enter__ = Mock(return_value=None) + begin_context_manager.__exit__ = Mock(return_value=None) + mock_session.begin = Mock(return_value=begin_context_manager) + + # Add missing session methods + mock_session.commit = Mock() + mock_session.rollback = Mock() + mock_session.add = Mock() + mock_session.delete = Mock() + mock_session.get = Mock() + mock_session.scalar = Mock() + mock_session.scalars = Mock() + + # Also support expire_on_commit parameter + def make_session(expire_on_commit=None): + cm = Mock() + cm.__enter__ = Mock(return_value=mock_session) + cm.__exit__ = Mock(return_value=None) + return cm + + session_maker.side_effect = make_session + return session_maker + + @pytest.fixture + def repository(self, mock_session_maker): + """Create repository instance with mocked dependencies.""" + + # Create a testable subclass that implements the save method + class TestableDifyAPISQLAlchemyWorkflowRunRepository(DifyAPISQLAlchemyWorkflowRunRepository): + def __init__(self, session_maker): + # Initialize without calling parent __init__ to avoid any instantiation issues + self._session_maker = session_maker + + def save(self, execution): + """Mock implementation of save method.""" + return None + + # Create repository instance + repo = TestableDifyAPISQLAlchemyWorkflowRunRepository(mock_session_maker) + + return repo + + @pytest.fixture + def sample_workflow_run(self): + """Create a sample WorkflowRun model.""" + workflow_run = Mock(spec=WorkflowRun) + workflow_run.id = "workflow-run-123" + workflow_run.tenant_id = "tenant-123" + workflow_run.app_id = "app-123" + workflow_run.workflow_id = "workflow-123" + workflow_run.status = WorkflowExecutionStatus.RUNNING + return workflow_run + + @pytest.fixture + def sample_workflow_pause(self): + """Create a sample WorkflowPauseModel.""" + pause = Mock(spec=WorkflowPauseModel) + pause.id = "pause-123" + pause.workflow_id = "workflow-123" + pause.workflow_run_id = "workflow-run-123" + pause.state_object_key = "workflow-state-123.json" + pause.resumed_at = None + pause.created_at = datetime.now(UTC) + return pause + + +class TestCreateWorkflowPause(TestDifyAPISQLAlchemyWorkflowRunRepository): + """Test create_workflow_pause method.""" + + def test_create_workflow_pause_success( + self, + repository: DifyAPISQLAlchemyWorkflowRunRepository, + mock_session: Mock, + sample_workflow_run: Mock, + ): + """Test successful workflow pause creation.""" + # Arrange + workflow_run_id = "workflow-run-123" + state_owner_user_id = "user-123" + state = '{"test": "state"}' + + mock_session.get.return_value = sample_workflow_run + + with patch("repositories.sqlalchemy_api_workflow_run_repository.uuidv7") as mock_uuidv7: + mock_uuidv7.side_effect = ["pause-123"] + with patch("repositories.sqlalchemy_api_workflow_run_repository.storage") as mock_storage: + # Act + result = repository.create_workflow_pause( + workflow_run_id=workflow_run_id, + state_owner_user_id=state_owner_user_id, + state=state, + ) + + # Assert + assert isinstance(result, _PrivateWorkflowPauseEntity) + assert result.id == "pause-123" + assert result.workflow_execution_id == workflow_run_id + + # Verify database interactions + mock_session.get.assert_called_once_with(WorkflowRun, workflow_run_id) + mock_storage.save.assert_called_once() + mock_session.add.assert_called() + # When using session.begin() context manager, commit is handled automatically + # No explicit commit call is expected + + def test_create_workflow_pause_not_found( + self, repository: DifyAPISQLAlchemyWorkflowRunRepository, mock_session: Mock + ): + """Test workflow pause creation when workflow run not found.""" + # Arrange + mock_session.get.return_value = None + + # Act & Assert + with pytest.raises(ValueError, match="WorkflowRun not found: workflow-run-123"): + repository.create_workflow_pause( + workflow_run_id="workflow-run-123", + state_owner_user_id="user-123", + state='{"test": "state"}', + ) + + mock_session.get.assert_called_once_with(WorkflowRun, "workflow-run-123") + + def test_create_workflow_pause_invalid_status( + self, repository: DifyAPISQLAlchemyWorkflowRunRepository, mock_session: Mock, sample_workflow_run: Mock + ): + """Test workflow pause creation when workflow not in RUNNING status.""" + # Arrange + sample_workflow_run.status = WorkflowExecutionStatus.PAUSED + mock_session.get.return_value = sample_workflow_run + + # Act & Assert + with pytest.raises(_WorkflowRunError, match="Only WorkflowRun with RUNNING status can be paused"): + repository.create_workflow_pause( + workflow_run_id="workflow-run-123", + state_owner_user_id="user-123", + state='{"test": "state"}', + ) + + +class TestResumeWorkflowPause(TestDifyAPISQLAlchemyWorkflowRunRepository): + """Test resume_workflow_pause method.""" + + def test_resume_workflow_pause_success( + self, + repository: DifyAPISQLAlchemyWorkflowRunRepository, + mock_session: Mock, + sample_workflow_run: Mock, + sample_workflow_pause: Mock, + ): + """Test successful workflow pause resume.""" + # Arrange + workflow_run_id = "workflow-run-123" + pause_entity = Mock(spec=WorkflowPauseEntity) + pause_entity.id = "pause-123" + + # Setup workflow run and pause + sample_workflow_run.status = WorkflowExecutionStatus.PAUSED + sample_workflow_run.pause = sample_workflow_pause + sample_workflow_pause.resumed_at = None + + mock_session.scalar.return_value = sample_workflow_run + + with patch("repositories.sqlalchemy_api_workflow_run_repository.naive_utc_now") as mock_now: + mock_now.return_value = datetime.now(UTC) + + # Act + result = repository.resume_workflow_pause( + workflow_run_id=workflow_run_id, + pause_entity=pause_entity, + ) + + # Assert + assert isinstance(result, _PrivateWorkflowPauseEntity) + assert result.id == "pause-123" + + # Verify state transitions + assert sample_workflow_pause.resumed_at is not None + assert sample_workflow_run.status == WorkflowExecutionStatus.RUNNING + + # Verify database interactions + mock_session.add.assert_called() + # When using session.begin() context manager, commit is handled automatically + # No explicit commit call is expected + + def test_resume_workflow_pause_not_paused( + self, + repository: DifyAPISQLAlchemyWorkflowRunRepository, + mock_session: Mock, + sample_workflow_run: Mock, + ): + """Test resume when workflow is not paused.""" + # Arrange + workflow_run_id = "workflow-run-123" + pause_entity = Mock(spec=WorkflowPauseEntity) + pause_entity.id = "pause-123" + + sample_workflow_run.status = WorkflowExecutionStatus.RUNNING + mock_session.scalar.return_value = sample_workflow_run + + # Act & Assert + with pytest.raises(_WorkflowRunError, match="WorkflowRun is not in PAUSED status"): + repository.resume_workflow_pause( + workflow_run_id=workflow_run_id, + pause_entity=pause_entity, + ) + + def test_resume_workflow_pause_id_mismatch( + self, + repository: DifyAPISQLAlchemyWorkflowRunRepository, + mock_session: Mock, + sample_workflow_run: Mock, + sample_workflow_pause: Mock, + ): + """Test resume when pause ID doesn't match.""" + # Arrange + workflow_run_id = "workflow-run-123" + pause_entity = Mock(spec=WorkflowPauseEntity) + pause_entity.id = "pause-456" # Different ID + + sample_workflow_run.status = WorkflowExecutionStatus.PAUSED + sample_workflow_pause.id = "pause-123" + sample_workflow_run.pause = sample_workflow_pause + mock_session.scalar.return_value = sample_workflow_run + + # Act & Assert + with pytest.raises(_WorkflowRunError, match="different id in WorkflowPause and WorkflowPauseEntity"): + repository.resume_workflow_pause( + workflow_run_id=workflow_run_id, + pause_entity=pause_entity, + ) + + +class TestDeleteWorkflowPause(TestDifyAPISQLAlchemyWorkflowRunRepository): + """Test delete_workflow_pause method.""" + + def test_delete_workflow_pause_success( + self, + repository: DifyAPISQLAlchemyWorkflowRunRepository, + mock_session: Mock, + sample_workflow_pause: Mock, + ): + """Test successful workflow pause deletion.""" + # Arrange + pause_entity = Mock(spec=WorkflowPauseEntity) + pause_entity.id = "pause-123" + + mock_session.get.return_value = sample_workflow_pause + + with patch("repositories.sqlalchemy_api_workflow_run_repository.storage") as mock_storage: + # Act + repository.delete_workflow_pause(pause_entity=pause_entity) + + # Assert + mock_storage.delete.assert_called_once_with(sample_workflow_pause.state_object_key) + mock_session.delete.assert_called_once_with(sample_workflow_pause) + # When using session.begin() context manager, commit is handled automatically + # No explicit commit call is expected + + def test_delete_workflow_pause_not_found( + self, + repository: DifyAPISQLAlchemyWorkflowRunRepository, + mock_session: Mock, + ): + """Test delete when pause not found.""" + # Arrange + pause_entity = Mock(spec=WorkflowPauseEntity) + pause_entity.id = "pause-123" + + mock_session.get.return_value = None + + # Act & Assert + with pytest.raises(_WorkflowRunError, match="WorkflowPause not found: pause-123"): + repository.delete_workflow_pause(pause_entity=pause_entity) + + +class TestPrivateWorkflowPauseEntity(TestDifyAPISQLAlchemyWorkflowRunRepository): + """Test _PrivateWorkflowPauseEntity class.""" + + def test_from_models(self, sample_workflow_pause: Mock): + """Test creating _PrivateWorkflowPauseEntity from models.""" + # Act + entity = _PrivateWorkflowPauseEntity.from_models(sample_workflow_pause) + + # Assert + assert isinstance(entity, _PrivateWorkflowPauseEntity) + assert entity._pause_model == sample_workflow_pause + + def test_properties(self, sample_workflow_pause: Mock): + """Test entity properties.""" + # Arrange + entity = _PrivateWorkflowPauseEntity.from_models(sample_workflow_pause) + + # Act & Assert + assert entity.id == sample_workflow_pause.id + assert entity.workflow_execution_id == sample_workflow_pause.workflow_run_id + assert entity.resumed_at == sample_workflow_pause.resumed_at + + def test_get_state(self, sample_workflow_pause: Mock): + """Test getting state from storage.""" + # Arrange + entity = _PrivateWorkflowPauseEntity.from_models(sample_workflow_pause) + expected_state = b'{"test": "state"}' + + with patch("repositories.sqlalchemy_api_workflow_run_repository.storage") as mock_storage: + mock_storage.load.return_value = expected_state + + # Act + result = entity.get_state() + + # Assert + assert result == expected_state + mock_storage.load.assert_called_once_with(sample_workflow_pause.state_object_key) + + def test_get_state_caching(self, sample_workflow_pause: Mock): + """Test state caching in get_state method.""" + # Arrange + entity = _PrivateWorkflowPauseEntity.from_models(sample_workflow_pause) + expected_state = b'{"test": "state"}' + + with patch("repositories.sqlalchemy_api_workflow_run_repository.storage") as mock_storage: + mock_storage.load.return_value = expected_state + + # Act + result1 = entity.get_state() + result2 = entity.get_state() # Should use cache + + # Assert + assert result1 == expected_state + assert result2 == expected_state + mock_storage.load.assert_called_once() # Only called once due to caching diff --git a/api/tests/unit_tests/services/test_billing_service.py b/api/tests/unit_tests/services/test_billing_service.py new file mode 100644 index 0000000000..dc13143417 --- /dev/null +++ b/api/tests/unit_tests/services/test_billing_service.py @@ -0,0 +1,236 @@ +import json +from unittest.mock import MagicMock, patch + +import httpx +import pytest +from werkzeug.exceptions import InternalServerError + +from services.billing_service import BillingService + + +class TestBillingServiceSendRequest: + """Unit tests for BillingService._send_request method.""" + + @pytest.fixture + def mock_httpx_request(self): + """Mock httpx.request for testing.""" + with patch("services.billing_service.httpx.request") as mock_request: + yield mock_request + + @pytest.fixture + def mock_billing_config(self): + """Mock BillingService configuration.""" + with ( + patch.object(BillingService, "base_url", "https://billing-api.example.com"), + patch.object(BillingService, "secret_key", "test-secret-key"), + ): + yield + + def test_get_request_success(self, mock_httpx_request, mock_billing_config): + """Test successful GET request.""" + # Arrange + expected_response = {"result": "success", "data": {"info": "test"}} + mock_response = MagicMock() + mock_response.status_code = httpx.codes.OK + mock_response.json.return_value = expected_response + mock_httpx_request.return_value = mock_response + + # Act + result = BillingService._send_request("GET", "/test", params={"key": "value"}) + + # Assert + assert result == expected_response + mock_httpx_request.assert_called_once() + call_args = mock_httpx_request.call_args + assert call_args[0][0] == "GET" + assert call_args[0][1] == "https://billing-api.example.com/test" + assert call_args[1]["params"] == {"key": "value"} + assert call_args[1]["headers"]["Billing-Api-Secret-Key"] == "test-secret-key" + assert call_args[1]["headers"]["Content-Type"] == "application/json" + + @pytest.mark.parametrize( + "status_code", [httpx.codes.NOT_FOUND, httpx.codes.INTERNAL_SERVER_ERROR, httpx.codes.BAD_REQUEST] + ) + def test_get_request_non_200_status_code(self, mock_httpx_request, mock_billing_config, status_code): + """Test GET request with non-200 status code raises ValueError.""" + # Arrange + mock_response = MagicMock() + mock_response.status_code = status_code + mock_httpx_request.return_value = mock_response + + # Act & Assert + with pytest.raises(ValueError) as exc_info: + BillingService._send_request("GET", "/test") + assert "Unable to retrieve billing information" in str(exc_info.value) + + def test_put_request_success(self, mock_httpx_request, mock_billing_config): + """Test successful PUT request.""" + # Arrange + expected_response = {"result": "success"} + mock_response = MagicMock() + mock_response.status_code = httpx.codes.OK + mock_response.json.return_value = expected_response + mock_httpx_request.return_value = mock_response + + # Act + result = BillingService._send_request("PUT", "/test", json={"key": "value"}) + + # Assert + assert result == expected_response + call_args = mock_httpx_request.call_args + assert call_args[0][0] == "PUT" + + def test_put_request_internal_server_error(self, mock_httpx_request, mock_billing_config): + """Test PUT request with INTERNAL_SERVER_ERROR raises InternalServerError.""" + # Arrange + mock_response = MagicMock() + mock_response.status_code = httpx.codes.INTERNAL_SERVER_ERROR + mock_httpx_request.return_value = mock_response + + # Act & Assert + with pytest.raises(InternalServerError) as exc_info: + BillingService._send_request("PUT", "/test", json={"key": "value"}) + assert exc_info.value.code == 500 + assert "Unable to process billing request" in str(exc_info.value.description) + + @pytest.mark.parametrize( + "status_code", [httpx.codes.BAD_REQUEST, httpx.codes.NOT_FOUND, httpx.codes.UNAUTHORIZED, httpx.codes.FORBIDDEN] + ) + def test_put_request_non_200_non_500(self, mock_httpx_request, mock_billing_config, status_code): + """Test PUT request with non-200 and non-500 status code raises ValueError.""" + # Arrange + mock_response = MagicMock() + mock_response.status_code = status_code + mock_httpx_request.return_value = mock_response + + # Act & Assert + with pytest.raises(ValueError) as exc_info: + BillingService._send_request("PUT", "/test", json={"key": "value"}) + assert "Invalid arguments." in str(exc_info.value) + + @pytest.mark.parametrize("method", ["POST", "DELETE"]) + def test_non_get_non_put_request_success(self, mock_httpx_request, mock_billing_config, method): + """Test successful POST/DELETE request.""" + # Arrange + expected_response = {"result": "success"} + mock_response = MagicMock() + mock_response.status_code = httpx.codes.OK + mock_response.json.return_value = expected_response + mock_httpx_request.return_value = mock_response + + # Act + result = BillingService._send_request(method, "/test", json={"key": "value"}) + + # Assert + assert result == expected_response + call_args = mock_httpx_request.call_args + assert call_args[0][0] == method + + @pytest.mark.parametrize( + "status_code", [httpx.codes.BAD_REQUEST, httpx.codes.INTERNAL_SERVER_ERROR, httpx.codes.NOT_FOUND] + ) + def test_post_request_non_200_with_valid_json(self, mock_httpx_request, mock_billing_config, status_code): + """Test POST request with non-200 status code raises ValueError.""" + # Arrange + error_response = {"detail": "Error message"} + mock_response = MagicMock() + mock_response.status_code = status_code + mock_response.json.return_value = error_response + mock_httpx_request.return_value = mock_response + + # Act & Assert + with pytest.raises(ValueError) as exc_info: + BillingService._send_request("POST", "/test", json={"key": "value"}) + assert "Unable to send request to" in str(exc_info.value) + + @pytest.mark.parametrize( + "status_code", [httpx.codes.BAD_REQUEST, httpx.codes.INTERNAL_SERVER_ERROR, httpx.codes.NOT_FOUND] + ) + def test_delete_request_non_200_with_valid_json(self, mock_httpx_request, mock_billing_config, status_code): + """Test DELETE request with non-200 status code but valid JSON response. + + DELETE doesn't check status code, so it returns the error JSON. + """ + # Arrange + error_response = {"detail": "Error message"} + mock_response = MagicMock() + mock_response.status_code = status_code + mock_response.json.return_value = error_response + mock_httpx_request.return_value = mock_response + + # Act + result = BillingService._send_request("DELETE", "/test", json={"key": "value"}) + + # Assert + assert result == error_response + + @pytest.mark.parametrize( + "status_code", [httpx.codes.BAD_REQUEST, httpx.codes.INTERNAL_SERVER_ERROR, httpx.codes.NOT_FOUND] + ) + def test_post_request_non_200_with_invalid_json(self, mock_httpx_request, mock_billing_config, status_code): + """Test POST request with non-200 status code raises ValueError before JSON parsing.""" + # Arrange + mock_response = MagicMock() + mock_response.status_code = status_code + mock_response.text = "" + mock_response.json.side_effect = json.JSONDecodeError("Expecting value", "", 0) + mock_httpx_request.return_value = mock_response + + # Act & Assert + # POST checks status code before calling response.json(), so ValueError is raised + with pytest.raises(ValueError) as exc_info: + BillingService._send_request("POST", "/test", json={"key": "value"}) + assert "Unable to send request to" in str(exc_info.value) + + @pytest.mark.parametrize( + "status_code", [httpx.codes.BAD_REQUEST, httpx.codes.INTERNAL_SERVER_ERROR, httpx.codes.NOT_FOUND] + ) + def test_delete_request_non_200_with_invalid_json(self, mock_httpx_request, mock_billing_config, status_code): + """Test DELETE request with non-200 status code and invalid JSON response raises exception. + + DELETE doesn't check status code, so it calls response.json() which raises JSONDecodeError + when the response cannot be parsed as JSON (e.g., empty response). + """ + # Arrange + mock_response = MagicMock() + mock_response.status_code = status_code + mock_response.text = "" + mock_response.json.side_effect = json.JSONDecodeError("Expecting value", "", 0) + mock_httpx_request.return_value = mock_response + + # Act & Assert + with pytest.raises(json.JSONDecodeError): + BillingService._send_request("DELETE", "/test", json={"key": "value"}) + + def test_retry_on_request_error(self, mock_httpx_request, mock_billing_config): + """Test that _send_request retries on httpx.RequestError.""" + # Arrange + expected_response = {"result": "success"} + mock_response = MagicMock() + mock_response.status_code = httpx.codes.OK + mock_response.json.return_value = expected_response + + # First call raises RequestError, second succeeds + mock_httpx_request.side_effect = [ + httpx.RequestError("Network error"), + mock_response, + ] + + # Act + result = BillingService._send_request("GET", "/test") + + # Assert + assert result == expected_response + assert mock_httpx_request.call_count == 2 + + def test_retry_exhausted_raises_exception(self, mock_httpx_request, mock_billing_config): + """Test that _send_request raises exception after retries are exhausted.""" + # Arrange + mock_httpx_request.side_effect = httpx.RequestError("Network error") + + # Act & Assert + with pytest.raises(httpx.RequestError): + BillingService._send_request("GET", "/test") + + # Should retry multiple times (wait=2, stop_before_delay=10 means ~5 attempts) + assert mock_httpx_request.call_count > 1 diff --git a/api/tests/unit_tests/services/test_document_indexing_task_proxy.py b/api/tests/unit_tests/services/test_document_indexing_task_proxy.py new file mode 100644 index 0000000000..d9183be9fb --- /dev/null +++ b/api/tests/unit_tests/services/test_document_indexing_task_proxy.py @@ -0,0 +1,317 @@ +from unittest.mock import Mock, patch + +from core.entities.document_task import DocumentTask +from core.rag.pipeline.queue import TenantIsolatedTaskQueue +from enums.cloud_plan import CloudPlan +from services.document_indexing_task_proxy import DocumentIndexingTaskProxy + + +class DocumentIndexingTaskProxyTestDataFactory: + """Factory class for creating test data and mock objects for DocumentIndexingTaskProxy tests.""" + + @staticmethod + def create_mock_features(billing_enabled: bool = False, plan: CloudPlan = CloudPlan.SANDBOX) -> Mock: + """Create mock features with billing configuration.""" + features = Mock() + features.billing = Mock() + features.billing.enabled = billing_enabled + features.billing.subscription = Mock() + features.billing.subscription.plan = plan + return features + + @staticmethod + def create_mock_tenant_queue(has_task_key: bool = False) -> Mock: + """Create mock TenantIsolatedTaskQueue.""" + queue = Mock(spec=TenantIsolatedTaskQueue) + queue.get_task_key.return_value = "task_key" if has_task_key else None + queue.push_tasks = Mock() + queue.set_task_waiting_time = Mock() + return queue + + @staticmethod + def create_document_task_proxy( + tenant_id: str = "tenant-123", dataset_id: str = "dataset-456", document_ids: list[str] | None = None + ) -> DocumentIndexingTaskProxy: + """Create DocumentIndexingTaskProxy instance for testing.""" + if document_ids is None: + document_ids = ["doc-1", "doc-2", "doc-3"] + return DocumentIndexingTaskProxy(tenant_id, dataset_id, document_ids) + + +class TestDocumentIndexingTaskProxy: + """Test cases for DocumentIndexingTaskProxy class.""" + + def test_initialization(self): + """Test DocumentIndexingTaskProxy initialization.""" + # Arrange + tenant_id = "tenant-123" + dataset_id = "dataset-456" + document_ids = ["doc-1", "doc-2", "doc-3"] + + # Act + proxy = DocumentIndexingTaskProxy(tenant_id, dataset_id, document_ids) + + # Assert + assert proxy._tenant_id == tenant_id + assert proxy._dataset_id == dataset_id + assert proxy._document_ids == document_ids + assert isinstance(proxy._tenant_isolated_task_queue, TenantIsolatedTaskQueue) + assert proxy._tenant_isolated_task_queue._tenant_id == tenant_id + assert proxy._tenant_isolated_task_queue._unique_key == "document_indexing" + + @patch("services.document_indexing_task_proxy.FeatureService") + def test_features_property(self, mock_feature_service): + """Test cached_property features.""" + # Arrange + mock_features = DocumentIndexingTaskProxyTestDataFactory.create_mock_features() + mock_feature_service.get_features.return_value = mock_features + proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy() + + # Act + features1 = proxy.features + features2 = proxy.features # Second call should use cached property + + # Assert + assert features1 == mock_features + assert features2 == mock_features + assert features1 is features2 # Should be the same instance due to caching + mock_feature_service.get_features.assert_called_once_with("tenant-123") + + @patch("services.document_indexing_task_proxy.normal_document_indexing_task") + def test_send_to_direct_queue(self, mock_task): + """Test _send_to_direct_queue method.""" + # Arrange + proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy() + mock_task.delay = Mock() + + # Act + proxy._send_to_direct_queue(mock_task) + + # Assert + mock_task.delay.assert_called_once_with( + tenant_id="tenant-123", dataset_id="dataset-456", document_ids=["doc-1", "doc-2", "doc-3"] + ) + + @patch("services.document_indexing_task_proxy.normal_document_indexing_task") + def test_send_to_tenant_queue_with_existing_task_key(self, mock_task): + """Test _send_to_tenant_queue when task key exists.""" + # Arrange + proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy() + proxy._tenant_isolated_task_queue = DocumentIndexingTaskProxyTestDataFactory.create_mock_tenant_queue( + has_task_key=True + ) + mock_task.delay = Mock() + + # Act + proxy._send_to_tenant_queue(mock_task) + + # Assert + proxy._tenant_isolated_task_queue.push_tasks.assert_called_once() + pushed_tasks = proxy._tenant_isolated_task_queue.push_tasks.call_args[0][0] + assert len(pushed_tasks) == 1 + assert isinstance(DocumentTask(**pushed_tasks[0]), DocumentTask) + assert pushed_tasks[0]["tenant_id"] == "tenant-123" + assert pushed_tasks[0]["dataset_id"] == "dataset-456" + assert pushed_tasks[0]["document_ids"] == ["doc-1", "doc-2", "doc-3"] + mock_task.delay.assert_not_called() + + @patch("services.document_indexing_task_proxy.normal_document_indexing_task") + def test_send_to_tenant_queue_without_task_key(self, mock_task): + """Test _send_to_tenant_queue when no task key exists.""" + # Arrange + proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy() + proxy._tenant_isolated_task_queue = DocumentIndexingTaskProxyTestDataFactory.create_mock_tenant_queue( + has_task_key=False + ) + mock_task.delay = Mock() + + # Act + proxy._send_to_tenant_queue(mock_task) + + # Assert + proxy._tenant_isolated_task_queue.set_task_waiting_time.assert_called_once() + mock_task.delay.assert_called_once_with( + tenant_id="tenant-123", dataset_id="dataset-456", document_ids=["doc-1", "doc-2", "doc-3"] + ) + proxy._tenant_isolated_task_queue.push_tasks.assert_not_called() + + @patch("services.document_indexing_task_proxy.normal_document_indexing_task") + def test_send_to_default_tenant_queue(self, mock_task): + """Test _send_to_default_tenant_queue method.""" + # Arrange + proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy() + proxy._send_to_tenant_queue = Mock() + + # Act + proxy._send_to_default_tenant_queue() + + # Assert + proxy._send_to_tenant_queue.assert_called_once_with(mock_task) + + @patch("services.document_indexing_task_proxy.priority_document_indexing_task") + def test_send_to_priority_tenant_queue(self, mock_task): + """Test _send_to_priority_tenant_queue method.""" + # Arrange + proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy() + proxy._send_to_tenant_queue = Mock() + + # Act + proxy._send_to_priority_tenant_queue() + + # Assert + proxy._send_to_tenant_queue.assert_called_once_with(mock_task) + + @patch("services.document_indexing_task_proxy.priority_document_indexing_task") + def test_send_to_priority_direct_queue(self, mock_task): + """Test _send_to_priority_direct_queue method.""" + # Arrange + proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy() + proxy._send_to_direct_queue = Mock() + + # Act + proxy._send_to_priority_direct_queue() + + # Assert + proxy._send_to_direct_queue.assert_called_once_with(mock_task) + + @patch("services.document_indexing_task_proxy.FeatureService") + def test_dispatch_with_billing_enabled_sandbox_plan(self, mock_feature_service): + """Test _dispatch method when billing is enabled with sandbox plan.""" + # Arrange + mock_features = DocumentIndexingTaskProxyTestDataFactory.create_mock_features( + billing_enabled=True, plan=CloudPlan.SANDBOX + ) + mock_feature_service.get_features.return_value = mock_features + proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy() + proxy._send_to_default_tenant_queue = Mock() + + # Act + proxy._dispatch() + + # Assert + proxy._send_to_default_tenant_queue.assert_called_once() + + @patch("services.document_indexing_task_proxy.FeatureService") + def test_dispatch_with_billing_enabled_non_sandbox_plan(self, mock_feature_service): + """Test _dispatch method when billing is enabled with non-sandbox plan.""" + # Arrange + mock_features = DocumentIndexingTaskProxyTestDataFactory.create_mock_features( + billing_enabled=True, plan=CloudPlan.TEAM + ) + mock_feature_service.get_features.return_value = mock_features + proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy() + proxy._send_to_priority_tenant_queue = Mock() + + # Act + proxy._dispatch() + + # If billing enabled with non sandbox plan, should send to priority tenant queue + proxy._send_to_priority_tenant_queue.assert_called_once() + + @patch("services.document_indexing_task_proxy.FeatureService") + def test_dispatch_with_billing_disabled(self, mock_feature_service): + """Test _dispatch method when billing is disabled.""" + # Arrange + mock_features = DocumentIndexingTaskProxyTestDataFactory.create_mock_features(billing_enabled=False) + mock_feature_service.get_features.return_value = mock_features + proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy() + proxy._send_to_priority_direct_queue = Mock() + + # Act + proxy._dispatch() + + # If billing disabled, for example: self-hosted or enterprise, should send to priority direct queue + proxy._send_to_priority_direct_queue.assert_called_once() + + @patch("services.document_indexing_task_proxy.FeatureService") + def test_delay_method(self, mock_feature_service): + """Test delay method integration.""" + # Arrange + mock_features = DocumentIndexingTaskProxyTestDataFactory.create_mock_features( + billing_enabled=True, plan=CloudPlan.SANDBOX + ) + mock_feature_service.get_features.return_value = mock_features + proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy() + proxy._send_to_default_tenant_queue = Mock() + + # Act + proxy.delay() + + # Assert + # If billing enabled with sandbox plan, should send to default tenant queue + proxy._send_to_default_tenant_queue.assert_called_once() + + def test_document_task_dataclass(self): + """Test DocumentTask dataclass.""" + # Arrange + tenant_id = "tenant-123" + dataset_id = "dataset-456" + document_ids = ["doc-1", "doc-2"] + + # Act + task = DocumentTask(tenant_id=tenant_id, dataset_id=dataset_id, document_ids=document_ids) + + # Assert + assert task.tenant_id == tenant_id + assert task.dataset_id == dataset_id + assert task.document_ids == document_ids + + @patch("services.document_indexing_task_proxy.FeatureService") + def test_dispatch_edge_case_empty_plan(self, mock_feature_service): + """Test _dispatch method with empty plan string.""" + # Arrange + mock_features = DocumentIndexingTaskProxyTestDataFactory.create_mock_features(billing_enabled=True, plan="") + mock_feature_service.get_features.return_value = mock_features + proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy() + proxy._send_to_priority_tenant_queue = Mock() + + # Act + proxy._dispatch() + + # Assert + proxy._send_to_priority_tenant_queue.assert_called_once() + + @patch("services.document_indexing_task_proxy.FeatureService") + def test_dispatch_edge_case_none_plan(self, mock_feature_service): + """Test _dispatch method with None plan.""" + # Arrange + mock_features = DocumentIndexingTaskProxyTestDataFactory.create_mock_features(billing_enabled=True, plan=None) + mock_feature_service.get_features.return_value = mock_features + proxy = DocumentIndexingTaskProxyTestDataFactory.create_document_task_proxy() + proxy._send_to_priority_tenant_queue = Mock() + + # Act + proxy._dispatch() + + # Assert + proxy._send_to_priority_tenant_queue.assert_called_once() + + def test_initialization_with_empty_document_ids(self): + """Test initialization with empty document_ids list.""" + # Arrange + tenant_id = "tenant-123" + dataset_id = "dataset-456" + document_ids = [] + + # Act + proxy = DocumentIndexingTaskProxy(tenant_id, dataset_id, document_ids) + + # Assert + assert proxy._tenant_id == tenant_id + assert proxy._dataset_id == dataset_id + assert proxy._document_ids == document_ids + + def test_initialization_with_single_document_id(self): + """Test initialization with single document_id.""" + # Arrange + tenant_id = "tenant-123" + dataset_id = "dataset-456" + document_ids = ["doc-1"] + + # Act + proxy = DocumentIndexingTaskProxy(tenant_id, dataset_id, document_ids) + + # Assert + assert proxy._tenant_id == tenant_id + assert proxy._dataset_id == dataset_id + assert proxy._document_ids == document_ids diff --git a/api/tests/unit_tests/services/test_document_service_display_status.py b/api/tests/unit_tests/services/test_document_service_display_status.py new file mode 100644 index 0000000000..85cba505a0 --- /dev/null +++ b/api/tests/unit_tests/services/test_document_service_display_status.py @@ -0,0 +1,33 @@ +import sqlalchemy as sa + +from models.dataset import Document +from services.dataset_service import DocumentService + + +def test_normalize_display_status_alias_mapping(): + assert DocumentService.normalize_display_status("ACTIVE") == "available" + assert DocumentService.normalize_display_status("enabled") == "available" + assert DocumentService.normalize_display_status("archived") == "archived" + assert DocumentService.normalize_display_status("unknown") is None + + +def test_build_display_status_filters_available(): + filters = DocumentService.build_display_status_filters("available") + assert len(filters) == 3 + for condition in filters: + assert condition is not None + + +def test_apply_display_status_filter_applies_when_status_present(): + query = sa.select(Document) + filtered = DocumentService.apply_display_status_filter(query, "queuing") + compiled = str(filtered.compile(compile_kwargs={"literal_binds": True})) + assert "WHERE" in compiled + assert "documents.indexing_status = 'waiting'" in compiled + + +def test_apply_display_status_filter_returns_same_when_invalid(): + query = sa.select(Document) + filtered = DocumentService.apply_display_status_filter(query, "invalid") + compiled = str(filtered.compile(compile_kwargs={"literal_binds": True})) + assert "WHERE" not in compiled diff --git a/api/tests/unit_tests/services/test_metadata_partial_update.py b/api/tests/unit_tests/services/test_metadata_partial_update.py new file mode 100644 index 0000000000..00162c10e4 --- /dev/null +++ b/api/tests/unit_tests/services/test_metadata_partial_update.py @@ -0,0 +1,153 @@ +import unittest +from unittest.mock import MagicMock, patch + +from models.dataset import Dataset, Document +from services.entities.knowledge_entities.knowledge_entities import ( + DocumentMetadataOperation, + MetadataDetail, + MetadataOperationData, +) +from services.metadata_service import MetadataService + + +class TestMetadataPartialUpdate(unittest.TestCase): + def setUp(self): + self.dataset = MagicMock(spec=Dataset) + self.dataset.id = "dataset_id" + self.dataset.built_in_field_enabled = False + + self.document = MagicMock(spec=Document) + self.document.id = "doc_id" + self.document.doc_metadata = {"existing_key": "existing_value"} + self.document.data_source_type = "upload_file" + + @patch("services.metadata_service.db") + @patch("services.metadata_service.DocumentService") + @patch("services.metadata_service.current_account_with_tenant") + @patch("services.metadata_service.redis_client") + def test_partial_update_merges_metadata(self, mock_redis, mock_current_account, mock_document_service, mock_db): + # Setup mocks + mock_redis.get.return_value = None + mock_document_service.get_document.return_value = self.document + mock_current_account.return_value = (MagicMock(id="user_id"), "tenant_id") + + # Mock DB query for existing bindings + + # No existing binding for new key + mock_db.session.query.return_value.filter_by.return_value.first.return_value = None + + # Input data + operation = DocumentMetadataOperation( + document_id="doc_id", + metadata_list=[MetadataDetail(id="new_meta_id", name="new_key", value="new_value")], + partial_update=True, + ) + metadata_args = MetadataOperationData(operation_data=[operation]) + + # Execute + MetadataService.update_documents_metadata(self.dataset, metadata_args) + + # Verify + # 1. Check that doc_metadata contains BOTH existing and new keys + expected_metadata = {"existing_key": "existing_value", "new_key": "new_value"} + assert self.document.doc_metadata == expected_metadata + + # 2. Check that existing bindings were NOT deleted + # The delete call in the original code: db.session.query(...).filter_by(...).delete() + # In partial update, this should NOT be called. + mock_db.session.query.return_value.filter_by.return_value.delete.assert_not_called() + + @patch("services.metadata_service.db") + @patch("services.metadata_service.DocumentService") + @patch("services.metadata_service.current_account_with_tenant") + @patch("services.metadata_service.redis_client") + def test_full_update_replaces_metadata(self, mock_redis, mock_current_account, mock_document_service, mock_db): + # Setup mocks + mock_redis.get.return_value = None + mock_document_service.get_document.return_value = self.document + mock_current_account.return_value = (MagicMock(id="user_id"), "tenant_id") + + # Input data (partial_update=False by default) + operation = DocumentMetadataOperation( + document_id="doc_id", + metadata_list=[MetadataDetail(id="new_meta_id", name="new_key", value="new_value")], + partial_update=False, + ) + metadata_args = MetadataOperationData(operation_data=[operation]) + + # Execute + MetadataService.update_documents_metadata(self.dataset, metadata_args) + + # Verify + # 1. Check that doc_metadata contains ONLY the new key + expected_metadata = {"new_key": "new_value"} + assert self.document.doc_metadata == expected_metadata + + # 2. Check that existing bindings WERE deleted + # In full update (default), we expect the existing bindings to be cleared. + mock_db.session.query.return_value.filter_by.return_value.delete.assert_called() + + @patch("services.metadata_service.db") + @patch("services.metadata_service.DocumentService") + @patch("services.metadata_service.current_account_with_tenant") + @patch("services.metadata_service.redis_client") + def test_partial_update_skips_existing_binding( + self, mock_redis, mock_current_account, mock_document_service, mock_db + ): + # Setup mocks + mock_redis.get.return_value = None + mock_document_service.get_document.return_value = self.document + mock_current_account.return_value = (MagicMock(id="user_id"), "tenant_id") + + # Mock DB query to return an existing binding + # This simulates that the document ALREADY has the metadata we are trying to add + mock_existing_binding = MagicMock() + mock_db.session.query.return_value.filter_by.return_value.first.return_value = mock_existing_binding + + # Input data + operation = DocumentMetadataOperation( + document_id="doc_id", + metadata_list=[MetadataDetail(id="existing_meta_id", name="existing_key", value="existing_value")], + partial_update=True, + ) + metadata_args = MetadataOperationData(operation_data=[operation]) + + # Execute + MetadataService.update_documents_metadata(self.dataset, metadata_args) + + # Verify + # We verify that db.session.add was NOT called for DatasetMetadataBinding + # Since we can't easily check "not called with specific type" on the generic add method without complex logic, + # we can check if the number of add calls is 1 (only for the document update) instead of 2 (document + binding) + + # Expected calls: + # 1. db.session.add(document) + # 2. NO db.session.add(binding) because it exists + + # Note: In the code, db.session.add is called for document. + # Then loop over metadata_list. + # If existing_binding found, continue. + # So binding add should be skipped. + + # Let's filter the calls to add to see what was added + add_calls = mock_db.session.add.call_args_list + added_objects = [call.args[0] for call in add_calls] + + # Check that no DatasetMetadataBinding was added + from models.dataset import DatasetMetadataBinding + + has_binding_add = any( + isinstance(obj, DatasetMetadataBinding) + or (isinstance(obj, MagicMock) and getattr(obj, "__class__", None) == DatasetMetadataBinding) + for obj in added_objects + ) + + # Since we mock everything, checking isinstance might be tricky if DatasetMetadataBinding + # is not the exact class used in the service (imports match). + # But we can check the count. + # If it were added, there would be 2 calls. If skipped, 1 call. + assert mock_db.session.add.call_count == 1 + + +if __name__ == "__main__": + unittest.main() diff --git a/api/tests/unit_tests/services/test_rag_pipeline_task_proxy.py b/api/tests/unit_tests/services/test_rag_pipeline_task_proxy.py new file mode 100644 index 0000000000..f5a48b1416 --- /dev/null +++ b/api/tests/unit_tests/services/test_rag_pipeline_task_proxy.py @@ -0,0 +1,483 @@ +import json +from unittest.mock import Mock, patch + +import pytest + +from core.app.entities.rag_pipeline_invoke_entities import RagPipelineInvokeEntity +from core.rag.pipeline.queue import TenantIsolatedTaskQueue +from enums.cloud_plan import CloudPlan +from services.rag_pipeline.rag_pipeline_task_proxy import RagPipelineTaskProxy + + +class RagPipelineTaskProxyTestDataFactory: + """Factory class for creating test data and mock objects for RagPipelineTaskProxy tests.""" + + @staticmethod + def create_mock_features(billing_enabled: bool = False, plan: CloudPlan = CloudPlan.SANDBOX) -> Mock: + """Create mock features with billing configuration.""" + features = Mock() + features.billing = Mock() + features.billing.enabled = billing_enabled + features.billing.subscription = Mock() + features.billing.subscription.plan = plan + return features + + @staticmethod + def create_mock_tenant_queue(has_task_key: bool = False) -> Mock: + """Create mock TenantIsolatedTaskQueue.""" + queue = Mock(spec=TenantIsolatedTaskQueue) + queue.get_task_key.return_value = "task_key" if has_task_key else None + queue.push_tasks = Mock() + queue.set_task_waiting_time = Mock() + return queue + + @staticmethod + def create_rag_pipeline_invoke_entity( + pipeline_id: str = "pipeline-123", + user_id: str = "user-456", + tenant_id: str = "tenant-789", + workflow_id: str = "workflow-101", + streaming: bool = True, + workflow_execution_id: str | None = None, + workflow_thread_pool_id: str | None = None, + ) -> RagPipelineInvokeEntity: + """Create RagPipelineInvokeEntity instance for testing.""" + return RagPipelineInvokeEntity( + pipeline_id=pipeline_id, + application_generate_entity={"key": "value"}, + user_id=user_id, + tenant_id=tenant_id, + workflow_id=workflow_id, + streaming=streaming, + workflow_execution_id=workflow_execution_id, + workflow_thread_pool_id=workflow_thread_pool_id, + ) + + @staticmethod + def create_rag_pipeline_task_proxy( + dataset_tenant_id: str = "tenant-123", + user_id: str = "user-456", + rag_pipeline_invoke_entities: list[RagPipelineInvokeEntity] | None = None, + ) -> RagPipelineTaskProxy: + """Create RagPipelineTaskProxy instance for testing.""" + if rag_pipeline_invoke_entities is None: + rag_pipeline_invoke_entities = [RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_invoke_entity()] + return RagPipelineTaskProxy(dataset_tenant_id, user_id, rag_pipeline_invoke_entities) + + @staticmethod + def create_mock_upload_file(file_id: str = "file-123") -> Mock: + """Create mock upload file.""" + upload_file = Mock() + upload_file.id = file_id + return upload_file + + +class TestRagPipelineTaskProxy: + """Test cases for RagPipelineTaskProxy class.""" + + def test_initialization(self): + """Test RagPipelineTaskProxy initialization.""" + # Arrange + dataset_tenant_id = "tenant-123" + user_id = "user-456" + rag_pipeline_invoke_entities = [RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_invoke_entity()] + + # Act + proxy = RagPipelineTaskProxy(dataset_tenant_id, user_id, rag_pipeline_invoke_entities) + + # Assert + assert proxy._dataset_tenant_id == dataset_tenant_id + assert proxy._user_id == user_id + assert proxy._rag_pipeline_invoke_entities == rag_pipeline_invoke_entities + assert isinstance(proxy._tenant_isolated_task_queue, TenantIsolatedTaskQueue) + assert proxy._tenant_isolated_task_queue._tenant_id == dataset_tenant_id + assert proxy._tenant_isolated_task_queue._unique_key == "pipeline" + + def test_initialization_with_empty_entities(self): + """Test initialization with empty rag_pipeline_invoke_entities.""" + # Arrange + dataset_tenant_id = "tenant-123" + user_id = "user-456" + rag_pipeline_invoke_entities = [] + + # Act + proxy = RagPipelineTaskProxy(dataset_tenant_id, user_id, rag_pipeline_invoke_entities) + + # Assert + assert proxy._dataset_tenant_id == dataset_tenant_id + assert proxy._user_id == user_id + assert proxy._rag_pipeline_invoke_entities == [] + + def test_initialization_with_multiple_entities(self): + """Test initialization with multiple rag_pipeline_invoke_entities.""" + # Arrange + dataset_tenant_id = "tenant-123" + user_id = "user-456" + rag_pipeline_invoke_entities = [ + RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_invoke_entity(pipeline_id="pipeline-1"), + RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_invoke_entity(pipeline_id="pipeline-2"), + RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_invoke_entity(pipeline_id="pipeline-3"), + ] + + # Act + proxy = RagPipelineTaskProxy(dataset_tenant_id, user_id, rag_pipeline_invoke_entities) + + # Assert + assert len(proxy._rag_pipeline_invoke_entities) == 3 + assert proxy._rag_pipeline_invoke_entities[0].pipeline_id == "pipeline-1" + assert proxy._rag_pipeline_invoke_entities[1].pipeline_id == "pipeline-2" + assert proxy._rag_pipeline_invoke_entities[2].pipeline_id == "pipeline-3" + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FeatureService") + def test_features_property(self, mock_feature_service): + """Test cached_property features.""" + # Arrange + mock_features = RagPipelineTaskProxyTestDataFactory.create_mock_features() + mock_feature_service.get_features.return_value = mock_features + proxy = RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_task_proxy() + + # Act + features1 = proxy.features + features2 = proxy.features # Second call should use cached property + + # Assert + assert features1 == mock_features + assert features2 == mock_features + assert features1 is features2 # Should be the same instance due to caching + mock_feature_service.get_features.assert_called_once_with("tenant-123") + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FileService") + @patch("services.rag_pipeline.rag_pipeline_task_proxy.db") + def test_upload_invoke_entities(self, mock_db, mock_file_service_class): + """Test _upload_invoke_entities method.""" + # Arrange + proxy = RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_task_proxy() + mock_file_service = Mock() + mock_file_service_class.return_value = mock_file_service + mock_upload_file = RagPipelineTaskProxyTestDataFactory.create_mock_upload_file("file-123") + mock_file_service.upload_text.return_value = mock_upload_file + + # Act + result = proxy._upload_invoke_entities() + + # Assert + assert result == "file-123" + mock_file_service_class.assert_called_once_with(mock_db.engine) + + # Verify upload_text was called with correct parameters + mock_file_service.upload_text.assert_called_once() + call_args = mock_file_service.upload_text.call_args + json_text, name, user_id, tenant_id = call_args[0] + + assert name == "rag_pipeline_invoke_entities.json" + assert user_id == "user-456" + assert tenant_id == "tenant-123" + + # Verify JSON content + parsed_json = json.loads(json_text) + assert len(parsed_json) == 1 + assert parsed_json[0]["pipeline_id"] == "pipeline-123" + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FileService") + @patch("services.rag_pipeline.rag_pipeline_task_proxy.db") + def test_upload_invoke_entities_with_multiple_entities(self, mock_db, mock_file_service_class): + """Test _upload_invoke_entities method with multiple entities.""" + # Arrange + entities = [ + RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_invoke_entity(pipeline_id="pipeline-1"), + RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_invoke_entity(pipeline_id="pipeline-2"), + ] + proxy = RagPipelineTaskProxy("tenant-123", "user-456", entities) + mock_file_service = Mock() + mock_file_service_class.return_value = mock_file_service + mock_upload_file = RagPipelineTaskProxyTestDataFactory.create_mock_upload_file("file-456") + mock_file_service.upload_text.return_value = mock_upload_file + + # Act + result = proxy._upload_invoke_entities() + + # Assert + assert result == "file-456" + + # Verify JSON content contains both entities + call_args = mock_file_service.upload_text.call_args + json_text = call_args[0][0] + parsed_json = json.loads(json_text) + assert len(parsed_json) == 2 + assert parsed_json[0]["pipeline_id"] == "pipeline-1" + assert parsed_json[1]["pipeline_id"] == "pipeline-2" + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.rag_pipeline_run_task") + def test_send_to_direct_queue(self, mock_task): + """Test _send_to_direct_queue method.""" + # Arrange + proxy = RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_task_proxy() + proxy._tenant_isolated_task_queue = RagPipelineTaskProxyTestDataFactory.create_mock_tenant_queue() + upload_file_id = "file-123" + mock_task.delay = Mock() + + # Act + proxy._send_to_direct_queue(upload_file_id, mock_task) + + # If sent to direct queue, tenant_isolated_task_queue should not be called + proxy._tenant_isolated_task_queue.push_tasks.assert_not_called() + + # Celery should be called directly + mock_task.delay.assert_called_once_with( + rag_pipeline_invoke_entities_file_id=upload_file_id, tenant_id="tenant-123" + ) + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.rag_pipeline_run_task") + def test_send_to_tenant_queue_with_existing_task_key(self, mock_task): + """Test _send_to_tenant_queue when task key exists.""" + # Arrange + proxy = RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_task_proxy() + proxy._tenant_isolated_task_queue = RagPipelineTaskProxyTestDataFactory.create_mock_tenant_queue( + has_task_key=True + ) + upload_file_id = "file-123" + mock_task.delay = Mock() + + # Act + proxy._send_to_tenant_queue(upload_file_id, mock_task) + + # If task key exists, should push tasks to the queue + proxy._tenant_isolated_task_queue.push_tasks.assert_called_once_with([upload_file_id]) + # Celery should not be called directly + mock_task.delay.assert_not_called() + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.rag_pipeline_run_task") + def test_send_to_tenant_queue_without_task_key(self, mock_task): + """Test _send_to_tenant_queue when no task key exists.""" + # Arrange + proxy = RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_task_proxy() + proxy._tenant_isolated_task_queue = RagPipelineTaskProxyTestDataFactory.create_mock_tenant_queue( + has_task_key=False + ) + upload_file_id = "file-123" + mock_task.delay = Mock() + + # Act + proxy._send_to_tenant_queue(upload_file_id, mock_task) + + # If no task key, should set task waiting time key first + proxy._tenant_isolated_task_queue.set_task_waiting_time.assert_called_once() + mock_task.delay.assert_called_once_with( + rag_pipeline_invoke_entities_file_id=upload_file_id, tenant_id="tenant-123" + ) + + # The first task should be sent to celery directly, so push tasks should not be called + proxy._tenant_isolated_task_queue.push_tasks.assert_not_called() + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.rag_pipeline_run_task") + def test_send_to_default_tenant_queue(self, mock_task): + """Test _send_to_default_tenant_queue method.""" + # Arrange + proxy = RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_task_proxy() + proxy._send_to_tenant_queue = Mock() + upload_file_id = "file-123" + + # Act + proxy._send_to_default_tenant_queue(upload_file_id) + + # Assert + proxy._send_to_tenant_queue.assert_called_once_with(upload_file_id, mock_task) + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.priority_rag_pipeline_run_task") + def test_send_to_priority_tenant_queue(self, mock_task): + """Test _send_to_priority_tenant_queue method.""" + # Arrange + proxy = RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_task_proxy() + proxy._send_to_tenant_queue = Mock() + upload_file_id = "file-123" + + # Act + proxy._send_to_priority_tenant_queue(upload_file_id) + + # Assert + proxy._send_to_tenant_queue.assert_called_once_with(upload_file_id, mock_task) + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.priority_rag_pipeline_run_task") + def test_send_to_priority_direct_queue(self, mock_task): + """Test _send_to_priority_direct_queue method.""" + # Arrange + proxy = RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_task_proxy() + proxy._send_to_direct_queue = Mock() + upload_file_id = "file-123" + + # Act + proxy._send_to_priority_direct_queue(upload_file_id) + + # Assert + proxy._send_to_direct_queue.assert_called_once_with(upload_file_id, mock_task) + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FeatureService") + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FileService") + @patch("services.rag_pipeline.rag_pipeline_task_proxy.db") + def test_dispatch_with_billing_enabled_sandbox_plan(self, mock_db, mock_file_service_class, mock_feature_service): + """Test _dispatch method when billing is enabled with sandbox plan.""" + # Arrange + mock_features = RagPipelineTaskProxyTestDataFactory.create_mock_features( + billing_enabled=True, plan=CloudPlan.SANDBOX + ) + mock_feature_service.get_features.return_value = mock_features + proxy = RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_task_proxy() + proxy._send_to_default_tenant_queue = Mock() + + mock_file_service = Mock() + mock_file_service_class.return_value = mock_file_service + mock_upload_file = RagPipelineTaskProxyTestDataFactory.create_mock_upload_file("file-123") + mock_file_service.upload_text.return_value = mock_upload_file + + # Act + proxy._dispatch() + + # If billing is enabled with sandbox plan, should send to default tenant queue + proxy._send_to_default_tenant_queue.assert_called_once_with("file-123") + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FeatureService") + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FileService") + @patch("services.rag_pipeline.rag_pipeline_task_proxy.db") + def test_dispatch_with_billing_enabled_non_sandbox_plan( + self, mock_db, mock_file_service_class, mock_feature_service + ): + """Test _dispatch method when billing is enabled with non-sandbox plan.""" + # Arrange + mock_features = RagPipelineTaskProxyTestDataFactory.create_mock_features( + billing_enabled=True, plan=CloudPlan.TEAM + ) + mock_feature_service.get_features.return_value = mock_features + proxy = RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_task_proxy() + proxy._send_to_priority_tenant_queue = Mock() + + mock_file_service = Mock() + mock_file_service_class.return_value = mock_file_service + mock_upload_file = RagPipelineTaskProxyTestDataFactory.create_mock_upload_file("file-123") + mock_file_service.upload_text.return_value = mock_upload_file + + # Act + proxy._dispatch() + + # If billing is enabled with non-sandbox plan, should send to priority tenant queue + proxy._send_to_priority_tenant_queue.assert_called_once_with("file-123") + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FeatureService") + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FileService") + @patch("services.rag_pipeline.rag_pipeline_task_proxy.db") + def test_dispatch_with_billing_disabled(self, mock_db, mock_file_service_class, mock_feature_service): + """Test _dispatch method when billing is disabled.""" + # Arrange + mock_features = RagPipelineTaskProxyTestDataFactory.create_mock_features(billing_enabled=False) + mock_feature_service.get_features.return_value = mock_features + proxy = RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_task_proxy() + proxy._send_to_priority_direct_queue = Mock() + + mock_file_service = Mock() + mock_file_service_class.return_value = mock_file_service + mock_upload_file = RagPipelineTaskProxyTestDataFactory.create_mock_upload_file("file-123") + mock_file_service.upload_text.return_value = mock_upload_file + + # Act + proxy._dispatch() + + # If billing is disabled, for example: self-hosted or enterprise, should send to priority direct queue + proxy._send_to_priority_direct_queue.assert_called_once_with("file-123") + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FileService") + @patch("services.rag_pipeline.rag_pipeline_task_proxy.db") + def test_dispatch_with_empty_upload_file_id(self, mock_db, mock_file_service_class): + """Test _dispatch method when upload_file_id is empty.""" + # Arrange + proxy = RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_task_proxy() + + mock_file_service = Mock() + mock_file_service_class.return_value = mock_file_service + mock_upload_file = Mock() + mock_upload_file.id = "" # Empty file ID + mock_file_service.upload_text.return_value = mock_upload_file + + # Act & Assert + with pytest.raises(ValueError, match="upload_file_id is empty"): + proxy._dispatch() + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FeatureService") + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FileService") + @patch("services.rag_pipeline.rag_pipeline_task_proxy.db") + def test_dispatch_edge_case_empty_plan(self, mock_db, mock_file_service_class, mock_feature_service): + """Test _dispatch method with empty plan string.""" + # Arrange + mock_features = RagPipelineTaskProxyTestDataFactory.create_mock_features(billing_enabled=True, plan="") + mock_feature_service.get_features.return_value = mock_features + proxy = RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_task_proxy() + proxy._send_to_priority_tenant_queue = Mock() + + mock_file_service = Mock() + mock_file_service_class.return_value = mock_file_service + mock_upload_file = RagPipelineTaskProxyTestDataFactory.create_mock_upload_file("file-123") + mock_file_service.upload_text.return_value = mock_upload_file + + # Act + proxy._dispatch() + + # Assert + proxy._send_to_priority_tenant_queue.assert_called_once_with("file-123") + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FeatureService") + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FileService") + @patch("services.rag_pipeline.rag_pipeline_task_proxy.db") + def test_dispatch_edge_case_none_plan(self, mock_db, mock_file_service_class, mock_feature_service): + """Test _dispatch method with None plan.""" + # Arrange + mock_features = RagPipelineTaskProxyTestDataFactory.create_mock_features(billing_enabled=True, plan=None) + mock_feature_service.get_features.return_value = mock_features + proxy = RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_task_proxy() + proxy._send_to_priority_tenant_queue = Mock() + + mock_file_service = Mock() + mock_file_service_class.return_value = mock_file_service + mock_upload_file = RagPipelineTaskProxyTestDataFactory.create_mock_upload_file("file-123") + mock_file_service.upload_text.return_value = mock_upload_file + + # Act + proxy._dispatch() + + # Assert + proxy._send_to_priority_tenant_queue.assert_called_once_with("file-123") + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FeatureService") + @patch("services.rag_pipeline.rag_pipeline_task_proxy.FileService") + @patch("services.rag_pipeline.rag_pipeline_task_proxy.db") + def test_delay_method(self, mock_db, mock_file_service_class, mock_feature_service): + """Test delay method integration.""" + # Arrange + mock_features = RagPipelineTaskProxyTestDataFactory.create_mock_features( + billing_enabled=True, plan=CloudPlan.SANDBOX + ) + mock_feature_service.get_features.return_value = mock_features + proxy = RagPipelineTaskProxyTestDataFactory.create_rag_pipeline_task_proxy() + proxy._dispatch = Mock() + + mock_file_service = Mock() + mock_file_service_class.return_value = mock_file_service + mock_upload_file = RagPipelineTaskProxyTestDataFactory.create_mock_upload_file("file-123") + mock_file_service.upload_text.return_value = mock_upload_file + + # Act + proxy.delay() + + # Assert + proxy._dispatch.assert_called_once() + + @patch("services.rag_pipeline.rag_pipeline_task_proxy.logger") + def test_delay_method_with_empty_entities(self, mock_logger): + """Test delay method with empty rag_pipeline_invoke_entities.""" + # Arrange + proxy = RagPipelineTaskProxy("tenant-123", "user-456", []) + + # Act + proxy.delay() + + # Assert + mock_logger.warning.assert_called_once_with( + "Received empty rag pipeline invoke entities, no tasks delivered: %s %s", "tenant-123", "user-456" + ) diff --git a/api/tests/unit_tests/services/test_schedule_service.py b/api/tests/unit_tests/services/test_schedule_service.py index f3f9fbec80..e28965ea2c 100644 --- a/api/tests/unit_tests/services/test_schedule_service.py +++ b/api/tests/unit_tests/services/test_schedule_service.py @@ -62,12 +62,10 @@ class TestScheduleService(unittest.TestCase): def test_calculate_next_run_at_invalid_cron(self): """Test calculating next run time with invalid cron expression.""" - from croniter import CroniterBadCronError - cron_expr = "invalid cron" timezone = "UTC" - with pytest.raises(CroniterBadCronError): + with pytest.raises(ValueError): calculate_next_run_at(cron_expr, timezone) def test_calculate_next_run_at_invalid_timezone(self): @@ -109,7 +107,7 @@ class TestScheduleService(unittest.TestCase): mock_session.add.assert_called_once() mock_session.flush.assert_called_once() - @patch("services.schedule_service.calculate_next_run_at") + @patch("services.trigger.schedule_service.calculate_next_run_at") def test_update_schedule(self, mock_calculate_next_run): """Test updating an existing schedule.""" mock_session = MagicMock(spec=Session) @@ -189,7 +187,7 @@ class TestScheduleService(unittest.TestCase): assert "Schedule not found: non-existent-id" in str(context.value) mock_session.delete.assert_not_called() - @patch("services.schedule_service.select") + @patch("services.trigger.schedule_service.select") def test_get_tenant_owner(self, mock_select): """Test getting tenant owner account.""" mock_session = MagicMock(spec=Session) @@ -211,7 +209,7 @@ class TestScheduleService(unittest.TestCase): assert result is not None assert result.id == "owner-account-id" - @patch("services.schedule_service.select") + @patch("services.trigger.schedule_service.select") def test_get_tenant_owner_fallback_to_admin(self, mock_select): """Test getting tenant owner falls back to admin if no owner.""" mock_session = MagicMock(spec=Session) @@ -233,7 +231,7 @@ class TestScheduleService(unittest.TestCase): assert result is not None assert result.id == "admin-account-id" - @patch("services.schedule_service.calculate_next_run_at") + @patch("services.trigger.schedule_service.calculate_next_run_at") def test_update_next_run_at(self, mock_calculate_next_run): """Test updating next run time after schedule triggered.""" mock_session = MagicMock(spec=Session) diff --git a/api/tests/unit_tests/services/test_variable_truncator.py b/api/tests/unit_tests/services/test_variable_truncator.py index 6761f939e3..cf6fb25c1c 100644 --- a/api/tests/unit_tests/services/test_variable_truncator.py +++ b/api/tests/unit_tests/services/test_variable_truncator.py @@ -21,6 +21,7 @@ from core.file.enums import FileTransferMethod, FileType from core.file.models import File from core.variables.segments import ( ArrayFileSegment, + ArrayNumberSegment, ArraySegment, FileSegment, FloatSegment, @@ -30,6 +31,7 @@ from core.variables.segments import ( StringSegment, ) from services.variable_truncator import ( + DummyVariableTruncator, MaxDepthExceededError, TruncationResult, UnknownTypeError, @@ -596,3 +598,32 @@ class TestIntegrationScenarios: truncated_mapping, truncated = truncator.truncate_variable_mapping(mapping) assert truncated is False assert truncated_mapping == mapping + + +def test_dummy_variable_truncator_methods(): + """Test DummyVariableTruncator methods work correctly.""" + truncator = DummyVariableTruncator() + + # Test truncate_variable_mapping + test_data: dict[str, Any] = { + "key1": "value1", + "key2": ["item1", "item2"], + "large_array": list(range(2000)), + } + result, is_truncated = truncator.truncate_variable_mapping(test_data) + + assert result == test_data + assert not is_truncated + + # Test truncate method + segment = StringSegment(value="test string") + result = truncator.truncate(segment) + assert isinstance(result, TruncationResult) + assert result.result == segment + assert result.truncated is False + + segment = ArrayNumberSegment(value=list(range(2000))) + result = truncator.truncate(segment) + assert isinstance(result, TruncationResult) + assert result.result == segment + assert result.truncated is False diff --git a/api/tests/unit_tests/services/test_webhook_service.py b/api/tests/unit_tests/services/test_webhook_service.py index 44765aeb9e..010295bcd6 100644 --- a/api/tests/unit_tests/services/test_webhook_service.py +++ b/api/tests/unit_tests/services/test_webhook_service.py @@ -183,8 +183,8 @@ class TestWebhookServiceUnit: assert response_data[0]["id"] == 1 assert response_data[1]["id"] == 2 - @patch("services.webhook_service.ToolFileManager") - @patch("services.webhook_service.file_factory") + @patch("services.trigger.webhook_service.ToolFileManager") + @patch("services.trigger.webhook_service.file_factory") def test_process_file_uploads_success(self, mock_file_factory, mock_tool_file_manager): """Test successful file upload processing.""" # Mock ToolFileManager @@ -223,8 +223,8 @@ class TestWebhookServiceUnit: assert mock_tool_file_manager.call_count == 2 assert mock_file_factory.build_from_mapping.call_count == 2 - @patch("services.webhook_service.ToolFileManager") - @patch("services.webhook_service.file_factory") + @patch("services.trigger.webhook_service.ToolFileManager") + @patch("services.trigger.webhook_service.file_factory") def test_process_file_uploads_with_errors(self, mock_file_factory, mock_tool_file_manager): """Test file upload processing with errors.""" # Mock ToolFileManager @@ -472,15 +472,11 @@ class TestWebhookServiceUnit: mock_get_trigger.return_value = (mock_trigger, mock_workflow, mock_config) mock_extract.return_value = mock_data - # Test normal mode (skip_status_check=False) result = _prepare_webhook_execution("test_webhook", is_debug=False) - mock_get_trigger.assert_called_with("test_webhook", skip_status_check=False) assert result == (mock_trigger, mock_workflow, mock_config, mock_data, None) # Reset mock mock_get_trigger.reset_mock() - # Test debug mode (skip_status_check=True) result = _prepare_webhook_execution("test_webhook", is_debug=True) - mock_get_trigger.assert_called_with("test_webhook", skip_status_check=True) assert result == (mock_trigger, mock_workflow, mock_config, mock_data, None) diff --git a/api/tests/unit_tests/services/test_workflow_run_service_pause.py b/api/tests/unit_tests/services/test_workflow_run_service_pause.py new file mode 100644 index 0000000000..a062d9444e --- /dev/null +++ b/api/tests/unit_tests/services/test_workflow_run_service_pause.py @@ -0,0 +1,200 @@ +"""Comprehensive unit tests for WorkflowRunService class. + +This test suite covers all pause state management operations including: +- Retrieving pause state for workflow runs +- Saving pause state with file uploads +- Marking paused workflows as resumed +- Error handling and edge cases +- Database transaction management +- Repository-based approach testing +""" + +from datetime import datetime +from unittest.mock import MagicMock, create_autospec, patch + +import pytest +from sqlalchemy import Engine +from sqlalchemy.orm import Session, sessionmaker + +from core.workflow.enums import WorkflowExecutionStatus +from repositories.api_workflow_run_repository import APIWorkflowRunRepository +from repositories.sqlalchemy_api_workflow_run_repository import _PrivateWorkflowPauseEntity +from services.workflow_run_service import ( + WorkflowRunService, +) + + +class TestDataFactory: + """Factory class for creating test data objects.""" + + @staticmethod + def create_workflow_run_mock( + id: str = "workflow-run-123", + tenant_id: str = "tenant-456", + app_id: str = "app-789", + workflow_id: str = "workflow-101", + status: str | WorkflowExecutionStatus = "paused", + pause_id: str | None = None, + **kwargs, + ) -> MagicMock: + """Create a mock WorkflowRun object.""" + mock_run = MagicMock() + mock_run.id = id + mock_run.tenant_id = tenant_id + mock_run.app_id = app_id + mock_run.workflow_id = workflow_id + mock_run.status = status + mock_run.pause_id = pause_id + + for key, value in kwargs.items(): + setattr(mock_run, key, value) + + return mock_run + + @staticmethod + def create_workflow_pause_mock( + id: str = "pause-123", + tenant_id: str = "tenant-456", + app_id: str = "app-789", + workflow_id: str = "workflow-101", + workflow_execution_id: str = "workflow-execution-123", + state_file_id: str = "file-456", + resumed_at: datetime | None = None, + **kwargs, + ) -> MagicMock: + """Create a mock WorkflowPauseModel object.""" + mock_pause = MagicMock() + mock_pause.id = id + mock_pause.tenant_id = tenant_id + mock_pause.app_id = app_id + mock_pause.workflow_id = workflow_id + mock_pause.workflow_execution_id = workflow_execution_id + mock_pause.state_file_id = state_file_id + mock_pause.resumed_at = resumed_at + + for key, value in kwargs.items(): + setattr(mock_pause, key, value) + + return mock_pause + + @staticmethod + def create_upload_file_mock( + id: str = "file-456", + key: str = "upload_files/test/state.json", + name: str = "state.json", + tenant_id: str = "tenant-456", + **kwargs, + ) -> MagicMock: + """Create a mock UploadFile object.""" + mock_file = MagicMock() + mock_file.id = id + mock_file.key = key + mock_file.name = name + mock_file.tenant_id = tenant_id + + for key, value in kwargs.items(): + setattr(mock_file, key, value) + + return mock_file + + @staticmethod + def create_pause_entity_mock( + pause_model: MagicMock | None = None, + upload_file: MagicMock | None = None, + ) -> _PrivateWorkflowPauseEntity: + """Create a mock _PrivateWorkflowPauseEntity object.""" + if pause_model is None: + pause_model = TestDataFactory.create_workflow_pause_mock() + if upload_file is None: + upload_file = TestDataFactory.create_upload_file_mock() + + return _PrivateWorkflowPauseEntity.from_models(pause_model, upload_file) + + +class TestWorkflowRunService: + """Comprehensive unit tests for WorkflowRunService class.""" + + @pytest.fixture + def mock_session_factory(self): + """Create a mock session factory with proper session management.""" + mock_session = create_autospec(Session) + + # Create a mock context manager for the session + mock_session_cm = MagicMock() + mock_session_cm.__enter__ = MagicMock(return_value=mock_session) + mock_session_cm.__exit__ = MagicMock(return_value=None) + + # Create a mock context manager for the transaction + mock_transaction_cm = MagicMock() + mock_transaction_cm.__enter__ = MagicMock(return_value=mock_session) + mock_transaction_cm.__exit__ = MagicMock(return_value=None) + + mock_session.begin = MagicMock(return_value=mock_transaction_cm) + + # Create mock factory that returns the context manager + mock_factory = MagicMock(spec=sessionmaker) + mock_factory.return_value = mock_session_cm + + return mock_factory, mock_session + + @pytest.fixture + def mock_workflow_run_repository(self): + """Create a mock APIWorkflowRunRepository.""" + mock_repo = create_autospec(APIWorkflowRunRepository) + return mock_repo + + @pytest.fixture + def workflow_run_service(self, mock_session_factory, mock_workflow_run_repository): + """Create WorkflowRunService instance with mocked dependencies.""" + session_factory, _ = mock_session_factory + + with patch("services.workflow_run_service.DifyAPIRepositoryFactory") as mock_factory: + mock_factory.create_api_workflow_run_repository.return_value = mock_workflow_run_repository + service = WorkflowRunService(session_factory) + return service + + @pytest.fixture + def workflow_run_service_with_engine(self, mock_session_factory, mock_workflow_run_repository): + """Create WorkflowRunService instance with Engine input.""" + mock_engine = create_autospec(Engine) + session_factory, _ = mock_session_factory + + with patch("services.workflow_run_service.DifyAPIRepositoryFactory") as mock_factory: + mock_factory.create_api_workflow_run_repository.return_value = mock_workflow_run_repository + service = WorkflowRunService(mock_engine) + return service + + # ==================== Initialization Tests ==================== + + def test_init_with_session_factory(self, mock_session_factory, mock_workflow_run_repository): + """Test WorkflowRunService initialization with session_factory.""" + session_factory, _ = mock_session_factory + + with patch("services.workflow_run_service.DifyAPIRepositoryFactory") as mock_factory: + mock_factory.create_api_workflow_run_repository.return_value = mock_workflow_run_repository + service = WorkflowRunService(session_factory) + + assert service._session_factory == session_factory + mock_factory.create_api_workflow_run_repository.assert_called_once_with(session_factory) + + def test_init_with_engine(self, mock_session_factory, mock_workflow_run_repository): + """Test WorkflowRunService initialization with Engine (should convert to sessionmaker).""" + mock_engine = create_autospec(Engine) + session_factory, _ = mock_session_factory + + with patch("services.workflow_run_service.DifyAPIRepositoryFactory") as mock_factory: + mock_factory.create_api_workflow_run_repository.return_value = mock_workflow_run_repository + with patch("services.workflow_run_service.sessionmaker", return_value=session_factory) as mock_sessionmaker: + service = WorkflowRunService(mock_engine) + + mock_sessionmaker.assert_called_once_with(bind=mock_engine, expire_on_commit=False) + assert service._session_factory == session_factory + mock_factory.create_api_workflow_run_repository.assert_called_once_with(session_factory) + + def test_init_with_default_dependencies(self, mock_session_factory): + """Test WorkflowRunService initialization with default dependencies.""" + session_factory, _ = mock_session_factory + + service = WorkflowRunService(session_factory) + + assert service._session_factory == session_factory diff --git a/api/tests/unit_tests/services/tools/test_mcp_tools_transform.py b/api/tests/unit_tests/services/tools/test_mcp_tools_transform.py index fb0139932b..7511fd6f0c 100644 --- a/api/tests/unit_tests/services/tools/test_mcp_tools_transform.py +++ b/api/tests/unit_tests/services/tools/test_mcp_tools_transform.py @@ -180,6 +180,25 @@ class TestMCPToolTransform: # Set tools data with null description mock_provider_full.tools = '[{"name": "tool1", "description": null, "inputSchema": {}}]' + # Mock the to_entity and to_api_response methods + mock_entity = Mock() + mock_entity.to_api_response.return_value = { + "name": "Test MCP Provider", + "type": ToolProviderType.MCP, + "is_team_authorization": True, + "server_url": "https://*****.com/mcp", + "provider_icon": "icon.png", + "masked_headers": {"Authorization": "Bearer *****"}, + "updated_at": 1234567890, + "labels": [], + "author": "Test User", + "description": I18nObject(en_US="Test MCP Provider Description", zh_Hans="Test MCP Provider Description"), + "icon": "icon.png", + "label": I18nObject(en_US="Test MCP Provider", zh_Hans="Test MCP Provider"), + "masked_credentials": {}, + } + mock_provider_full.to_entity.return_value = mock_entity + # Call the method with for_list=True result = ToolTransformService.mcp_provider_to_user_provider(mock_provider_full, for_list=True) @@ -198,6 +217,27 @@ class TestMCPToolTransform: # Set tools data with description mock_provider_full.tools = '[{"name": "tool1", "description": "Tool description", "inputSchema": {}}]' + # Mock the to_entity and to_api_response methods + mock_entity = Mock() + mock_entity.to_api_response.return_value = { + "name": "Test MCP Provider", + "type": ToolProviderType.MCP, + "is_team_authorization": True, + "server_url": "https://*****.com/mcp", + "provider_icon": "icon.png", + "masked_headers": {"Authorization": "Bearer *****"}, + "updated_at": 1234567890, + "labels": [], + "configuration": {"timeout": "30", "sse_read_timeout": "300"}, + "original_headers": {"Authorization": "Bearer secret-token"}, + "author": "Test User", + "description": I18nObject(en_US="Test MCP Provider Description", zh_Hans="Test MCP Provider Description"), + "icon": "icon.png", + "label": I18nObject(en_US="Test MCP Provider", zh_Hans="Test MCP Provider"), + "masked_credentials": {}, + } + mock_provider_full.to_entity.return_value = mock_entity + # Call the method with for_list=False result = ToolTransformService.mcp_provider_to_user_provider(mock_provider_full, for_list=False) @@ -205,8 +245,9 @@ class TestMCPToolTransform: assert isinstance(result, ToolProviderApiEntity) assert result.id == "server-identifier-456" # Should use server_identifier when for_list=False assert result.server_identifier == "server-identifier-456" - assert result.timeout == 30 - assert result.sse_read_timeout == 300 + assert result.configuration is not None + assert result.configuration.timeout == 30 + assert result.configuration.sse_read_timeout == 300 assert result.original_headers == {"Authorization": "Bearer secret-token"} assert len(result.tools) == 1 assert result.tools[0].description.en_US == "Tool description" diff --git a/api/tests/unit_tests/services/workflow/test_workflow_converter.py b/api/tests/unit_tests/services/workflow/test_workflow_converter.py index 63ce4c0c3c..267c0a85a7 100644 --- a/api/tests/unit_tests/services/workflow/test_workflow_converter.py +++ b/api/tests/unit_tests/services/workflow/test_workflow_converter.py @@ -70,12 +70,13 @@ def test__convert_to_http_request_node_for_chatbot(default_variables): api_based_extension_id = "api_based_extension_id" mock_api_based_extension = APIBasedExtension( - id=api_based_extension_id, + tenant_id="tenant_id", name="api-1", api_key="encrypted_api_key", api_endpoint="https://dify.ai", ) + mock_api_based_extension.id = api_based_extension_id workflow_converter = WorkflowConverter() workflow_converter._get_api_based_extension = MagicMock(return_value=mock_api_based_extension) @@ -131,11 +132,12 @@ def test__convert_to_http_request_node_for_workflow_app(default_variables): api_based_extension_id = "api_based_extension_id" mock_api_based_extension = APIBasedExtension( - id=api_based_extension_id, + tenant_id="tenant_id", name="api-1", api_key="encrypted_api_key", api_endpoint="https://dify.ai", ) + mock_api_based_extension.id = api_based_extension_id workflow_converter = WorkflowConverter() workflow_converter._get_api_based_extension = MagicMock(return_value=mock_api_based_extension) @@ -199,6 +201,7 @@ def test__convert_to_knowledge_retrieval_node_for_chatbot(): node = WorkflowConverter()._convert_to_knowledge_retrieval_node( new_app_mode=new_app_mode, dataset_config=dataset_config, model_config=model_config ) + assert node is not None assert node["data"]["type"] == "knowledge-retrieval" assert node["data"]["query_variable_selector"] == ["sys", "query"] @@ -231,6 +234,7 @@ def test__convert_to_knowledge_retrieval_node_for_workflow_app(): node = WorkflowConverter()._convert_to_knowledge_retrieval_node( new_app_mode=new_app_mode, dataset_config=dataset_config, model_config=model_config ) + assert node is not None assert node["data"]["type"] == "knowledge-retrieval" assert node["data"]["query_variable_selector"] == ["start", dataset_config.retrieve_config.query_variable] @@ -279,6 +283,7 @@ def test__convert_to_llm_node_for_chatbot_simple_chat_model(default_variables): assert llm_node["data"]["model"]["name"] == model assert llm_node["data"]["model"]["mode"] == model_mode.value template = prompt_template.simple_prompt_template + assert template is not None for v in default_variables: template = template.replace("{{" + v.variable + "}}", "{{#start." + v.variable + "#}}") assert llm_node["data"]["prompt_template"][0]["text"] == template + "\n" @@ -321,6 +326,7 @@ def test__convert_to_llm_node_for_chatbot_simple_completion_model(default_variab assert llm_node["data"]["model"]["name"] == model assert llm_node["data"]["model"]["mode"] == model_mode.value template = prompt_template.simple_prompt_template + assert template is not None for v in default_variables: template = template.replace("{{" + v.variable + "}}", "{{#start." + v.variable + "#}}") assert llm_node["data"]["prompt_template"]["text"] == template + "\n" @@ -372,6 +378,7 @@ def test__convert_to_llm_node_for_chatbot_advanced_chat_model(default_variables) assert llm_node["data"]["model"]["name"] == model assert llm_node["data"]["model"]["mode"] == model_mode.value assert isinstance(llm_node["data"]["prompt_template"], list) + assert prompt_template.advanced_chat_prompt_template is not None assert len(llm_node["data"]["prompt_template"]) == len(prompt_template.advanced_chat_prompt_template.messages) template = prompt_template.advanced_chat_prompt_template.messages[0].text for v in default_variables: @@ -418,6 +425,7 @@ def test__convert_to_llm_node_for_workflow_advanced_completion_model(default_var assert llm_node["data"]["model"]["name"] == model assert llm_node["data"]["model"]["mode"] == model_mode.value assert isinstance(llm_node["data"]["prompt_template"], dict) + assert prompt_template.advanced_completion_prompt_template is not None template = prompt_template.advanced_completion_prompt_template.prompt for v in default_variables: template = template.replace("{{" + v.variable + "}}", "{{#start." + v.variable + "#}}") diff --git a/api/tests/unit_tests/tasks/test_async_workflow_tasks.py b/api/tests/unit_tests/tasks/test_async_workflow_tasks.py new file mode 100644 index 0000000000..0920f1482c --- /dev/null +++ b/api/tests/unit_tests/tasks/test_async_workflow_tasks.py @@ -0,0 +1,18 @@ +from core.app.apps.workflow.app_generator import SKIP_PREPARE_USER_INPUTS_KEY +from services.workflow.entities import WebhookTriggerData +from tasks import async_workflow_tasks + + +def test_build_generator_args_sets_skip_flag_for_webhook(): + trigger_data = WebhookTriggerData( + app_id="app", + tenant_id="tenant", + workflow_id="workflow", + root_node_id="node", + inputs={"webhook_data": {"body": {"foo": "bar"}}}, + ) + + args = async_workflow_tasks._build_generator_args(trigger_data) + + assert args[SKIP_PREPARE_USER_INPUTS_KEY] is True + assert args["inputs"]["webhook_data"]["body"]["foo"] == "bar" diff --git a/api/uv.lock b/api/uv.lock index 3e758aae91..6300adae61 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -48,7 +48,7 @@ wheels = [ [[package]] name = "aiohttp" -version = "3.13.1" +version = "3.13.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohappyeyeballs" }, @@ -59,54 +59,54 @@ dependencies = [ { name = "propcache" }, { name = "yarl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ba/fa/3ae643cd525cf6844d3dc810481e5748107368eb49563c15a5fb9f680750/aiohttp-3.13.1.tar.gz", hash = "sha256:4b7ee9c355015813a6aa085170b96ec22315dabc3d866fd77d147927000e9464", size = 7835344, upload-time = "2025-10-17T14:03:29.337Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/ce/3b83ebba6b3207a7135e5fcaba49706f8a4b6008153b4e30540c982fae26/aiohttp-3.13.2.tar.gz", hash = "sha256:40176a52c186aefef6eb3cad2cdd30cd06e3afbe88fe8ab2af9c0b90f228daca", size = 7837994, upload-time = "2025-10-28T20:59:39.937Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/be/2c/739d03730ffce57d2093e2e611e1541ac9a4b3bb88288c33275058b9ffc2/aiohttp-3.13.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9eefa0a891e85dca56e2d00760945a6325bd76341ec386d3ad4ff72eb97b7e64", size = 742004, upload-time = "2025-10-17T13:59:29.73Z" }, - { url = "https://files.pythonhosted.org/packages/fc/f8/7f5b7f7184d7c80e421dbaecbd13e0b2a0bb8663fd0406864f9a167a438c/aiohttp-3.13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6c20eb646371a5a57a97de67e52aac6c47badb1564e719b3601bbb557a2e8fd0", size = 495601, upload-time = "2025-10-17T13:59:31.312Z" }, - { url = "https://files.pythonhosted.org/packages/3e/af/fb78d028b9642dd33ff127d9a6a151586f33daff631b05250fecd0ab23f8/aiohttp-3.13.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bfc28038cd86fb1deed5cc75c8fda45c6b0f5c51dfd76f8c63d3d22dc1ab3d1b", size = 491790, upload-time = "2025-10-17T13:59:33.304Z" }, - { url = "https://files.pythonhosted.org/packages/1e/ae/e40e422ee995e4f91f7f087b86304e3dd622d3a5b9ca902a1e94ebf9a117/aiohttp-3.13.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b22eeffca2e522451990c31a36fe0e71079e6112159f39a4391f1c1e259a795", size = 1746350, upload-time = "2025-10-17T13:59:35.158Z" }, - { url = "https://files.pythonhosted.org/packages/28/a5/fe6022bb869bf2d2633b155ed8348d76358c22d5ff9692a15016b2d1019f/aiohttp-3.13.1-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:65782b2977c05ebd78787e3c834abe499313bf69d6b8be4ff9c340901ee7541f", size = 1703046, upload-time = "2025-10-17T13:59:37.077Z" }, - { url = "https://files.pythonhosted.org/packages/5a/a5/c4ef3617d7cdc49f2d5af077f19794946f0f2d94b93c631ace79047361a2/aiohttp-3.13.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:dacba54f9be3702eb866b0b9966754b475e1e39996e29e442c3cd7f1117b43a9", size = 1806161, upload-time = "2025-10-17T13:59:38.837Z" }, - { url = "https://files.pythonhosted.org/packages/ad/45/b87d2430aee7e7d00b24e3dff2c5bd69f21017f6edb19cfd91e514664fc8/aiohttp-3.13.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:aa878da718e8235302c365e376b768035add36b55177706d784a122cb822a6a4", size = 1894546, upload-time = "2025-10-17T13:59:40.741Z" }, - { url = "https://files.pythonhosted.org/packages/e8/a2/79eb466786a7f11a0292c353a8a9b95e88268c48c389239d7531d66dbb48/aiohttp-3.13.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e4b4e607fbd4964d65945a7b9d1e7f98b0d5545736ea613f77d5a2a37ff1e46", size = 1745683, upload-time = "2025-10-17T13:59:42.59Z" }, - { url = "https://files.pythonhosted.org/packages/93/1a/153b0ad694f377e94eacc85338efe03ed4776a396c8bb47bd9227135792a/aiohttp-3.13.1-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0c3db2d0e5477ad561bf7ba978c3ae5f8f78afda70daa05020179f759578754f", size = 1605418, upload-time = "2025-10-17T13:59:45.229Z" }, - { url = "https://files.pythonhosted.org/packages/3f/4e/18605b1bfeb4b00d3396d833647cdb213118e2a96862e5aebee62ad065b4/aiohttp-3.13.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9739d34506fdf59bf2c092560d502aa728b8cdb33f34ba15fb5e2852c35dd829", size = 1722379, upload-time = "2025-10-17T13:59:46.969Z" }, - { url = "https://files.pythonhosted.org/packages/72/13/0a38ad385d547fb283e0e1fe1ff1dff8899bd4ed0aaceeb13ec14abbf136/aiohttp-3.13.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:b902e30a268a85d50197b4997edc6e78842c14c0703450f632c2d82f17577845", size = 1716693, upload-time = "2025-10-17T13:59:49.217Z" }, - { url = "https://files.pythonhosted.org/packages/55/65/7029d7573ab9009adde380052c6130d02c8db52195fda112db35e914fe7b/aiohttp-3.13.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1bbfc04c8de7def6504cce0a97f9885a5c805fd2395a0634bc10f9d6ecb42524", size = 1784174, upload-time = "2025-10-17T13:59:51.439Z" }, - { url = "https://files.pythonhosted.org/packages/2d/36/fd46e39cb85418e45b0e4a8bfc39651ee0b8f08ea006adf217a221cdb269/aiohttp-3.13.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:6941853405a38a5eeb7d9776db77698df373ff7fa8c765cb81ea14a344fccbeb", size = 1593716, upload-time = "2025-10-17T13:59:53.367Z" }, - { url = "https://files.pythonhosted.org/packages/85/b8/188e0cb1be37b4408373171070fda17c3bf9c67c0d3d4fd5ee5b1fa108e1/aiohttp-3.13.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:7764adcd2dc8bd21c8228a53dda2005428498dc4d165f41b6086f0ac1c65b1c9", size = 1799254, upload-time = "2025-10-17T13:59:55.352Z" }, - { url = "https://files.pythonhosted.org/packages/67/ff/fdf768764eb427b0cc9ebb2cebddf990f94d98b430679f8383c35aa114be/aiohttp-3.13.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c09e08d38586fa59e5a2f9626505a0326fadb8e9c45550f029feeb92097a0afc", size = 1738122, upload-time = "2025-10-17T13:59:57.263Z" }, - { url = "https://files.pythonhosted.org/packages/94/84/fce7a4d575943394d7c0e632273838eb6f39de8edf25386017bf5f0de23b/aiohttp-3.13.1-cp311-cp311-win32.whl", hash = "sha256:ce1371675e74f6cf271d0b5530defb44cce713fd0ab733713562b3a2b870815c", size = 430491, upload-time = "2025-10-17T13:59:59.466Z" }, - { url = "https://files.pythonhosted.org/packages/ac/d2/d21b8ab6315a5d588c550ab285b4f02ae363edf012920e597904c5a56608/aiohttp-3.13.1-cp311-cp311-win_amd64.whl", hash = "sha256:77a2f5cc28cf4704cc157be135c6a6cfb38c9dea478004f1c0fd7449cf445c28", size = 454808, upload-time = "2025-10-17T14:00:01.247Z" }, - { url = "https://files.pythonhosted.org/packages/1a/72/d463a10bf29871f6e3f63bcf3c91362dc4d72ed5917a8271f96672c415ad/aiohttp-3.13.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0760bd9a28efe188d77b7c3fe666e6ef74320d0f5b105f2e931c7a7e884c8230", size = 736218, upload-time = "2025-10-17T14:00:03.51Z" }, - { url = "https://files.pythonhosted.org/packages/26/13/f7bccedbe52ea5a6eef1e4ebb686a8d7765319dfd0a5939f4238cb6e79e6/aiohttp-3.13.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7129a424b441c3fe018a414401bf1b9e1d49492445f5676a3aecf4f74f67fcdb", size = 491251, upload-time = "2025-10-17T14:00:05.756Z" }, - { url = "https://files.pythonhosted.org/packages/0c/7c/7ea51b5aed6cc69c873f62548da8345032aa3416336f2d26869d4d37b4a2/aiohttp-3.13.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e1cb04ae64a594f6ddf5cbb024aba6b4773895ab6ecbc579d60414f8115e9e26", size = 490394, upload-time = "2025-10-17T14:00:07.504Z" }, - { url = "https://files.pythonhosted.org/packages/31/05/1172cc4af4557f6522efdee6eb2b9f900e1e320a97e25dffd3c5a6af651b/aiohttp-3.13.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:782d656a641e755decd6bd98d61d2a8ea062fd45fd3ff8d4173605dd0d2b56a1", size = 1737455, upload-time = "2025-10-17T14:00:09.403Z" }, - { url = "https://files.pythonhosted.org/packages/24/3d/ce6e4eca42f797d6b1cd3053cf3b0a22032eef3e4d1e71b9e93c92a3f201/aiohttp-3.13.1-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:f92ad8169767429a6d2237331726c03ccc5f245222f9373aa045510976af2b35", size = 1699176, upload-time = "2025-10-17T14:00:11.314Z" }, - { url = "https://files.pythonhosted.org/packages/25/04/7127ba55653e04da51477372566b16ae786ef854e06222a1c96b4ba6c8ef/aiohttp-3.13.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0e778f634ca50ec005eefa2253856921c429581422d887be050f2c1c92e5ce12", size = 1767216, upload-time = "2025-10-17T14:00:13.668Z" }, - { url = "https://files.pythonhosted.org/packages/b8/3b/43bca1e75847e600f40df829a6b2f0f4e1d4c70fb6c4818fdc09a462afd5/aiohttp-3.13.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:9bc36b41cf4aab5d3b34d22934a696ab83516603d1bc1f3e4ff9930fe7d245e5", size = 1865870, upload-time = "2025-10-17T14:00:15.852Z" }, - { url = "https://files.pythonhosted.org/packages/9e/69/b204e5d43384197a614c88c1717c324319f5b4e7d0a1b5118da583028d40/aiohttp-3.13.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3fd4570ea696aee27204dd524f287127ed0966d14d309dc8cc440f474e3e7dbd", size = 1751021, upload-time = "2025-10-17T14:00:18.297Z" }, - { url = "https://files.pythonhosted.org/packages/1c/af/845dc6b6fdf378791d720364bf5150f80d22c990f7e3a42331d93b337cc7/aiohttp-3.13.1-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7bda795f08b8a620836ebfb0926f7973972a4bf8c74fdf9145e489f88c416811", size = 1561448, upload-time = "2025-10-17T14:00:20.152Z" }, - { url = "https://files.pythonhosted.org/packages/7a/91/d2ab08cd77ed76a49e4106b1cfb60bce2768242dd0c4f9ec0cb01e2cbf94/aiohttp-3.13.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:055a51d90e351aae53dcf324d0eafb2abe5b576d3ea1ec03827d920cf81a1c15", size = 1698196, upload-time = "2025-10-17T14:00:22.131Z" }, - { url = "https://files.pythonhosted.org/packages/5e/d1/082f0620dc428ecb8f21c08a191a4694915cd50f14791c74a24d9161cc50/aiohttp-3.13.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:d4131df864cbcc09bb16d3612a682af0db52f10736e71312574d90f16406a867", size = 1719252, upload-time = "2025-10-17T14:00:24.453Z" }, - { url = "https://files.pythonhosted.org/packages/fc/78/2af2f44491be7b08e43945b72d2b4fd76f0a14ba850ba9e41d28a7ce716a/aiohttp-3.13.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:163d3226e043f79bf47c87f8dfc89c496cc7bc9128cb7055ce026e435d551720", size = 1736529, upload-time = "2025-10-17T14:00:26.567Z" }, - { url = "https://files.pythonhosted.org/packages/b0/34/3e919ecdc93edaea8d140138049a0d9126141072e519535e2efa38eb7a02/aiohttp-3.13.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:a2370986a3b75c1a5f3d6f6d763fc6be4b430226577b0ed16a7c13a75bf43d8f", size = 1553723, upload-time = "2025-10-17T14:00:28.592Z" }, - { url = "https://files.pythonhosted.org/packages/21/4b/d8003aeda2f67f359b37e70a5a4b53fee336d8e89511ac307ff62aeefcdb/aiohttp-3.13.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:d7c14de0c7c9f1e6e785ce6cbe0ed817282c2af0012e674f45b4e58c6d4ea030", size = 1763394, upload-time = "2025-10-17T14:00:31.051Z" }, - { url = "https://files.pythonhosted.org/packages/4c/7b/1dbe6a39e33af9baaafc3fc016a280663684af47ba9f0e5d44249c1f72ec/aiohttp-3.13.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb611489cf0db10b99beeb7280bd39e0ef72bc3eb6d8c0f0a16d8a56075d1eb7", size = 1718104, upload-time = "2025-10-17T14:00:33.407Z" }, - { url = "https://files.pythonhosted.org/packages/5c/88/bd1b38687257cce67681b9b0fa0b16437be03383fa1be4d1a45b168bef25/aiohttp-3.13.1-cp312-cp312-win32.whl", hash = "sha256:f90fe0ee75590f7428f7c8b5479389d985d83c949ea10f662ab928a5ed5cf5e6", size = 425303, upload-time = "2025-10-17T14:00:35.829Z" }, - { url = "https://files.pythonhosted.org/packages/0e/e3/4481f50dd6f27e9e58c19a60cff44029641640237e35d32b04aaee8cf95f/aiohttp-3.13.1-cp312-cp312-win_amd64.whl", hash = "sha256:3461919a9dca272c183055f2aab8e6af0adc810a1b386cce28da11eb00c859d9", size = 452071, upload-time = "2025-10-17T14:00:37.764Z" }, + { url = "https://files.pythonhosted.org/packages/35/74/b321e7d7ca762638cdf8cdeceb39755d9c745aff7a64c8789be96ddf6e96/aiohttp-3.13.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4647d02df098f6434bafd7f32ad14942f05a9caa06c7016fdcc816f343997dd0", size = 743409, upload-time = "2025-10-28T20:56:00.354Z" }, + { url = "https://files.pythonhosted.org/packages/99/3d/91524b905ec473beaf35158d17f82ef5a38033e5809fe8742e3657cdbb97/aiohttp-3.13.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e3403f24bcb9c3b29113611c3c16a2a447c3953ecf86b79775e7be06f7ae7ccb", size = 497006, upload-time = "2025-10-28T20:56:01.85Z" }, + { url = "https://files.pythonhosted.org/packages/eb/d3/7f68bc02a67716fe80f063e19adbd80a642e30682ce74071269e17d2dba1/aiohttp-3.13.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:43dff14e35aba17e3d6d5ba628858fb8cb51e30f44724a2d2f0c75be492c55e9", size = 493195, upload-time = "2025-10-28T20:56:03.314Z" }, + { url = "https://files.pythonhosted.org/packages/98/31/913f774a4708775433b7375c4f867d58ba58ead833af96c8af3621a0d243/aiohttp-3.13.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e2a9ea08e8c58bb17655630198833109227dea914cd20be660f52215f6de5613", size = 1747759, upload-time = "2025-10-28T20:56:04.904Z" }, + { url = "https://files.pythonhosted.org/packages/e8/63/04efe156f4326f31c7c4a97144f82132c3bb21859b7bb84748d452ccc17c/aiohttp-3.13.2-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53b07472f235eb80e826ad038c9d106c2f653584753f3ddab907c83f49eedead", size = 1704456, upload-time = "2025-10-28T20:56:06.986Z" }, + { url = "https://files.pythonhosted.org/packages/8e/02/4e16154d8e0a9cf4ae76f692941fd52543bbb148f02f098ca73cab9b1c1b/aiohttp-3.13.2-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e736c93e9c274fce6419af4aac199984d866e55f8a4cec9114671d0ea9688780", size = 1807572, upload-time = "2025-10-28T20:56:08.558Z" }, + { url = "https://files.pythonhosted.org/packages/34/58/b0583defb38689e7f06798f0285b1ffb3a6fb371f38363ce5fd772112724/aiohttp-3.13.2-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ff5e771f5dcbc81c64898c597a434f7682f2259e0cd666932a913d53d1341d1a", size = 1895954, upload-time = "2025-10-28T20:56:10.545Z" }, + { url = "https://files.pythonhosted.org/packages/6b/f3/083907ee3437425b4e376aa58b2c915eb1a33703ec0dc30040f7ae3368c6/aiohttp-3.13.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3b6fb0c207cc661fa0bf8c66d8d9b657331ccc814f4719468af61034b478592", size = 1747092, upload-time = "2025-10-28T20:56:12.118Z" }, + { url = "https://files.pythonhosted.org/packages/ac/61/98a47319b4e425cc134e05e5f3fc512bf9a04bf65aafd9fdcda5d57ec693/aiohttp-3.13.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:97a0895a8e840ab3520e2288db7cace3a1981300d48babeb50e7425609e2e0ab", size = 1606815, upload-time = "2025-10-28T20:56:14.191Z" }, + { url = "https://files.pythonhosted.org/packages/97/4b/e78b854d82f66bb974189135d31fce265dee0f5344f64dd0d345158a5973/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9e8f8afb552297aca127c90cb840e9a1d4bfd6a10d7d8f2d9176e1acc69bad30", size = 1723789, upload-time = "2025-10-28T20:56:16.101Z" }, + { url = "https://files.pythonhosted.org/packages/ed/fc/9d2ccc794fc9b9acd1379d625c3a8c64a45508b5091c546dea273a41929e/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:ed2f9c7216e53c3df02264f25d824b079cc5914f9e2deba94155190ef648ee40", size = 1718104, upload-time = "2025-10-28T20:56:17.655Z" }, + { url = "https://files.pythonhosted.org/packages/66/65/34564b8765ea5c7d79d23c9113135d1dd3609173da13084830f1507d56cf/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:99c5280a329d5fa18ef30fd10c793a190d996567667908bef8a7f81f8202b948", size = 1785584, upload-time = "2025-10-28T20:56:19.238Z" }, + { url = "https://files.pythonhosted.org/packages/30/be/f6a7a426e02fc82781afd62016417b3948e2207426d90a0e478790d1c8a4/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2ca6ffef405fc9c09a746cb5d019c1672cd7f402542e379afc66b370833170cf", size = 1595126, upload-time = "2025-10-28T20:56:20.836Z" }, + { url = "https://files.pythonhosted.org/packages/e5/c7/8e22d5d28f94f67d2af496f14a83b3c155d915d1fe53d94b66d425ec5b42/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:47f438b1a28e926c37632bff3c44df7d27c9b57aaf4e34b1def3c07111fdb782", size = 1800665, upload-time = "2025-10-28T20:56:22.922Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/91133c8b68b1da9fc16555706aa7276fdf781ae2bb0876c838dd86b8116e/aiohttp-3.13.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9acda8604a57bb60544e4646a4615c1866ee6c04a8edef9b8ee6fd1d8fa2ddc8", size = 1739532, upload-time = "2025-10-28T20:56:25.924Z" }, + { url = "https://files.pythonhosted.org/packages/17/6b/3747644d26a998774b21a616016620293ddefa4d63af6286f389aedac844/aiohttp-3.13.2-cp311-cp311-win32.whl", hash = "sha256:868e195e39b24aaa930b063c08bb0c17924899c16c672a28a65afded9c46c6ec", size = 431876, upload-time = "2025-10-28T20:56:27.524Z" }, + { url = "https://files.pythonhosted.org/packages/c3/63/688462108c1a00eb9f05765331c107f95ae86f6b197b865d29e930b7e462/aiohttp-3.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:7fd19df530c292542636c2a9a85854fab93474396a52f1695e799186bbd7f24c", size = 456205, upload-time = "2025-10-28T20:56:29.062Z" }, + { url = "https://files.pythonhosted.org/packages/29/9b/01f00e9856d0a73260e86dd8ed0c2234a466c5c1712ce1c281548df39777/aiohttp-3.13.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b1e56bab2e12b2b9ed300218c351ee2a3d8c8fdab5b1ec6193e11a817767e47b", size = 737623, upload-time = "2025-10-28T20:56:30.797Z" }, + { url = "https://files.pythonhosted.org/packages/5a/1b/4be39c445e2b2bd0aab4ba736deb649fabf14f6757f405f0c9685019b9e9/aiohttp-3.13.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:364e25edaabd3d37b1db1f0cbcee8c73c9a3727bfa262b83e5e4cf3489a2a9dc", size = 492664, upload-time = "2025-10-28T20:56:32.708Z" }, + { url = "https://files.pythonhosted.org/packages/28/66/d35dcfea8050e131cdd731dff36434390479b4045a8d0b9d7111b0a968f1/aiohttp-3.13.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c5c94825f744694c4b8db20b71dba9a257cd2ba8e010a803042123f3a25d50d7", size = 491808, upload-time = "2025-10-28T20:56:34.57Z" }, + { url = "https://files.pythonhosted.org/packages/00/29/8e4609b93e10a853b65f8291e64985de66d4f5848c5637cddc70e98f01f8/aiohttp-3.13.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba2715d842ffa787be87cbfce150d5e88c87a98e0b62e0f5aa489169a393dbbb", size = 1738863, upload-time = "2025-10-28T20:56:36.377Z" }, + { url = "https://files.pythonhosted.org/packages/9d/fa/4ebdf4adcc0def75ced1a0d2d227577cd7b1b85beb7edad85fcc87693c75/aiohttp-3.13.2-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:585542825c4bc662221fb257889e011a5aa00f1ae4d75d1d246a5225289183e3", size = 1700586, upload-time = "2025-10-28T20:56:38.034Z" }, + { url = "https://files.pythonhosted.org/packages/da/04/73f5f02ff348a3558763ff6abe99c223381b0bace05cd4530a0258e52597/aiohttp-3.13.2-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:39d02cb6025fe1aabca329c5632f48c9532a3dabccd859e7e2f110668972331f", size = 1768625, upload-time = "2025-10-28T20:56:39.75Z" }, + { url = "https://files.pythonhosted.org/packages/f8/49/a825b79ffec124317265ca7d2344a86bcffeb960743487cb11988ffb3494/aiohttp-3.13.2-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e67446b19e014d37342f7195f592a2a948141d15a312fe0e700c2fd2f03124f6", size = 1867281, upload-time = "2025-10-28T20:56:41.471Z" }, + { url = "https://files.pythonhosted.org/packages/b9/48/adf56e05f81eac31edcfae45c90928f4ad50ef2e3ea72cb8376162a368f8/aiohttp-3.13.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4356474ad6333e41ccefd39eae869ba15a6c5299c9c01dfdcfdd5c107be4363e", size = 1752431, upload-time = "2025-10-28T20:56:43.162Z" }, + { url = "https://files.pythonhosted.org/packages/30/ab/593855356eead019a74e862f21523db09c27f12fd24af72dbc3555b9bfd9/aiohttp-3.13.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:eeacf451c99b4525f700f078becff32c32ec327b10dcf31306a8a52d78166de7", size = 1562846, upload-time = "2025-10-28T20:56:44.85Z" }, + { url = "https://files.pythonhosted.org/packages/39/0f/9f3d32271aa8dc35036e9668e31870a9d3b9542dd6b3e2c8a30931cb27ae/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d8a9b889aeabd7a4e9af0b7f4ab5ad94d42e7ff679aaec6d0db21e3b639ad58d", size = 1699606, upload-time = "2025-10-28T20:56:46.519Z" }, + { url = "https://files.pythonhosted.org/packages/2c/3c/52d2658c5699b6ef7692a3f7128b2d2d4d9775f2a68093f74bca06cf01e1/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:fa89cb11bc71a63b69568d5b8a25c3ca25b6d54c15f907ca1c130d72f320b76b", size = 1720663, upload-time = "2025-10-28T20:56:48.528Z" }, + { url = "https://files.pythonhosted.org/packages/9b/d4/8f8f3ff1fb7fb9e3f04fcad4e89d8a1cd8fc7d05de67e3de5b15b33008ff/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8aa7c807df234f693fed0ecd507192fc97692e61fee5702cdc11155d2e5cadc8", size = 1737939, upload-time = "2025-10-28T20:56:50.77Z" }, + { url = "https://files.pythonhosted.org/packages/03/d3/ddd348f8a27a634daae39a1b8e291ff19c77867af438af844bf8b7e3231b/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:9eb3e33fdbe43f88c3c75fa608c25e7c47bbd80f48d012763cb67c47f39a7e16", size = 1555132, upload-time = "2025-10-28T20:56:52.568Z" }, + { url = "https://files.pythonhosted.org/packages/39/b8/46790692dc46218406f94374903ba47552f2f9f90dad554eed61bfb7b64c/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9434bc0d80076138ea986833156c5a48c9c7a8abb0c96039ddbb4afc93184169", size = 1764802, upload-time = "2025-10-28T20:56:54.292Z" }, + { url = "https://files.pythonhosted.org/packages/ba/e4/19ce547b58ab2a385e5f0b8aa3db38674785085abcf79b6e0edd1632b12f/aiohttp-3.13.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ff15c147b2ad66da1f2cbb0622313f2242d8e6e8f9b79b5206c84523a4473248", size = 1719512, upload-time = "2025-10-28T20:56:56.428Z" }, + { url = "https://files.pythonhosted.org/packages/70/30/6355a737fed29dcb6dfdd48682d5790cb5eab050f7b4e01f49b121d3acad/aiohttp-3.13.2-cp312-cp312-win32.whl", hash = "sha256:27e569eb9d9e95dbd55c0fc3ec3a9335defbf1d8bc1d20171a49f3c4c607b93e", size = 426690, upload-time = "2025-10-28T20:56:58.736Z" }, + { url = "https://files.pythonhosted.org/packages/0a/0d/b10ac09069973d112de6ef980c1f6bb31cb7dcd0bc363acbdad58f927873/aiohttp-3.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:8709a0f05d59a71f33fd05c17fc11fcb8c30140506e13c2f5e8ee1b8964e1b45", size = 453465, upload-time = "2025-10-28T20:57:00.795Z" }, ] [[package]] name = "aiomysql" -version = "0.2.0" +version = "0.3.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pymysql" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/67/76/2c5b55e4406a1957ffdfd933a94c2517455291c97d2b81cec6813754791a/aiomysql-0.2.0.tar.gz", hash = "sha256:558b9c26d580d08b8c5fd1be23c5231ce3aeff2dadad989540fee740253deb67", size = 114706, upload-time = "2023-06-11T19:57:53.608Z" } +sdist = { url = "https://files.pythonhosted.org/packages/29/e0/302aeffe8d90853556f47f3106b89c16cc2ec2a4d269bdfd82e3f4ae12cc/aiomysql-0.3.2.tar.gz", hash = "sha256:72d15ef5cfc34c03468eb41e1b90adb9fd9347b0b589114bd23ead569a02ac1a", size = 108311, upload-time = "2025-10-22T00:15:21.278Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/42/87/c982ee8b333c85b8ae16306387d703a1fcdfc81a2f3f15a24820ab1a512d/aiomysql-0.2.0-py3-none-any.whl", hash = "sha256:b7c26da0daf23a5ec5e0b133c03d20657276e4eae9b73e040b72787f6f6ade0a", size = 44215, upload-time = "2023-06-11T19:57:51.09Z" }, + { url = "https://files.pythonhosted.org/packages/4c/af/aae0153c3e28712adaf462328f6c7a3c196a1c1c27b491de4377dd3e6b52/aiomysql-0.3.2-py3-none-any.whl", hash = "sha256:c82c5ba04137d7afd5c693a258bea8ead2aad77101668044143a991e04632eb2", size = 71834, upload-time = "2025-10-22T00:15:15.905Z" }, ] [[package]] @@ -124,16 +124,16 @@ wheels = [ [[package]] name = "alembic" -version = "1.17.0" +version = "1.17.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mako" }, { name = "sqlalchemy" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6b/45/6f4555f2039f364c3ce31399529dcf48dd60726ff3715ad67f547d87dfd2/alembic-1.17.0.tar.gz", hash = "sha256:4652a0b3e19616b57d652b82bfa5e38bf5dbea0813eed971612671cb9e90c0fe", size = 1975526, upload-time = "2025-10-11T18:40:13.585Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6e/b6/2a81d7724c0c124edc5ec7a167e85858b6fd31b9611c6fb8ecf617b7e2d3/alembic-1.17.1.tar.gz", hash = "sha256:8a289f6778262df31571d29cca4c7fbacd2f0f582ea0816f4c399b6da7528486", size = 1981285, upload-time = "2025-10-29T00:23:16.667Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/44/1f/38e29b06bfed7818ebba1f84904afdc8153ef7b6c7e0d8f3bc6643f5989c/alembic-1.17.0-py3-none-any.whl", hash = "sha256:80523bc437d41b35c5db7e525ad9d908f79de65c27d6a5a5eab6df348a352d99", size = 247449, upload-time = "2025-10-11T18:40:16.288Z" }, + { url = "https://files.pythonhosted.org/packages/a5/32/7df1d81ec2e50fb661944a35183d87e62d3f6c6d9f8aff64a4f245226d55/alembic-1.17.1-py3-none-any.whl", hash = "sha256:cbc2386e60f89608bb63f30d2d6cc66c7aaed1fe105bd862828600e5ad167023", size = 247848, upload-time = "2025-10-29T00:23:18.79Z" }, ] [[package]] @@ -333,11 +333,11 @@ wheels = [ [[package]] name = "annotated-doc" -version = "0.0.3" +version = "0.0.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d7/a6/dc46877b911e40c00d395771ea710d5e77b6de7bacd5fdcd78d70cc5a48f/annotated_doc-0.0.3.tar.gz", hash = "sha256:e18370014c70187422c33e945053ff4c286f453a984eba84d0dbfa0c935adeda", size = 5535, upload-time = "2025-10-24T14:57:10.718Z" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/02/b7/cf592cb5de5cb3bade3357f8d2cf42bf103bbe39f459824b4939fd212911/annotated_doc-0.0.3-py3-none-any.whl", hash = "sha256:348ec6664a76f1fd3be81f43dffbee4c7e8ce931ba71ec67cc7f4ade7fbbb580", size = 5488, upload-time = "2025-10-24T14:57:09.462Z" }, + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, ] [[package]] @@ -365,14 +365,14 @@ wheels = [ [[package]] name = "apscheduler" -version = "3.11.0" +version = "3.11.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "tzlocal" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4e/00/6d6814ddc19be2df62c8c898c4df6b5b1914f3bd024b780028caa392d186/apscheduler-3.11.0.tar.gz", hash = "sha256:4c622d250b0955a65d5d0eb91c33e6d43fd879834bf541e0a18661ae60460133", size = 107347, upload-time = "2024-11-24T19:39:26.463Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/81/192db4f8471de5bc1f0d098783decffb1e6e69c4f8b4bc6711094691950b/apscheduler-3.11.1.tar.gz", hash = "sha256:0db77af6400c84d1747fe98a04b8b58f0080c77d11d338c4f507a9752880f221", size = 108044, upload-time = "2025-10-31T18:55:42.819Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d0/ae/9a053dd9229c0fde6b1f1f33f609ccff1ee79ddda364c756a924c6d8563b/APScheduler-3.11.0-py3-none-any.whl", hash = "sha256:fc134ca32e50f5eadcc4938e3a4545ab19131435e851abb40b34d63d5141c6da", size = 64004, upload-time = "2024-11-24T19:39:24.442Z" }, + { url = "https://files.pythonhosted.org/packages/58/9f/d3c76f76c73fcc959d28e9def45b8b1cc3d7722660c5003b19c1022fd7f4/apscheduler-3.11.1-py3-none-any.whl", hash = "sha256:6162cb5683cb09923654fa9bdd3130c4be4bfda6ad8990971c9597ecd52965d2", size = 64278, upload-time = "2025-10-31T18:55:41.186Z" }, ] [[package]] @@ -498,16 +498,16 @@ wheels = [ [[package]] name = "bce-python-sdk" -version = "0.9.46" +version = "0.9.52" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "future" }, { name = "pycryptodome" }, { name = "six" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/53/57/f98bc15c12cc022ef195f689ee57ed61d8a8677bda3089c4d58fb1872d45/bce_python_sdk-0.9.46.tar.gz", hash = "sha256:4bf01b22e6d172ccd94aa201f8bc6f2a98d0da4784160e77cfacfcc71c2686be", size = 253806, upload-time = "2025-09-15T06:51:52.753Z" } +sdist = { url = "https://files.pythonhosted.org/packages/83/0a/e49d7774ce186fd51c611a2533baff8e7db0d22baef12223773f389b06b1/bce_python_sdk-0.9.52.tar.gz", hash = "sha256:dd54213ac25b8b1260fb45f1fbc0f2b1c53bb0f9f594258ca0479f1fc85f7405", size = 275614, upload-time = "2025-11-12T09:09:28.227Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c0/f5/20e9ab324b22a77970c57bc8267e586e85e2aa1277d80f2c58ca8a39a13e/bce_python_sdk-0.9.46-py3-none-any.whl", hash = "sha256:655074da6592ce8b036f605d9a272bfdcd1f515eb2f8e3f0333bb7cc62f700cb", size = 352622, upload-time = "2025-09-15T06:51:50.811Z" }, + { url = "https://files.pythonhosted.org/packages/a3/d0/f57f75c96e8bb72144845f7208f712a54454f1d063d5ef02f1e9ea476b79/bce_python_sdk-0.9.52-py3-none-any.whl", hash = "sha256:f1ed39aa61c2d4a002cd2345e01dd92ac55c75960440d76163ead419b3b550e7", size = 390401, upload-time = "2025-11-12T09:09:26.663Z" }, ] [[package]] @@ -598,16 +598,16 @@ wheels = [ [[package]] name = "boto3-stubs" -version = "1.40.59" +version = "1.40.72" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "botocore-stubs" }, { name = "types-s3transfer" }, { name = "typing-extensions", marker = "python_full_version < '3.12'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d7/36/92108a11eb151036867fcf09ed604dbcc172aa08411b9e752021bd276c7b/boto3_stubs-1.40.59.tar.gz", hash = "sha256:0791851ecfd465c5d76bd316f48c0113811c11c89521a48c5318d8a25b6fe8dd", size = 101012, upload-time = "2025-10-24T19:33:21.876Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4f/db/90881ac0b8afdfa9b95ae66b4094ed33f88b6086a8945229a95156257ca9/boto3_stubs-1.40.72.tar.gz", hash = "sha256:cbcf7b6e8a7f54e77fcb2b8d00041993fe4f76554c716b1d290e48650d569cd0", size = 99406, upload-time = "2025-11-12T20:36:23.685Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/15/01e5409e748f810dd379bc6d632793a68a3cfcd0d8e0e0404ebd34749e8e/boto3_stubs-1.40.59-py3-none-any.whl", hash = "sha256:8f2e623d9960d63ad4231a5991c25c92b145240e6b798d5045f79ff0e2807f8f", size = 69744, upload-time = "2025-10-24T19:33:16.615Z" }, + { url = "https://files.pythonhosted.org/packages/7a/ea/0f2814edc61c2e6fedd9b7a7fbc55149d1ffac7f7cd02d04cc51d1a3b1ca/boto3_stubs-1.40.72-py3-none-any.whl", hash = "sha256:4807f334b87914f75db3c6cd85f7eb706b5777e6ddaf117f8d63219cc01fb4b2", size = 68982, upload-time = "2025-11-12T20:36:12.855Z" }, ] [package.optional-dependencies] @@ -631,14 +631,14 @@ wheels = [ [[package]] name = "botocore-stubs" -version = "1.40.59" +version = "1.40.72" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "types-awscrt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9a/f5/8a2874a9a426e98df3aa68da59467a8b691c3dfc793c7f1433bc3ea8b602/botocore_stubs-1.40.59.tar.gz", hash = "sha256:53611e55b64f7632ef9f7781d896f126926868cb182d181b95b15b28f8d777a8", size = 42219, upload-time = "2025-10-24T20:27:27.393Z" } +sdist = { url = "https://files.pythonhosted.org/packages/51/c9/17d5337cc81f107fd0a6d04b5b20c75bea0fe8b77bcc644de324487f8310/botocore_stubs-1.40.72.tar.gz", hash = "sha256:6d268d0dd9366dc15e7af52cbd0d3a3f3cd14e2191de0e280badc69f8d34708c", size = 42208, upload-time = "2025-11-12T21:23:53.344Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fd/cc/951184f1862153bd15f15753d9340103e11f8f9836c37149a6bd9ccbeb5c/botocore_stubs-1.40.59-py3-none-any.whl", hash = "sha256:0cfbb0b03597cf1f33d4fc3a2f0ec4d92de5773177f858406f86f01f454d09ad", size = 66541, upload-time = "2025-10-24T20:27:25.231Z" }, + { url = "https://files.pythonhosted.org/packages/3c/99/9387b31ec1d980af83ca097366cc10714757d2c1390b4ac6b692c07a9e7f/botocore_stubs-1.40.72-py3-none-any.whl", hash = "sha256:1166a81074714312d3843be3f879d16966cbffdc440ab61ad6f0cd8922fde679", size = 66542, upload-time = "2025-11-12T21:23:51.018Z" }, ] [[package]] @@ -668,44 +668,30 @@ wheels = [ [[package]] name = "brotli" -version = "1.1.0" +version = "1.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2f/c2/f9e977608bdf958650638c3f1e28f85a1b075f075ebbe77db8555463787b/Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724", size = 7372270, upload-time = "2023-09-07T14:05:41.643Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f7/16/c92ca344d646e71a43b8bb353f0a6490d7f6e06210f8554c8f874e454285/brotli-1.2.0.tar.gz", hash = "sha256:e310f77e41941c13340a95976fe66a8a95b01e783d430eeaf7a2f87e0a57dd0a", size = 7388632, upload-time = "2025-11-05T18:39:42.86Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/96/12/ad41e7fadd5db55459c4c401842b47f7fee51068f86dd2894dd0dcfc2d2a/Brotli-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a3daabb76a78f829cafc365531c972016e4aa8d5b4bf60660ad8ecee19df7ccc", size = 873068, upload-time = "2023-09-07T14:03:37.779Z" }, - { url = "https://files.pythonhosted.org/packages/95/4e/5afab7b2b4b61a84e9c75b17814198ce515343a44e2ed4488fac314cd0a9/Brotli-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c8146669223164fc87a7e3de9f81e9423c67a79d6b3447994dfb9c95da16e2d6", size = 446244, upload-time = "2023-09-07T14:03:39.223Z" }, - { url = "https://files.pythonhosted.org/packages/9d/e6/f305eb61fb9a8580c525478a4a34c5ae1a9bcb12c3aee619114940bc513d/Brotli-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30924eb4c57903d5a7526b08ef4a584acc22ab1ffa085faceb521521d2de32dd", size = 2906500, upload-time = "2023-09-07T14:03:40.858Z" }, - { url = "https://files.pythonhosted.org/packages/3e/4f/af6846cfbc1550a3024e5d3775ede1e00474c40882c7bf5b37a43ca35e91/Brotli-1.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ceb64bbc6eac5a140ca649003756940f8d6a7c444a68af170b3187623b43bebf", size = 2943950, upload-time = "2023-09-07T14:03:42.896Z" }, - { url = "https://files.pythonhosted.org/packages/b3/e7/ca2993c7682d8629b62630ebf0d1f3bb3d579e667ce8e7ca03a0a0576a2d/Brotli-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a469274ad18dc0e4d316eefa616d1d0c2ff9da369af19fa6f3daa4f09671fd61", size = 2918527, upload-time = "2023-09-07T14:03:44.552Z" }, - { url = "https://files.pythonhosted.org/packages/b3/96/da98e7bedc4c51104d29cc61e5f449a502dd3dbc211944546a4cc65500d3/Brotli-1.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524f35912131cc2cabb00edfd8d573b07f2d9f21fa824bd3fb19725a9cf06327", size = 2845489, upload-time = "2023-09-07T14:03:46.594Z" }, - { url = "https://files.pythonhosted.org/packages/e8/ef/ccbc16947d6ce943a7f57e1a40596c75859eeb6d279c6994eddd69615265/Brotli-1.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5b3cc074004d968722f51e550b41a27be656ec48f8afaeeb45ebf65b561481dd", size = 2914080, upload-time = "2023-09-07T14:03:48.204Z" }, - { url = "https://files.pythonhosted.org/packages/80/d6/0bd38d758d1afa62a5524172f0b18626bb2392d717ff94806f741fcd5ee9/Brotli-1.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:19c116e796420b0cee3da1ccec3b764ed2952ccfcc298b55a10e5610ad7885f9", size = 2813051, upload-time = "2023-09-07T14:03:50.348Z" }, - { url = "https://files.pythonhosted.org/packages/14/56/48859dd5d129d7519e001f06dcfbb6e2cf6db92b2702c0c2ce7d97e086c1/Brotli-1.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:510b5b1bfbe20e1a7b3baf5fed9e9451873559a976c1a78eebaa3b86c57b4265", size = 2938172, upload-time = "2023-09-07T14:03:52.395Z" }, - { url = "https://files.pythonhosted.org/packages/3d/77/a236d5f8cd9e9f4348da5acc75ab032ab1ab2c03cc8f430d24eea2672888/Brotli-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a1fd8a29719ccce974d523580987b7f8229aeace506952fa9ce1d53a033873c8", size = 2933023, upload-time = "2023-09-07T14:03:53.96Z" }, - { url = "https://files.pythonhosted.org/packages/f1/87/3b283efc0f5cb35f7f84c0c240b1e1a1003a5e47141a4881bf87c86d0ce2/Brotli-1.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c247dd99d39e0338a604f8c2b3bc7061d5c2e9e2ac7ba9cc1be5a69cb6cd832f", size = 2935871, upload-time = "2024-10-18T12:32:16.688Z" }, - { url = "https://files.pythonhosted.org/packages/f3/eb/2be4cc3e2141dc1a43ad4ca1875a72088229de38c68e842746b342667b2a/Brotli-1.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1b2c248cd517c222d89e74669a4adfa5577e06ab68771a529060cf5a156e9757", size = 2847784, upload-time = "2024-10-18T12:32:18.459Z" }, - { url = "https://files.pythonhosted.org/packages/66/13/b58ddebfd35edde572ccefe6890cf7c493f0c319aad2a5badee134b4d8ec/Brotli-1.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2a24c50840d89ded6c9a8fdc7b6ed3692ed4e86f1c4a4a938e1e92def92933e0", size = 3034905, upload-time = "2024-10-18T12:32:20.192Z" }, - { url = "https://files.pythonhosted.org/packages/84/9c/bc96b6c7db824998a49ed3b38e441a2cae9234da6fa11f6ed17e8cf4f147/Brotli-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f31859074d57b4639318523d6ffdca586ace54271a73ad23ad021acd807eb14b", size = 2929467, upload-time = "2024-10-18T12:32:21.774Z" }, - { url = "https://files.pythonhosted.org/packages/e7/71/8f161dee223c7ff7fea9d44893fba953ce97cf2c3c33f78ba260a91bcff5/Brotli-1.1.0-cp311-cp311-win32.whl", hash = "sha256:39da8adedf6942d76dc3e46653e52df937a3c4d6d18fdc94a7c29d263b1f5b50", size = 333169, upload-time = "2023-09-07T14:03:55.404Z" }, - { url = "https://files.pythonhosted.org/packages/02/8a/fece0ee1057643cb2a5bbf59682de13f1725f8482b2c057d4e799d7ade75/Brotli-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:aac0411d20e345dc0920bdec5548e438e999ff68d77564d5e9463a7ca9d3e7b1", size = 357253, upload-time = "2023-09-07T14:03:56.643Z" }, - { url = "https://files.pythonhosted.org/packages/5c/d0/5373ae13b93fe00095a58efcbce837fd470ca39f703a235d2a999baadfbc/Brotli-1.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:32d95b80260d79926f5fab3c41701dbb818fde1c9da590e77e571eefd14abe28", size = 815693, upload-time = "2024-10-18T12:32:23.824Z" }, - { url = "https://files.pythonhosted.org/packages/8e/48/f6e1cdf86751300c288c1459724bfa6917a80e30dbfc326f92cea5d3683a/Brotli-1.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b760c65308ff1e462f65d69c12e4ae085cff3b332d894637f6273a12a482d09f", size = 422489, upload-time = "2024-10-18T12:32:25.641Z" }, - { url = "https://files.pythonhosted.org/packages/06/88/564958cedce636d0f1bed313381dfc4b4e3d3f6015a63dae6146e1b8c65c/Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409", size = 873081, upload-time = "2023-09-07T14:03:57.967Z" }, - { url = "https://files.pythonhosted.org/packages/58/79/b7026a8bb65da9a6bb7d14329fd2bd48d2b7f86d7329d5cc8ddc6a90526f/Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2", size = 446244, upload-time = "2023-09-07T14:03:59.319Z" }, - { url = "https://files.pythonhosted.org/packages/e5/18/c18c32ecea41b6c0004e15606e274006366fe19436b6adccc1ae7b2e50c2/Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451", size = 2906505, upload-time = "2023-09-07T14:04:01.327Z" }, - { url = "https://files.pythonhosted.org/packages/08/c8/69ec0496b1ada7569b62d85893d928e865df29b90736558d6c98c2031208/Brotli-1.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f4bf76817c14aa98cc6697ac02f3972cb8c3da93e9ef16b9c66573a68014f91", size = 2944152, upload-time = "2023-09-07T14:04:03.033Z" }, - { url = "https://files.pythonhosted.org/packages/ab/fb/0517cea182219d6768113a38167ef6d4eb157a033178cc938033a552ed6d/Brotli-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0c5516f0aed654134a2fc936325cc2e642f8a0e096d075209672eb321cff408", size = 2919252, upload-time = "2023-09-07T14:04:04.675Z" }, - { url = "https://files.pythonhosted.org/packages/c7/53/73a3431662e33ae61a5c80b1b9d2d18f58dfa910ae8dd696e57d39f1a2f5/Brotli-1.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c3020404e0b5eefd7c9485ccf8393cfb75ec38ce75586e046573c9dc29967a0", size = 2845955, upload-time = "2023-09-07T14:04:06.585Z" }, - { url = "https://files.pythonhosted.org/packages/55/ac/bd280708d9c5ebdbf9de01459e625a3e3803cce0784f47d633562cf40e83/Brotli-1.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ed11165dd45ce798d99a136808a794a748d5dc38511303239d4e2363c0695dc", size = 2914304, upload-time = "2023-09-07T14:04:08.668Z" }, - { url = "https://files.pythonhosted.org/packages/76/58/5c391b41ecfc4527d2cc3350719b02e87cb424ef8ba2023fb662f9bf743c/Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180", size = 2814452, upload-time = "2023-09-07T14:04:10.736Z" }, - { url = "https://files.pythonhosted.org/packages/c7/4e/91b8256dfe99c407f174924b65a01f5305e303f486cc7a2e8a5d43c8bec3/Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248", size = 2938751, upload-time = "2023-09-07T14:04:12.875Z" }, - { url = "https://files.pythonhosted.org/packages/5a/a6/e2a39a5d3b412938362bbbeba5af904092bf3f95b867b4a3eb856104074e/Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966", size = 2933757, upload-time = "2023-09-07T14:04:14.551Z" }, - { url = "https://files.pythonhosted.org/packages/13/f0/358354786280a509482e0e77c1a5459e439766597d280f28cb097642fc26/Brotli-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:87a3044c3a35055527ac75e419dfa9f4f3667a1e887ee80360589eb8c90aabb9", size = 2936146, upload-time = "2024-10-18T12:32:27.257Z" }, - { url = "https://files.pythonhosted.org/packages/80/f7/daf538c1060d3a88266b80ecc1d1c98b79553b3f117a485653f17070ea2a/Brotli-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c5529b34c1c9d937168297f2c1fde7ebe9ebdd5e121297ff9c043bdb2ae3d6fb", size = 2848055, upload-time = "2024-10-18T12:32:29.376Z" }, - { url = "https://files.pythonhosted.org/packages/ad/cf/0eaa0585c4077d3c2d1edf322d8e97aabf317941d3a72d7b3ad8bce004b0/Brotli-1.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ca63e1890ede90b2e4454f9a65135a4d387a4585ff8282bb72964fab893f2111", size = 3035102, upload-time = "2024-10-18T12:32:31.371Z" }, - { url = "https://files.pythonhosted.org/packages/d8/63/1c1585b2aa554fe6dbce30f0c18bdbc877fa9a1bf5ff17677d9cca0ac122/Brotli-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e79e6520141d792237c70bcd7a3b122d00f2613769ae0cb61c52e89fd3443839", size = 2930029, upload-time = "2024-10-18T12:32:33.293Z" }, - { url = "https://files.pythonhosted.org/packages/5f/3b/4e3fd1893eb3bbfef8e5a80d4508bec17a57bb92d586c85c12d28666bb13/Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0", size = 333276, upload-time = "2023-09-07T14:04:16.49Z" }, - { url = "https://files.pythonhosted.org/packages/3d/d5/942051b45a9e883b5b6e98c041698b1eb2012d25e5948c58d6bf85b1bb43/Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951", size = 357255, upload-time = "2023-09-07T14:04:17.83Z" }, + { url = "https://files.pythonhosted.org/packages/7a/ef/f285668811a9e1ddb47a18cb0b437d5fc2760d537a2fe8a57875ad6f8448/brotli-1.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:15b33fe93cedc4caaff8a0bd1eb7e3dab1c61bb22a0bf5bdfdfd97cd7da79744", size = 863110, upload-time = "2025-11-05T18:38:12.978Z" }, + { url = "https://files.pythonhosted.org/packages/50/62/a3b77593587010c789a9d6eaa527c79e0848b7b860402cc64bc0bc28a86c/brotli-1.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:898be2be399c221d2671d29eed26b6b2713a02c2119168ed914e7d00ceadb56f", size = 445438, upload-time = "2025-11-05T18:38:14.208Z" }, + { url = "https://files.pythonhosted.org/packages/cd/e1/7fadd47f40ce5549dc44493877db40292277db373da5053aff181656e16e/brotli-1.2.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:350c8348f0e76fff0a0fd6c26755d2653863279d086d3aa2c290a6a7251135dd", size = 1534420, upload-time = "2025-11-05T18:38:15.111Z" }, + { url = "https://files.pythonhosted.org/packages/12/8b/1ed2f64054a5a008a4ccd2f271dbba7a5fb1a3067a99f5ceadedd4c1d5a7/brotli-1.2.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e1ad3fda65ae0d93fec742a128d72e145c9c7a99ee2fcd667785d99eb25a7fe", size = 1632619, upload-time = "2025-11-05T18:38:16.094Z" }, + { url = "https://files.pythonhosted.org/packages/89/5a/7071a621eb2d052d64efd5da2ef55ecdac7c3b0c6e4f9d519e9c66d987ef/brotli-1.2.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:40d918bce2b427a0c4ba189df7a006ac0c7277c180aee4617d99e9ccaaf59e6a", size = 1426014, upload-time = "2025-11-05T18:38:17.177Z" }, + { url = "https://files.pythonhosted.org/packages/26/6d/0971a8ea435af5156acaaccec1a505f981c9c80227633851f2810abd252a/brotli-1.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2a7f1d03727130fc875448b65b127a9ec5d06d19d0148e7554384229706f9d1b", size = 1489661, upload-time = "2025-11-05T18:38:18.41Z" }, + { url = "https://files.pythonhosted.org/packages/f3/75/c1baca8b4ec6c96a03ef8230fab2a785e35297632f402ebb1e78a1e39116/brotli-1.2.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9c79f57faa25d97900bfb119480806d783fba83cd09ee0b33c17623935b05fa3", size = 1599150, upload-time = "2025-11-05T18:38:19.792Z" }, + { url = "https://files.pythonhosted.org/packages/0d/1a/23fcfee1c324fd48a63d7ebf4bac3a4115bdb1b00e600f80f727d850b1ae/brotli-1.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:844a8ceb8483fefafc412f85c14f2aae2fb69567bf2a0de53cdb88b73e7c43ae", size = 1493505, upload-time = "2025-11-05T18:38:20.913Z" }, + { url = "https://files.pythonhosted.org/packages/36/e5/12904bbd36afeef53d45a84881a4810ae8810ad7e328a971ebbfd760a0b3/brotli-1.2.0-cp311-cp311-win32.whl", hash = "sha256:aa47441fa3026543513139cb8926a92a8e305ee9c71a6209ef7a97d91640ea03", size = 334451, upload-time = "2025-11-05T18:38:21.94Z" }, + { url = "https://files.pythonhosted.org/packages/02/8b/ecb5761b989629a4758c394b9301607a5880de61ee2ee5fe104b87149ebc/brotli-1.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:022426c9e99fd65d9475dce5c195526f04bb8be8907607e27e747893f6ee3e24", size = 369035, upload-time = "2025-11-05T18:38:22.941Z" }, + { url = "https://files.pythonhosted.org/packages/11/ee/b0a11ab2315c69bb9b45a2aaed022499c9c24a205c3a49c3513b541a7967/brotli-1.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:35d382625778834a7f3061b15423919aa03e4f5da34ac8e02c074e4b75ab4f84", size = 861543, upload-time = "2025-11-05T18:38:24.183Z" }, + { url = "https://files.pythonhosted.org/packages/e1/2f/29c1459513cd35828e25531ebfcbf3e92a5e49f560b1777a9af7203eb46e/brotli-1.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7a61c06b334bd99bc5ae84f1eeb36bfe01400264b3c352f968c6e30a10f9d08b", size = 444288, upload-time = "2025-11-05T18:38:25.139Z" }, + { url = "https://files.pythonhosted.org/packages/3d/6f/feba03130d5fceadfa3a1bb102cb14650798c848b1df2a808356f939bb16/brotli-1.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:acec55bb7c90f1dfc476126f9711a8e81c9af7fb617409a9ee2953115343f08d", size = 1528071, upload-time = "2025-11-05T18:38:26.081Z" }, + { url = "https://files.pythonhosted.org/packages/2b/38/f3abb554eee089bd15471057ba85f47e53a44a462cfce265d9bf7088eb09/brotli-1.2.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:260d3692396e1895c5034f204f0db022c056f9e2ac841593a4cf9426e2a3faca", size = 1626913, upload-time = "2025-11-05T18:38:27.284Z" }, + { url = "https://files.pythonhosted.org/packages/03/a7/03aa61fbc3c5cbf99b44d158665f9b0dd3d8059be16c460208d9e385c837/brotli-1.2.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:072e7624b1fc4d601036ab3f4f27942ef772887e876beff0301d261210bca97f", size = 1419762, upload-time = "2025-11-05T18:38:28.295Z" }, + { url = "https://files.pythonhosted.org/packages/21/1b/0374a89ee27d152a5069c356c96b93afd1b94eae83f1e004b57eb6ce2f10/brotli-1.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:adedc4a67e15327dfdd04884873c6d5a01d3e3b6f61406f99b1ed4865a2f6d28", size = 1484494, upload-time = "2025-11-05T18:38:29.29Z" }, + { url = "https://files.pythonhosted.org/packages/cf/57/69d4fe84a67aef4f524dcd075c6eee868d7850e85bf01d778a857d8dbe0a/brotli-1.2.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7a47ce5c2288702e09dc22a44d0ee6152f2c7eda97b3c8482d826a1f3cfc7da7", size = 1593302, upload-time = "2025-11-05T18:38:30.639Z" }, + { url = "https://files.pythonhosted.org/packages/d5/3b/39e13ce78a8e9a621c5df3aeb5fd181fcc8caba8c48a194cd629771f6828/brotli-1.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:af43b8711a8264bb4e7d6d9a6d004c3a2019c04c01127a868709ec29962b6036", size = 1487913, upload-time = "2025-11-05T18:38:31.618Z" }, + { url = "https://files.pythonhosted.org/packages/62/28/4d00cb9bd76a6357a66fcd54b4b6d70288385584063f4b07884c1e7286ac/brotli-1.2.0-cp312-cp312-win32.whl", hash = "sha256:e99befa0b48f3cd293dafeacdd0d191804d105d279e0b387a32054c1180f3161", size = 334362, upload-time = "2025-11-05T18:38:32.939Z" }, + { url = "https://files.pythonhosted.org/packages/1c/4e/bc1dcac9498859d5e353c9b153627a3752868a9d5f05ce8dedd81a2354ab/brotli-1.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:b35c13ce241abdd44cb8ca70683f20c0c079728a36a996297adb5334adfc1c44", size = 369115, upload-time = "2025-11-05T18:38:33.765Z" }, ] [[package]] @@ -793,11 +779,11 @@ wheels = [ [[package]] name = "certifi" -version = "2025.10.5" +version = "2025.11.12" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4c/5b/b6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91/certifi-2025.10.5.tar.gz", hash = "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43", size = 164519, upload-time = "2025-10-05T04:12:15.808Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de", size = 163286, upload-time = "2025-10-05T04:12:14.03Z" }, + { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, ] [[package]] @@ -1052,7 +1038,7 @@ wheels = [ [[package]] name = "clickzetta-connector-python" -version = "0.8.104" +version = "0.8.106" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "future" }, @@ -1066,7 +1052,7 @@ dependencies = [ { name = "urllib3" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/94/c7eee2224bdab39d16dfe5bb7687f5525c7ed345b7fe8812e18a2d9a6335/clickzetta_connector_python-0.8.104-py3-none-any.whl", hash = "sha256:ae3e466d990677f96c769ec1c29318237df80c80fe9c1e21ba1eaf42bdef0207", size = 79382, upload-time = "2025-09-10T08:46:39.731Z" }, + { url = "https://files.pythonhosted.org/packages/23/38/749c708619f402d4d582dfa73fbeb64ade77b1f250a93bd064d2a1aa3776/clickzetta_connector_python-0.8.106-py3-none-any.whl", hash = "sha256:120d6700051d97609dbd6655c002ab3bc260b7c8e67d39dfc7191e749563f7b4", size = 78121, upload-time = "2025-10-29T02:38:15.014Z" }, ] [[package]] @@ -1302,14 +1288,14 @@ wheels = [ [[package]] name = "deprecated" -version = "1.2.18" +version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/98/97/06afe62762c9a8a86af0cfb7bfdab22a43ad17138b07af5b1a58442690a2/deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", size = 2928744, upload-time = "2025-01-27T10:46:25.7Z" } +sdist = { url = "https://files.pythonhosted.org/packages/49/85/12f0a49a7c4ffb70572b6c2ef13c90c88fd190debda93b23f026b25f9634/deprecated-1.3.1.tar.gz", hash = "sha256:b1b50e0ff0c1fddaa5708a2c6b0a6588bb09b892825ab2b214ac9ea9d92a5223", size = 2932523, upload-time = "2025-10-30T08:19:02.757Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998, upload-time = "2025-01-27T10:46:09.186Z" }, + { url = "https://files.pythonhosted.org/packages/84/d0/205d54408c08b13550c733c4b85429e7ead111c7f0014309637425520a9a/deprecated-1.3.1-py2.py3-none-any.whl", hash = "sha256:597bfef186b6f60181535a29fbe44865ce137a5079f295b479886c82729d5f3f", size = 11298, upload-time = "2025-10-30T08:19:00.758Z" }, ] [[package]] @@ -1326,7 +1312,7 @@ wheels = [ [[package]] name = "dify-api" -version = "1.9.2" +version = "1.10.0" source = { virtual = "." } dependencies = [ { name = "apscheduler" }, @@ -1362,6 +1348,7 @@ dependencies = [ { name = "json-repair" }, { name = "langfuse" }, { name = "langsmith" }, + { name = "litellm" }, { name = "markdown" }, { name = "numpy" }, { name = "openpyxl" }, @@ -1555,6 +1542,7 @@ requires-dist = [ { name = "json-repair", specifier = ">=0.41.1" }, { name = "langfuse", specifier = "~=2.51.3" }, { name = "langsmith", specifier = "~=0.1.77" }, + { name = "litellm", specifier = "==1.77.1" }, { name = "markdown", specifier = "~=3.5.1" }, { name = "numpy", specifier = "~=1.26.4" }, { name = "openpyxl", specifier = "~=3.1.5" }, @@ -1596,11 +1584,11 @@ requires-dist = [ { name = "sentry-sdk", extras = ["flask"], specifier = "~=2.28.0" }, { name = "sqlalchemy", specifier = "~=2.0.29" }, { name = "sseclient-py", specifier = "~=1.8.0" }, - { name = "starlette", specifier = "==0.47.2" }, + { name = "starlette", specifier = "==0.49.1" }, { name = "tiktoken", specifier = "~=0.9.0" }, { name = "transformers", specifier = "~=4.56.1" }, { name = "unstructured", extras = ["docx", "epub", "md", "ppt", "pptx"], specifier = "~=0.16.1" }, - { name = "weave", specifier = "~=0.51.0" }, + { name = "weave", specifier = ">=0.52.16" }, { name = "weaviate-client", specifier = "==4.17.0" }, { name = "webvtt-py", specifier = "~=0.5.1" }, { name = "yarl", specifier = "~=1.18.3" }, @@ -1702,9 +1690,9 @@ vdb = [ { name = "pgvector", specifier = "==0.2.5" }, { name = "pymilvus", specifier = "~=2.5.0" }, { name = "pymochow", specifier = "==2.2.9" }, - { name = "pyobvector", specifier = "~=0.2.15" }, + { name = "pyobvector", specifier = "~=0.2.17" }, { name = "qdrant-client", specifier = "==1.9.0" }, - { name = "tablestore", specifier = "==6.2.0" }, + { name = "tablestore", specifier = "==6.3.7" }, { name = "tcvectordb", specifier = "~=1.6.4" }, { name = "tidb-vector", specifier = "==0.0.9" }, { name = "upstash-vector", specifier = "==0.6.0" }, @@ -1857,7 +1845,7 @@ wheels = [ [[package]] name = "fastapi" -version = "0.120.0" +version = "0.121.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, @@ -1865,9 +1853,9 @@ dependencies = [ { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f7/0e/7f29e8f7219e4526747db182e1afb5a4b6abc3201768fb38d81fa2536241/fastapi-0.120.0.tar.gz", hash = "sha256:6ce2c1cfb7000ac14ffd8ddb2bc12e62d023a36c20ec3710d09d8e36fab177a0", size = 337603, upload-time = "2025-10-23T20:56:34.743Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6b/a4/29e1b861fc9017488ed02ff1052feffa40940cb355ed632a8845df84ce84/fastapi-0.121.1.tar.gz", hash = "sha256:b6dba0538fd15dab6fe4d3e5493c3957d8a9e1e9257f56446b5859af66f32441", size = 342523, upload-time = "2025-11-08T21:48:14.068Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/60/7a639ceaba54aec4e1d5676498c568abc654b95762d456095b6cb529b1ca/fastapi-0.120.0-py3-none-any.whl", hash = "sha256:84009182e530c47648da2f07eb380b44b69889a4acfd9e9035ee4605c5cfc469", size = 108243, upload-time = "2025-10-23T20:56:33.281Z" }, + { url = "https://files.pythonhosted.org/packages/94/fd/2e6f7d706899cc08690c5f6641e2ffbfffe019e8f16ce77104caa5730910/fastapi-0.121.1-py3-none-any.whl", hash = "sha256:2c5c7028bc3a58d8f5f09aecd3fd88a000ccc0c5ad627693264181a3c33aa1fc", size = 109192, upload-time = "2025-11-08T21:48:12.458Z" }, ] [[package]] @@ -2098,11 +2086,11 @@ wheels = [ [[package]] name = "fsspec" -version = "2025.9.0" +version = "2025.10.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/de/e0/bab50af11c2d75c9c4a2a26a5254573c0bd97cea152254401510950486fa/fsspec-2025.9.0.tar.gz", hash = "sha256:19fd429483d25d28b65ec68f9f4adc16c17ea2c7c7bf54ec61360d478fb19c19", size = 304847, upload-time = "2025-09-02T19:10:49.215Z" } +sdist = { url = "https://files.pythonhosted.org/packages/24/7f/2747c0d332b9acfa75dc84447a066fdf812b5a6b8d30472b74d309bfe8cb/fsspec-2025.10.0.tar.gz", hash = "sha256:b6789427626f068f9a83ca4e8a3cc050850b6c0f71f99ddb4f542b8266a26a59", size = 309285, upload-time = "2025-10-30T14:58:44.036Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/47/71/70db47e4f6ce3e5c37a607355f80da8860a33226be640226ac52cb05ef2e/fsspec-2025.9.0-py3-none-any.whl", hash = "sha256:530dc2a2af60a414a832059574df4a6e10cce927f6f4a78209390fe38955cfb7", size = 199289, upload-time = "2025-09-02T19:10:47.708Z" }, + { url = "https://files.pythonhosted.org/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" }, ] [[package]] @@ -2308,15 +2296,15 @@ wheels = [ [[package]] name = "google-cloud-core" -version = "2.4.3" +version = "2.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "google-api-core" }, { name = "google-auth" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d6/b8/2b53838d2acd6ec6168fd284a990c76695e84c65deee79c9f3a4276f6b4f/google_cloud_core-2.4.3.tar.gz", hash = "sha256:1fab62d7102844b278fe6dead3af32408b1df3eb06f5c7e8634cbd40edc4da53", size = 35861, upload-time = "2025-03-10T21:05:38.948Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a6/03/ef0bc99d0e0faf4fdbe67ac445e18cdaa74824fd93cd069e7bb6548cb52d/google_cloud_core-2.5.0.tar.gz", hash = "sha256:7c1b7ef5c92311717bd05301aa1a91ffbc565673d3b0b4163a52d8413a186963", size = 36027, upload-time = "2025-10-29T23:17:39.513Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/40/86/bda7241a8da2d28a754aad2ba0f6776e35b67e37c36ae0c45d49370f1014/google_cloud_core-2.4.3-py2.py3-none-any.whl", hash = "sha256:5130f9f4c14b4fafdff75c79448f9495cfade0d8775facf1b09c3bf67e027f6e", size = 29348, upload-time = "2025-03-10T21:05:37.785Z" }, + { url = "https://files.pythonhosted.org/packages/89/20/bfa472e327c8edee00f04beecc80baeddd2ab33ee0e86fd7654da49d45e9/google_cloud_core-2.5.0-py3-none-any.whl", hash = "sha256:67d977b41ae6c7211ee830c7912e41003ea8194bff15ae7d72fd6f51e57acabc", size = 29469, upload-time = "2025-10-29T23:17:38.548Z" }, ] [[package]] @@ -2428,11 +2416,11 @@ requests = [ [[package]] name = "graphql-core" -version = "3.2.6" +version = "3.2.7" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c4/16/7574029da84834349b60ed71614d66ca3afe46e9bf9c7b9562102acb7d4f/graphql_core-3.2.6.tar.gz", hash = "sha256:c08eec22f9e40f0bd61d805907e3b3b1b9a320bc606e23dc145eebca07c8fbab", size = 505353, upload-time = "2025-01-26T16:36:27.374Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ac/9b/037a640a2983b09aed4a823f9cf1729e6d780b0671f854efa4727a7affbe/graphql_core-3.2.7.tar.gz", hash = "sha256:27b6904bdd3b43f2a0556dad5d579bdfdeab1f38e8e8788e555bdcb586a6f62c", size = 513484, upload-time = "2025-11-01T22:30:40.436Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/4f/7297663840621022bc73c22d7d9d80dbc78b4db6297f764b545cd5dd462d/graphql_core-3.2.6-py3-none-any.whl", hash = "sha256:78b016718c161a6fb20a7d97bbf107f331cd1afe53e45566c59f776ed7f0b45f", size = 203416, upload-time = "2025-01-26T16:36:24.868Z" }, + { url = "https://files.pythonhosted.org/packages/0a/14/933037032608787fb92e365883ad6a741c235e0ff992865ec5d904a38f1e/graphql_core-3.2.7-py3-none-any.whl", hash = "sha256:17fc8f3ca4a42913d8e24d9ac9f08deddf0a0b2483076575757f6c412ead2ec0", size = 207262, upload-time = "2025-11-01T22:30:38.912Z" }, ] [[package]] @@ -2458,6 +2446,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1f/8e/abdd3f14d735b2929290a018ecf133c901be4874b858dd1c604b9319f064/greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8", size = 587684, upload-time = "2025-08-07T13:18:25.164Z" }, { url = "https://files.pythonhosted.org/packages/5d/65/deb2a69c3e5996439b0176f6651e0052542bb6c8f8ec2e3fba97c9768805/greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52", size = 1116647, upload-time = "2025-08-07T13:42:38.655Z" }, { url = "https://files.pythonhosted.org/packages/3f/cc/b07000438a29ac5cfb2194bfc128151d52f333cee74dd7dfe3fb733fc16c/greenlet-3.2.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:55e9c5affaa6775e2c6b67659f3a71684de4c549b3dd9afca3bc773533d284fa", size = 1142073, upload-time = "2025-08-07T13:18:21.737Z" }, + { url = "https://files.pythonhosted.org/packages/67/24/28a5b2fa42d12b3d7e5614145f0bd89714c34c08be6aabe39c14dd52db34/greenlet-3.2.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c9c6de1940a7d828635fbd254d69db79e54619f165ee7ce32fda763a9cb6a58c", size = 1548385, upload-time = "2025-11-04T12:42:11.067Z" }, + { url = "https://files.pythonhosted.org/packages/6a/05/03f2f0bdd0b0ff9a4f7b99333d57b53a7709c27723ec8123056b084e69cd/greenlet-3.2.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03c5136e7be905045160b1b9fdca93dd6727b180feeafda6818e6496434ed8c5", size = 1613329, upload-time = "2025-11-04T12:42:12.928Z" }, { url = "https://files.pythonhosted.org/packages/d8/0f/30aef242fcab550b0b3520b8e3561156857c94288f0332a79928c31a52cf/greenlet-3.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:9c40adce87eaa9ddb593ccb0fa6a07caf34015a29bf8d344811665b573138db9", size = 299100, upload-time = "2025-08-07T13:44:12.287Z" }, { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" }, { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" }, @@ -2467,56 +2457,58 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, { url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" }, + { url = "https://files.pythonhosted.org/packages/27/45/80935968b53cfd3f33cf99ea5f08227f2646e044568c9b1555b58ffd61c2/greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0", size = 1564846, upload-time = "2025-11-04T12:42:15.191Z" }, + { url = "https://files.pythonhosted.org/packages/69/02/b7c30e5e04752cb4db6202a3858b149c0710e5453b71a3b2aec5d78a1aab/greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d", size = 1633814, upload-time = "2025-11-04T12:42:17.175Z" }, { url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" }, ] [[package]] name = "grimp" -version = "3.12" +version = "3.13" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1b/a4/463903a1cfbc19d3e7125d6614bb900df2b34dd675c7d93544d154819d2b/grimp-3.12.tar.gz", hash = "sha256:1a733b1d719c42bd2fada58240975fa7d09936b57120c34b64cfb31e42701010", size = 845594, upload-time = "2025-10-09T09:51:02.064Z" } +sdist = { url = "https://files.pythonhosted.org/packages/80/b3/ff0d704cdc5cf399d74aabd2bf1694d4c4c3231d4d74b011b8f39f686a86/grimp-3.13.tar.gz", hash = "sha256:759bf6e05186e6473ee71af4119ec181855b2b324f4fcdd78dee9e5b59d87874", size = 847508, upload-time = "2025-10-29T13:04:57.704Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0f/b5/1c89600bf181d41502aed51b73b3a5889158dee35c534f51df3666779587/grimp-3.12-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:e6c02e51eebfcf71146d42f47c9ce353ac1902ae446e18d0e663ab9fdaa0496c", size = 2062043, upload-time = "2025-10-09T09:49:57.035Z" }, - { url = "https://files.pythonhosted.org/packages/1f/86/bab32c5e26949a82299853ccb28ee30a7899d0355b0d209b535eb03bc04e/grimp-3.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:79bc2b0ff6072c43c0ddc4479b25b7a8198795486478cfe3be0503b2c7d32c7f", size = 1981378, upload-time = "2025-10-09T09:49:49.237Z" }, - { url = "https://files.pythonhosted.org/packages/b5/03/b9f7e465488e8593de9a1e88355c3cfba04c02c3a34a6b02cbe946e0d587/grimp-3.12-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3986f11a9dd4167a2943cf6e80b458c0a825b48609713736cc8f2de135000810", size = 2130579, upload-time = "2025-10-09T09:48:36.035Z" }, - { url = "https://files.pythonhosted.org/packages/1b/d0/81c776327354f32f86f321dd8468b32ba6b52dc3511d912d24c4fac96da4/grimp-3.12-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7a2abe55844f9dad25499ff9456d680496f390d160b6b3a4e5aeabc0183813b4", size = 2091201, upload-time = "2025-10-09T09:48:52.57Z" }, - { url = "https://files.pythonhosted.org/packages/9d/7e/116ac4c1e4407a123fba4bb076b2e880643d70b3f4f1621c3323b5d66e12/grimp-3.12-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e59112d0f557335b619bcf10263d11873579230bd3df4a4b19224ec18e7212d6", size = 2240782, upload-time = "2025-10-09T09:49:30.915Z" }, - { url = "https://files.pythonhosted.org/packages/06/7f/89bbec1241a8504499975f0f08befea0cf3d27c52f9808602fff8075c639/grimp-3.12-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b858e2e5a489c36710322970aa82bfbd3f1c4107c8564960629a59d2f17a53d0", size = 2423143, upload-time = "2025-10-09T09:49:05.18Z" }, - { url = "https://files.pythonhosted.org/packages/86/d7/2f416439b624b2a91bf2e0e456f58d74d51aa7ad239099cf4a8911d952c0/grimp-3.12-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d46cc1222dd301e0be371b97f0cdecae178089704e8a285e3edd4750ec46270a", size = 2303850, upload-time = "2025-10-09T09:49:19.073Z" }, - { url = "https://files.pythonhosted.org/packages/60/bd/8c2f48c26151eb9a65bc41f01004b43cb1b31791ffb61758d40d2f6b485a/grimp-3.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ef06822f75856af28e7fcc580034043c543b1c99b07d2bd467bd173a7f10691", size = 2168571, upload-time = "2025-10-09T09:49:39.844Z" }, - { url = "https://files.pythonhosted.org/packages/5a/45/01a839434ff88be24317aa52cc1ba158833bd1d071efe0da1b14838af024/grimp-3.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4c19f1cba8a95c898473dd18f9c81358019d67f87f140b0b8401550e6d21c5a3", size = 2310869, upload-time = "2025-10-09T09:50:05.153Z" }, - { url = "https://files.pythonhosted.org/packages/ba/7b/0dc45fdc15562c2faf8a95a8685d3805d27decdef6fcfb66d9b577ed2f12/grimp-3.12-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:600e8dbc1cd9c6decbc22089730221c65591b7ba5f89751d07fc7ad014d99aa1", size = 2353397, upload-time = "2025-10-09T09:50:17.755Z" }, - { url = "https://files.pythonhosted.org/packages/a8/ec/07734ecc4f1489ffc071417f7bc881c939bcfdfba10eb585bce510ede1b2/grimp-3.12-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:259ba53b82cfb9c2c2d097b2237970c4e9903fa2d0b664b7e12329d9a64924f9", size = 2350166, upload-time = "2025-10-09T09:50:32.237Z" }, - { url = "https://files.pythonhosted.org/packages/a4/f5/45d80e2fa205066a484f0c1a667a249408a49bb3b665d62677f879920aa0/grimp-3.12-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a593549b1f66b1c12574e71f9e8c0073b372888c6b6706e2617bba2713ae28c2", size = 2360590, upload-time = "2025-10-09T09:50:49.961Z" }, - { url = "https://files.pythonhosted.org/packages/e6/f2/7ab1bc4d613189183c17741ff0d03490d9749eb5130b8b56e82ed77098b0/grimp-3.12-cp311-cp311-win32.whl", hash = "sha256:356ee969443f06c6c3a270f5a7221f946f0cb135a8b8ece2009990b293504bb3", size = 1748183, upload-time = "2025-10-09T09:51:13.503Z" }, - { url = "https://files.pythonhosted.org/packages/91/62/195f37a68d07fab40c8934ae8e39f9ff1f9a5bf3e375059b9cf14ccba302/grimp-3.12-cp311-cp311-win_amd64.whl", hash = "sha256:75e1f0d74f3a242a1c34e464d775c36b1c8b9d8c92b35f46f221e73e9b2f0065", size = 1851099, upload-time = "2025-10-09T09:51:04.747Z" }, - { url = "https://files.pythonhosted.org/packages/12/ac/0f55980a59c07439a965d3975f1cf3a6574f7d773910b9d6924790e0dddf/grimp-3.12-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:af399fc0ffddfbd7ea6c2e8546be1ab5284ee800f15a445705bdda5d63501b34", size = 2058862, upload-time = "2025-10-09T09:49:58.478Z" }, - { url = "https://files.pythonhosted.org/packages/cc/b1/5fdcb1db7cb3253c78d87a0b8c3f7f9c5214b273861300b51c897c55e6b8/grimp-3.12-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2f08358acbaf9a4b324537bf344fd2d76b5f9b6f1bfaf9a431e9453fc0eaee5f", size = 1977586, upload-time = "2025-10-09T09:49:50.49Z" }, - { url = "https://files.pythonhosted.org/packages/c9/b9/e5f6d265b71430f9641daa9476cde8c23549e396c558b39a0bdc7fee824f/grimp-3.12-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6eeb1616cafe9074fcb390fcfc01e6e5a0e0ddd5acb9dd37579985b2879c239a", size = 2130610, upload-time = "2025-10-09T09:48:38.472Z" }, - { url = "https://files.pythonhosted.org/packages/da/e1/2d0601c9aac2ab7340504e85ca4cd55f2991501a03e421bec78f53a07478/grimp-3.12-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99e648e299f7cd3daaee2cb745192e7ea159c7d38df76b4dcca12a2ef68a3ede", size = 2092775, upload-time = "2025-10-09T09:48:53.841Z" }, - { url = "https://files.pythonhosted.org/packages/db/a1/e63315477127ed8f31a1a93911d084bf704d6e126ca27650e3c3389701a6/grimp-3.12-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b24c5ce351030d1f83e69acd76a06863dd87041ceb25572339f7334e210cbc4", size = 2239336, upload-time = "2025-10-09T09:49:32.185Z" }, - { url = "https://files.pythonhosted.org/packages/f2/09/cd76d35121f053a95a58fc5830756c62e5c9de74aa4e16b4dc27ce6ada2c/grimp-3.12-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fd40a5ec09d1dfafaae88b53231ab79378183e2e9a03e7b26b7a30133d027d8a", size = 2421851, upload-time = "2025-10-09T09:49:06.893Z" }, - { url = "https://files.pythonhosted.org/packages/40/46/e8390a7c5ed85b4dbeff4e873f1ece8d9acf72d72f084b397ccc2facfa3b/grimp-3.12-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0aebdfad66d6f4e8b0f7364ce0429d208be3510918097f969428165074d3103e", size = 2304849, upload-time = "2025-10-09T09:49:20.695Z" }, - { url = "https://files.pythonhosted.org/packages/bd/81/f73edbc48a283f634233b6153ac43e4e7b9f58108ffc19da803b0015cb60/grimp-3.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76fd06be98d6bea9ea8a804da22c80accf1d277fe04abd5f3dff05d087f056f7", size = 2168655, upload-time = "2025-10-09T09:49:41.118Z" }, - { url = "https://files.pythonhosted.org/packages/84/1a/8fa5752f725b8872010627bd10e1aedccdb406c3b4118ec3fe127155284e/grimp-3.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a73a42a43e268ac5b196386beae1ec646f4572409e731bccf2a99ab4ed5c46bf", size = 2311124, upload-time = "2025-10-09T09:50:06.477Z" }, - { url = "https://files.pythonhosted.org/packages/83/a0/02d6b2a86289a4ac73f44f59aaee43c1dc936c984204c73d2affe4570eb6/grimp-3.12-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:af990af7d5e64f484d12cdefacfaaed4ea9418ac4d0a5a928953fd91aaf8df80", size = 2354216, upload-time = "2025-10-09T09:50:19.114Z" }, - { url = "https://files.pythonhosted.org/packages/7b/48/0368289f5bbdf943a48305824b30411b35ef2c7cd8edf2bad48d67b3897e/grimp-3.12-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:82ee28c1e9835572af2c733f7e5913a44193c53ae8ca488039164593b4a750fa", size = 2348372, upload-time = "2025-10-09T09:50:37.479Z" }, - { url = "https://files.pythonhosted.org/packages/26/73/b4f90b4926791d720f6069fc8c8b3e204721d1db839a1c00fbcee1e2a36d/grimp-3.12-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:afdceaea00e305909cb30d68e91b94fcf71d1a7234052549ea31148785a03a52", size = 2361167, upload-time = "2025-10-09T09:50:51.733Z" }, - { url = "https://files.pythonhosted.org/packages/0b/ae/94d34c732d531c7165c8942d7995495aac64e9bb5c28cc6751349eacdcde/grimp-3.12-cp312-cp312-win32.whl", hash = "sha256:40f8e048254d2437dffcd383d2301a82c35d9a3082e878b707d87a6e8c539614", size = 1747179, upload-time = "2025-10-09T09:51:15.224Z" }, - { url = "https://files.pythonhosted.org/packages/5b/cd/48bc396ee2f36e72d5c50ba8b4d7f817fc2cdac7b9ab77d2b097f50a4447/grimp-3.12-cp312-cp312-win_amd64.whl", hash = "sha256:199172d17f22199bf400a0bd5c4985784622201e887a023fe799ca3f3437dedf", size = 1850691, upload-time = "2025-10-09T09:51:05.984Z" }, - { url = "https://files.pythonhosted.org/packages/d9/31/c72e53a46692dc8358cff1af1a9494430a0fecd4c3f2d0d8e9c2eb5e828d/grimp-3.12-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:567d037a3db083e54bee621daba59a2e01fd1391364ae0a0c737995f6eed910b", size = 2131392, upload-time = "2025-10-09T09:48:46.857Z" }, - { url = "https://files.pythonhosted.org/packages/39/10/15e43be32734baaebeee090dca16f06ea5ba933b209b8e1c0d5986dabb32/grimp-3.12-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9b4cc756c91c3d8582ee70b5e013c0e34fdb31c7f808cefe9d15509c45fec31e", size = 2092481, upload-time = "2025-10-09T09:49:00.754Z" }, - { url = "https://files.pythonhosted.org/packages/a1/4a/c9349dee284c2d9384714741896f0f84a1d66011a69cdc364e4d94e188b1/grimp-3.12-pp311-pypy311_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84bd47f9a8619cb8966f18cb6faf5f6cb8d35ade99312477dd8e9de3a9ae4cb7", size = 2242260, upload-time = "2025-10-09T09:49:37.183Z" }, - { url = "https://files.pythonhosted.org/packages/d8/63/3935823f89c12320840bbf018858eeaca7d5285f9769a48921587a88adeb/grimp-3.12-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f30e01855c67a39857c87e6c0eafe5e8891010a35e06cf2145f2cfce8ea9780", size = 2422371, upload-time = "2025-10-09T09:49:14.616Z" }, - { url = "https://files.pythonhosted.org/packages/71/8e/5a75c2335a2dc61738b19318dcdd16392015a984211e3d0b9f6679dc6c89/grimp-3.12-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d07e825f6b052186dabd8dbbcc7e008a3b56e551725e2ba47169fe1e4bde76ac", size = 2304257, upload-time = "2025-10-09T09:49:26.908Z" }, - { url = "https://files.pythonhosted.org/packages/40/99/462d86bc9401a39859f272b867331a678f4b5324a539dc771bdae6d36309/grimp-3.12-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f1a1289d4282be2891ada75ec5d3099e856518c4236b1196e367b630485f8ce", size = 2169360, upload-time = "2025-10-09T09:49:46.575Z" }, - { url = "https://files.pythonhosted.org/packages/d0/07/6d2929f05dae189265633588819d990df35644ad74b6ec74207091dff18d/grimp-3.12-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:85136b555aeb7d3965fdb40af4e4af2011f911b0fde8c20979bf4db7b06455f5", size = 2312280, upload-time = "2025-10-09T09:50:13.491Z" }, - { url = "https://files.pythonhosted.org/packages/5c/47/7e49417e2c496da0b6141e711dca40726d2b30a0adc6db9d04b74c7bafa7/grimp-3.12-pp311-pypy311_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:963efd6ec86e7b47fde835b2526b6be7a3f489857a1cd47a747c94b3e670550a", size = 2354449, upload-time = "2025-10-09T09:50:27.596Z" }, - { url = "https://files.pythonhosted.org/packages/2c/08/2e1db56797e4e26334b3ee4ef1a5fbf56155d74a0318215ed4dcad02ef43/grimp-3.12-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:c9e2ee478b66f0e20c92af6123142ffd6b604c36e9b3a8d391ea9172cc18b6b3", size = 2350545, upload-time = "2025-10-09T09:50:45.623Z" }, - { url = "https://files.pythonhosted.org/packages/37/78/53594064f11b0ae9e72b3e9df5c055f00c5bff44962f7b777846504fc50d/grimp-3.12-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:e8826362d4e403aa2e03d480e3e4d64284a6b6ccafc2c5777bb2bed2535bdc4e", size = 2361926, upload-time = "2025-10-09T09:50:58.605Z" }, + { url = "https://files.pythonhosted.org/packages/45/cc/d272cf87728a7e6ddb44d3c57c1d3cbe7daf2ffe4dc76e3dc9b953b69ab1/grimp-3.13-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:57745996698932768274a2ed9ba3e5c424f60996c53ecaf1c82b75be9e819ee9", size = 2074518, upload-time = "2025-10-29T13:03:58.51Z" }, + { url = "https://files.pythonhosted.org/packages/06/11/31dc622c5a0d1615b20532af2083f4bba2573aebbba5b9d6911dfd60a37d/grimp-3.13-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ca29f09710342b94fa6441f4d1102a0e49f0b463b1d91e43223baa949c5e9337", size = 1988182, upload-time = "2025-10-29T13:03:50.129Z" }, + { url = "https://files.pythonhosted.org/packages/aa/83/a0e19beb5c42df09e9a60711b227b4f910ba57f46bea258a9e1df883976c/grimp-3.13-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:adda25aa158e11d96dd27166300b955c8ec0c76ce2fd1a13597e9af012aada06", size = 2145832, upload-time = "2025-10-29T13:02:35.218Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f5/13752205e290588e970fdc019b4ab2c063ca8da352295c332e34df5d5842/grimp-3.13-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03e17029d75500a5282b40cb15cdae030bf14df9dfaa6a2b983f08898dfe74b6", size = 2106762, upload-time = "2025-10-29T13:02:51.681Z" }, + { url = "https://files.pythonhosted.org/packages/ff/30/c4d62543beda4b9a483a6cd5b7dd5e4794aafb511f144d21a452467989a1/grimp-3.13-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6cbfc9d2d0ebc0631fb4012a002f3d8f4e3acb8325be34db525c0392674433b8", size = 2256674, upload-time = "2025-10-29T13:03:27.923Z" }, + { url = "https://files.pythonhosted.org/packages/9b/ea/d07ed41b7121719c3f7bf30c9881dbde69efeacfc2daf4e4a628efe5f123/grimp-3.13-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:161449751a085484608c5b9f863e41e8fb2a98e93f7312ead5d831e487a94518", size = 2442699, upload-time = "2025-10-29T13:03:04.451Z" }, + { url = "https://files.pythonhosted.org/packages/fe/a0/1923f0480756effb53c7e6cef02a3918bb519a86715992720838d44f0329/grimp-3.13-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:119628fbe7f941d1e784edac98e8ced7e78a0b966a4ff2c449e436ee860bd507", size = 2317145, upload-time = "2025-10-29T13:03:15.941Z" }, + { url = "https://files.pythonhosted.org/packages/0d/d9/aef4c8350090653e34bc755a5d9e39cc300f5c46c651c1d50195f69bf9ab/grimp-3.13-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ca1ac776baf1fa105342b23c72f2e7fdd6771d4cce8d2903d28f92fd34a9e8f", size = 2180288, upload-time = "2025-10-29T13:03:41.023Z" }, + { url = "https://files.pythonhosted.org/packages/9f/2e/a206f76eccffa56310a1c5d5950ed34923a34ae360cb38e297604a288837/grimp-3.13-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:941ff414cc66458f56e6af93c618266091ea70bfdabe7a84039be31d937051ee", size = 2328696, upload-time = "2025-10-29T13:04:06.888Z" }, + { url = "https://files.pythonhosted.org/packages/40/3b/88ff1554409b58faf2673854770e6fc6e90167a182f5166147b7618767d7/grimp-3.13-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:87ad9bcd1caaa2f77c369d61a04b9f2f1b87f4c3b23ae6891b2c943193c4ec62", size = 2367574, upload-time = "2025-10-29T13:04:21.404Z" }, + { url = "https://files.pythonhosted.org/packages/b6/b3/e9c99ecd94567465a0926ae7136e589aed336f6979a4cddcb8dfba16d27c/grimp-3.13-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:751fe37104a4f023d5c6556558b723d843d44361245c20f51a5d196de00e4774", size = 2358842, upload-time = "2025-10-29T13:04:34.26Z" }, + { url = "https://files.pythonhosted.org/packages/74/65/a5fffeeb9273e06dfbe962c8096331ba181ca8415c5f9d110b347f2c0c34/grimp-3.13-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9b561f79ec0b3a4156937709737191ad57520f2d58fa1fc43cd79f67839a3cd7", size = 2382268, upload-time = "2025-10-29T13:04:46.864Z" }, + { url = "https://files.pythonhosted.org/packages/d9/79/2f3b4323184329b26b46de2b6d1bd64ba1c26e0a9c3cfa0aaecec237b75e/grimp-3.13-cp311-cp311-win32.whl", hash = "sha256:52405ea8c8f20cf5d2d1866c80ee3f0243a38af82bd49d1464c5e254bf2e1f8f", size = 1759345, upload-time = "2025-10-29T13:05:10.435Z" }, + { url = "https://files.pythonhosted.org/packages/b6/ce/e86cf73e412a6bf531cbfa5c733f8ca48b28ebea23a037338be763f24849/grimp-3.13-cp311-cp311-win_amd64.whl", hash = "sha256:6a45d1d3beeefad69717b3718e53680fb3579fe67696b86349d6f39b75e850bf", size = 1859382, upload-time = "2025-10-29T13:05:01.071Z" }, + { url = "https://files.pythonhosted.org/packages/1d/06/ff7e3d72839f46f0fccdc79e1afe332318986751e20f65d7211a5e51366c/grimp-3.13-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3e715c56ffdd055e5c84d27b4c02d83369b733e6a24579d42bbbc284bd0664a9", size = 2070161, upload-time = "2025-10-29T13:03:59.755Z" }, + { url = "https://files.pythonhosted.org/packages/58/2f/a95bdf8996db9400fd7e288f32628b2177b8840fe5f6b7cd96247b5fa173/grimp-3.13-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f794dea35a4728b948ab8fec970ffbdf2589b34209f3ab902cf8a9148cf1eaad", size = 1984365, upload-time = "2025-10-29T13:03:51.805Z" }, + { url = "https://files.pythonhosted.org/packages/1f/45/cc3d7f3b7b4d93e0b9d747dc45ed73a96203ba083dc857f24159eb6966b4/grimp-3.13-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69571270f2c27e8a64b968195aa7ecc126797112a9bf1e804ff39ba9f42d6f6d", size = 2145486, upload-time = "2025-10-29T13:02:36.591Z" }, + { url = "https://files.pythonhosted.org/packages/16/92/a6e493b71cb5a9145ad414cc4790c3779853372b840a320f052b22879606/grimp-3.13-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8f7b226398ae476762ef0afb5ef8f838d39c8e0e2f6d1a4378ce47059b221a4a", size = 2106747, upload-time = "2025-10-29T13:02:53.084Z" }, + { url = "https://files.pythonhosted.org/packages/db/8d/36a09f39fe14ad8843ef3ff81090ef23abbd02984c1fcc1cef30e5713d82/grimp-3.13-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5498aeac4df0131a1787fcbe9bb460b52fc9b781ec6bba607fd6a7d6d3ea6fce", size = 2257027, upload-time = "2025-10-29T13:03:29.44Z" }, + { url = "https://files.pythonhosted.org/packages/a1/7a/90f78787f80504caeef501f1bff47e8b9f6058d45995f1d4c921df17bfef/grimp-3.13-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4be702bb2b5c001a6baf709c452358470881e15e3e074cfc5308903603485dcb", size = 2441208, upload-time = "2025-10-29T13:03:05.733Z" }, + { url = "https://files.pythonhosted.org/packages/61/71/0fbd3a3e914512b9602fa24c8ebc85a8925b101f04f8a8c1d1e220e0a717/grimp-3.13-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fcf988f3e3d272a88f7be68f0c1d3719fee8624d902e9c0346b9015a0ea6a65", size = 2318758, upload-time = "2025-10-29T13:03:17.454Z" }, + { url = "https://files.pythonhosted.org/packages/34/e9/29c685e88b3b0688f0a2e30c0825e02076ecdf22bc0e37b1468562eaa09a/grimp-3.13-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ede36d104ff88c208140f978de3345f439345f35b8ef2b4390c59ef6984deba", size = 2180523, upload-time = "2025-10-29T13:03:42.3Z" }, + { url = "https://files.pythonhosted.org/packages/86/bc/7cc09574b287b8850a45051e73272f365259d9b6ca58d7b8773265c6fe35/grimp-3.13-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b35e44bb8dc80e0bd909a64387f722395453593a1884caca9dc0748efea33764", size = 2328855, upload-time = "2025-10-29T13:04:08.111Z" }, + { url = "https://files.pythonhosted.org/packages/34/86/3b0845900c8f984a57c6afe3409b20638065462d48b6afec0fd409fd6118/grimp-3.13-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:becb88e9405fc40896acd6e2b9bbf6f242a5ae2fd43a1ec0a32319ab6c10a227", size = 2367756, upload-time = "2025-10-29T13:04:22.736Z" }, + { url = "https://files.pythonhosted.org/packages/06/2d/4e70e8c06542db92c3fffaecb43ebfc4114a411505bff574d4da7d82c7db/grimp-3.13-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a66585b4af46c3fbadbef495483514bee037e8c3075ed179ba4f13e494eb7899", size = 2358595, upload-time = "2025-10-29T13:04:35.595Z" }, + { url = "https://files.pythonhosted.org/packages/dd/06/c511d39eb6c73069af277f4e74991f1f29a05d90cab61f5416b9fc43932f/grimp-3.13-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:29f68c6e2ff70d782ca0e989ec4ec44df73ba847937bcbb6191499224a2f84e2", size = 2381464, upload-time = "2025-10-29T13:04:48.265Z" }, + { url = "https://files.pythonhosted.org/packages/86/f5/42197d69e4c9e2e7eed091d06493da3824e07c37324155569aa895c3b5f7/grimp-3.13-cp312-cp312-win32.whl", hash = "sha256:cc996dcd1a44ae52d257b9a3e98838f8ecfdc42f7c62c8c82c2fcd3828155c98", size = 1758510, upload-time = "2025-10-29T13:05:11.74Z" }, + { url = "https://files.pythonhosted.org/packages/30/dd/59c5f19f51e25f3dbf1c9e88067a88165f649ba1b8e4174dbaf1c950f78b/grimp-3.13-cp312-cp312-win_amd64.whl", hash = "sha256:e2966435947e45b11568f04a65863dcf836343c11ae44aeefdaa7f07eb1a0576", size = 1859530, upload-time = "2025-10-29T13:05:02.638Z" }, + { url = "https://files.pythonhosted.org/packages/e5/81/82de1b5d82701214b1f8e32b2e71fde8e1edbb4f2cdca9beb22ee6c8796d/grimp-3.13-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a6a3c76525b018c85c0e3a632d94d72be02225f8ada56670f3f213cf0762be4", size = 2145955, upload-time = "2025-10-29T13:02:47.559Z" }, + { url = "https://files.pythonhosted.org/packages/8c/ae/ada18cb73bdf97094af1c60070a5b85549482a57c509ee9a23fdceed4fc3/grimp-3.13-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:239e9b347af4da4cf69465bfa7b2901127f6057bc73416ba8187fb1eabafc6ea", size = 2107150, upload-time = "2025-10-29T13:02:59.891Z" }, + { url = "https://files.pythonhosted.org/packages/10/5e/6d8c65643ad5a1b6e00cc2cd8f56fc063923485f07c59a756fa61eefe7f2/grimp-3.13-pp311-pypy311_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6db85ce2dc2f804a2edd1c1e9eaa46d282e1f0051752a83ca08ca8b87f87376", size = 2257515, upload-time = "2025-10-29T13:03:36.705Z" }, + { url = "https://files.pythonhosted.org/packages/b2/62/72cbfd7d0f2b95a53edd01d5f6b0d02bde38db739a727e35b76c13e0d0a8/grimp-3.13-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e000f3590bcc6ff7c781ebbc1ac4eb919f97180f13cc4002c868822167bd9aed", size = 2441262, upload-time = "2025-10-29T13:03:12.158Z" }, + { url = "https://files.pythonhosted.org/packages/18/00/b9209ab385567c3bddffb5d9eeecf9cb432b05c30ca8f35904b06e206a89/grimp-3.13-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e2374c217c862c1af933a430192d6a7c6723ed1d90303f1abbc26f709bbb9263", size = 2318557, upload-time = "2025-10-29T13:03:23.925Z" }, + { url = "https://files.pythonhosted.org/packages/11/4d/a3d73c11d09da00a53ceafe2884a71c78f5a76186af6d633cadd6c85d850/grimp-3.13-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ed0ff17d559ff2e7fa1be8ae086bc4fedcace5d7b12017f60164db8d9a8d806", size = 2180811, upload-time = "2025-10-29T13:03:47.461Z" }, + { url = "https://files.pythonhosted.org/packages/c1/9a/1cdfaa7d7beefd8859b190dfeba11d5ec074e8702b2903e9f182d662ed63/grimp-3.13-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:43960234aabce018c8d796ec8b77c484a1c9cbb6a3bc036a0d307c8dade9874c", size = 2329205, upload-time = "2025-10-29T13:04:15.845Z" }, + { url = "https://files.pythonhosted.org/packages/86/73/b36f86ef98df96e7e8a6166dfa60c8db5d597f051e613a3112f39a870b4c/grimp-3.13-pp311-pypy311_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:44420b638b3e303f32314bd4d309f15de1666629035acd1cdd3720c15917ac85", size = 2368745, upload-time = "2025-10-29T13:04:29.706Z" }, + { url = "https://files.pythonhosted.org/packages/02/2f/0ce37872fad5c4b82d727f6e435fd5bc76f701279bddc9666710318940cf/grimp-3.13-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:f6127fdb982cf135612504d34aa16b841f421e54751fcd54f80b9531decb2b3f", size = 2358753, upload-time = "2025-10-29T13:04:42.632Z" }, + { url = "https://files.pythonhosted.org/packages/bb/23/935c888ac9ee71184fe5adf5ea86648746739be23c85932857ac19fc1d17/grimp-3.13-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:69893a9ef1edea25226ed17e8e8981e32900c59703972e0780c0e927ce624f75", size = 2383066, upload-time = "2025-10-29T13:04:55.073Z" }, ] [[package]] @@ -2834,15 +2826,14 @@ wheels = [ [[package]] name = "hypothesis" -version = "6.142.4" +version = "6.147.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "attrs" }, { name = "sortedcontainers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/47/0b/76a062d1d6cd68342b460c2f5627e1ad1102a3dd781acd5c096c75aca0d6/hypothesis-6.142.4.tar.gz", hash = "sha256:b3e71a84708994aa910ea47f1483ad892a7c390839959d689b2a2b07ebfd160e", size = 466047, upload-time = "2025-10-25T16:19:03.838Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/53/e19fe74671fd60db86344a4623c818fac58b813cc3efbb7ea3b3074dcb71/hypothesis-6.147.0.tar.gz", hash = "sha256:72e6004ea3bd1460bdb4640b6389df23b87ba7a4851893fd84d1375635d3e507", size = 468587, upload-time = "2025-11-06T20:27:29.682Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3e/9f/8010f93e175ecd996f54df9019ee8c58025fc21ed47658b0a58dd25ebe8b/hypothesis-6.142.4-py3-none-any.whl", hash = "sha256:25eecc73fadecd8b491aed822204cfe4be9c98ff5c1e8e038d181136ffc54b5b", size = 533467, upload-time = "2025-10-25T16:19:00.443Z" }, + { url = "https://files.pythonhosted.org/packages/b2/1b/932eddc3d55c4ed6c585006cffe6c6a133b5e1797d873de0bcf5208e4fed/hypothesis-6.147.0-py3-none-any.whl", hash = "sha256:de588807b6da33550d32f47bcd42b1a86d061df85673aa73e6443680249d185e", size = 535595, upload-time = "2025-11-06T20:27:23.536Z" }, ] [[package]] @@ -2856,16 +2847,16 @@ wheels = [ [[package]] name = "import-linter" -version = "2.5.2" +version = "2.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "grimp" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6c/fd/49913b98fdeb5a8a120ca756abfc9aa7fdef7c20da1d728173e98ce11160/import_linter-2.5.2.tar.gz", hash = "sha256:d8f2dc6432975cc35edc4cc0bfcf1b811f05500b377ce0c3f62729d68f46c698", size = 159664, upload-time = "2025-10-09T10:53:24.635Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1d/20/37f3661ccbdba41072a74cb7a57a932b6884ab6c489318903d2d870c6c07/import_linter-2.6.tar.gz", hash = "sha256:60429a450eb6ebeed536f6d2b83428b026c5747ca69d029812e2f1360b136f85", size = 161294, upload-time = "2025-11-10T09:59:20.977Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/f4/f20eeb9e6ab178ce011457cd936877202556f14b7af3ef2b3c3e26f3758a/import_linter-2.5.2-py3-none-any.whl", hash = "sha256:a70b64c2451dc6b96ff9ef5af4e3f6a2c8b63532a66a3c96a7c31ca086b10003", size = 44140, upload-time = "2025-10-09T10:53:23.367Z" }, + { url = "https://files.pythonhosted.org/packages/44/df/02389e13d340229baa687bd0b9be4878e13668ce0beadbe531fb2b597386/import_linter-2.6-py3-none-any.whl", hash = "sha256:4e835141294b803325a619b8c789398320b81f0bde7771e0dd36f34524e51b1e", size = 46488, upload-time = "2025-11-10T09:59:19.611Z" }, ] [[package]] @@ -2945,44 +2936,44 @@ wheels = [ [[package]] name = "jiter" -version = "0.11.1" +version = "0.12.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a3/68/0357982493a7b20925aece061f7fb7a2678e3b232f8d73a6edb7e5304443/jiter-0.11.1.tar.gz", hash = "sha256:849dcfc76481c0ea0099391235b7ca97d7279e0fa4c86005457ac7c88e8b76dc", size = 168385, upload-time = "2025-10-17T11:31:15.186Z" } +sdist = { url = "https://files.pythonhosted.org/packages/45/9d/e0660989c1370e25848bb4c52d061c71837239738ad937e83edca174c273/jiter-0.12.0.tar.gz", hash = "sha256:64dfcd7d5c168b38d3f9f8bba7fc639edb3418abcc74f22fdbe6b8938293f30b", size = 168294, upload-time = "2025-11-09T20:49:23.302Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/34/c9e6cfe876f9a24f43ed53fe29f052ce02bd8d5f5a387dbf46ad3764bef0/jiter-0.11.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:9b0088ff3c374ce8ce0168523ec8e97122ebb788f950cf7bb8e39c7dc6a876a2", size = 310160, upload-time = "2025-10-17T11:28:59.174Z" }, - { url = "https://files.pythonhosted.org/packages/bc/9f/b06ec8181d7165858faf2ac5287c54fe52b2287760b7fe1ba9c06890255f/jiter-0.11.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:74433962dd3c3090655e02e461267095d6c84f0741c7827de11022ef8d7ff661", size = 316573, upload-time = "2025-10-17T11:29:00.905Z" }, - { url = "https://files.pythonhosted.org/packages/66/49/3179d93090f2ed0c6b091a9c210f266d2d020d82c96f753260af536371d0/jiter-0.11.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d98030e345e6546df2cc2c08309c502466c66c4747b043f1a0d415fada862b8", size = 348998, upload-time = "2025-10-17T11:29:02.321Z" }, - { url = "https://files.pythonhosted.org/packages/ae/9d/63db2c8eabda7a9cad65a2e808ca34aaa8689d98d498f5a2357d7a2e2cec/jiter-0.11.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1d6db0b2e788db46bec2cf729a88b6dd36959af2abd9fa2312dfba5acdd96dcb", size = 363413, upload-time = "2025-10-17T11:29:03.787Z" }, - { url = "https://files.pythonhosted.org/packages/25/ff/3e6b3170c5053053c7baddb8d44e2bf11ff44cd71024a280a8438ae6ba32/jiter-0.11.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:55678fbbda261eafe7289165dd2ddd0e922df5f9a1ae46d7c79a5a15242bd7d1", size = 487144, upload-time = "2025-10-17T11:29:05.37Z" }, - { url = "https://files.pythonhosted.org/packages/b0/50/b63fcadf699893269b997f4c2e88400bc68f085c6db698c6e5e69d63b2c1/jiter-0.11.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6a6b74fae8e40497653b52ce6ca0f1b13457af769af6fb9c1113efc8b5b4d9be", size = 376215, upload-time = "2025-10-17T11:29:07.123Z" }, - { url = "https://files.pythonhosted.org/packages/39/8c/57a8a89401134167e87e73471b9cca321cf651c1fd78c45f3a0f16932213/jiter-0.11.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a55a453f8b035eb4f7852a79a065d616b7971a17f5e37a9296b4b38d3b619e4", size = 359163, upload-time = "2025-10-17T11:29:09.047Z" }, - { url = "https://files.pythonhosted.org/packages/4b/96/30b0cdbffbb6f753e25339d3dbbe26890c9ef119928314578201c758aace/jiter-0.11.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2638148099022e6bdb3f42904289cd2e403609356fb06eb36ddec2d50958bc29", size = 385344, upload-time = "2025-10-17T11:29:10.69Z" }, - { url = "https://files.pythonhosted.org/packages/c6/d5/31dae27c1cc9410ad52bb514f11bfa4f286f7d6ef9d287b98b8831e156ec/jiter-0.11.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:252490567a5d990986f83b95a5f1ca1bf205ebd27b3e9e93bb7c2592380e29b9", size = 517972, upload-time = "2025-10-17T11:29:12.174Z" }, - { url = "https://files.pythonhosted.org/packages/61/1e/5905a7a3aceab80de13ab226fd690471a5e1ee7e554dc1015e55f1a6b896/jiter-0.11.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d431d52b0ca2436eea6195f0f48528202100c7deda354cb7aac0a302167594d5", size = 508408, upload-time = "2025-10-17T11:29:13.597Z" }, - { url = "https://files.pythonhosted.org/packages/91/12/1c49b97aa49077e136e8591cef7162f0d3e2860ae457a2d35868fd1521ef/jiter-0.11.1-cp311-cp311-win32.whl", hash = "sha256:db6f41e40f8bae20c86cb574b48c4fd9f28ee1c71cb044e9ec12e78ab757ba3a", size = 203937, upload-time = "2025-10-17T11:29:14.894Z" }, - { url = "https://files.pythonhosted.org/packages/6d/9d/2255f7c17134ee9892c7e013c32d5bcf4bce64eb115402c9fe5e727a67eb/jiter-0.11.1-cp311-cp311-win_amd64.whl", hash = "sha256:0cc407b8e6cdff01b06bb80f61225c8b090c3df108ebade5e0c3c10993735b19", size = 207589, upload-time = "2025-10-17T11:29:16.166Z" }, - { url = "https://files.pythonhosted.org/packages/3c/28/6307fc8f95afef84cae6caf5429fee58ef16a582c2ff4db317ceb3e352fa/jiter-0.11.1-cp311-cp311-win_arm64.whl", hash = "sha256:fe04ea475392a91896d1936367854d346724a1045a247e5d1c196410473b8869", size = 188391, upload-time = "2025-10-17T11:29:17.488Z" }, - { url = "https://files.pythonhosted.org/packages/15/8b/318e8af2c904a9d29af91f78c1e18f0592e189bbdb8a462902d31fe20682/jiter-0.11.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:c92148eec91052538ce6823dfca9525f5cfc8b622d7f07e9891a280f61b8c96c", size = 305655, upload-time = "2025-10-17T11:29:18.859Z" }, - { url = "https://files.pythonhosted.org/packages/f7/29/6c7de6b5d6e511d9e736312c0c9bfcee8f9b6bef68182a08b1d78767e627/jiter-0.11.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ecd4da91b5415f183a6be8f7158d127bdd9e6a3174138293c0d48d6ea2f2009d", size = 315645, upload-time = "2025-10-17T11:29:20.889Z" }, - { url = "https://files.pythonhosted.org/packages/ac/5f/ef9e5675511ee0eb7f98dd8c90509e1f7743dbb7c350071acae87b0145f3/jiter-0.11.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7e3ac25c00b9275684d47aa42febaa90a9958e19fd1726c4ecf755fbe5e553b", size = 348003, upload-time = "2025-10-17T11:29:22.712Z" }, - { url = "https://files.pythonhosted.org/packages/56/1b/abe8c4021010b0a320d3c62682769b700fb66f92c6db02d1a1381b3db025/jiter-0.11.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:57d7305c0a841858f866cd459cd9303f73883fb5e097257f3d4a3920722c69d4", size = 365122, upload-time = "2025-10-17T11:29:24.408Z" }, - { url = "https://files.pythonhosted.org/packages/2a/2d/4a18013939a4f24432f805fbd5a19893e64650b933edb057cd405275a538/jiter-0.11.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e86fa10e117dce22c547f31dd6d2a9a222707d54853d8de4e9a2279d2c97f239", size = 488360, upload-time = "2025-10-17T11:29:25.724Z" }, - { url = "https://files.pythonhosted.org/packages/f0/77/38124f5d02ac4131f0dfbcfd1a19a0fac305fa2c005bc4f9f0736914a1a4/jiter-0.11.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ae5ef1d48aec7e01ee8420155d901bb1d192998fa811a65ebb82c043ee186711", size = 376884, upload-time = "2025-10-17T11:29:27.056Z" }, - { url = "https://files.pythonhosted.org/packages/7b/43/59fdc2f6267959b71dd23ce0bd8d4aeaf55566aa435a5d00f53d53c7eb24/jiter-0.11.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb68e7bf65c990531ad8715e57d50195daf7c8e6f1509e617b4e692af1108939", size = 358827, upload-time = "2025-10-17T11:29:28.698Z" }, - { url = "https://files.pythonhosted.org/packages/7d/d0/b3cc20ff5340775ea3bbaa0d665518eddecd4266ba7244c9cb480c0c82ec/jiter-0.11.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43b30c8154ded5845fa454ef954ee67bfccce629b2dea7d01f795b42bc2bda54", size = 385171, upload-time = "2025-10-17T11:29:30.078Z" }, - { url = "https://files.pythonhosted.org/packages/d2/bc/94dd1f3a61f4dc236f787a097360ec061ceeebebf4ea120b924d91391b10/jiter-0.11.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:586cafbd9dd1f3ce6a22b4a085eaa6be578e47ba9b18e198d4333e598a91db2d", size = 518359, upload-time = "2025-10-17T11:29:31.464Z" }, - { url = "https://files.pythonhosted.org/packages/7e/8c/12ee132bd67e25c75f542c227f5762491b9a316b0dad8e929c95076f773c/jiter-0.11.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:677cc2517d437a83bb30019fd4cf7cad74b465914c56ecac3440d597ac135250", size = 509205, upload-time = "2025-10-17T11:29:32.895Z" }, - { url = "https://files.pythonhosted.org/packages/39/d5/9de848928ce341d463c7e7273fce90ea6d0ea4343cd761f451860fa16b59/jiter-0.11.1-cp312-cp312-win32.whl", hash = "sha256:fa992af648fcee2b850a3286a35f62bbbaeddbb6dbda19a00d8fbc846a947b6e", size = 205448, upload-time = "2025-10-17T11:29:34.217Z" }, - { url = "https://files.pythonhosted.org/packages/ee/b0/8002d78637e05009f5e3fb5288f9d57d65715c33b5d6aa20fd57670feef5/jiter-0.11.1-cp312-cp312-win_amd64.whl", hash = "sha256:88b5cae9fa51efeb3d4bd4e52bfd4c85ccc9cac44282e2a9640893a042ba4d87", size = 204285, upload-time = "2025-10-17T11:29:35.446Z" }, - { url = "https://files.pythonhosted.org/packages/9f/a2/bb24d5587e4dff17ff796716542f663deee337358006a80c8af43ddc11e5/jiter-0.11.1-cp312-cp312-win_arm64.whl", hash = "sha256:9a6cae1ab335551917f882f2c3c1efe7617b71b4c02381e4382a8fc80a02588c", size = 188712, upload-time = "2025-10-17T11:29:37.027Z" }, - { url = "https://files.pythonhosted.org/packages/9d/51/bd41562dd284e2a18b6dc0a99d195fd4a3560d52ab192c42e56fe0316643/jiter-0.11.1-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:e642b5270e61dd02265866398707f90e365b5db2eb65a4f30c789d826682e1f6", size = 306871, upload-time = "2025-10-17T11:31:03.616Z" }, - { url = "https://files.pythonhosted.org/packages/ba/cb/64e7f21dd357e8cd6b3c919c26fac7fc198385bbd1d85bb3b5355600d787/jiter-0.11.1-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:464ba6d000585e4e2fd1e891f31f1231f497273414f5019e27c00a4b8f7a24ad", size = 301454, upload-time = "2025-10-17T11:31:05.338Z" }, - { url = "https://files.pythonhosted.org/packages/55/b0/54bdc00da4ef39801b1419a01035bd8857983de984fd3776b0be6b94add7/jiter-0.11.1-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:055568693ab35e0bf3a171b03bb40b2dcb10352359e0ab9b5ed0da2bf1eb6f6f", size = 336801, upload-time = "2025-10-17T11:31:06.893Z" }, - { url = "https://files.pythonhosted.org/packages/de/8f/87176ed071d42e9db415ed8be787ef4ef31a4fa27f52e6a4fbf34387bd28/jiter-0.11.1-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0c69ea798d08a915ba4478113efa9e694971e410056392f4526d796f136d3fa", size = 343452, upload-time = "2025-10-17T11:31:08.259Z" }, - { url = "https://files.pythonhosted.org/packages/a6/bc/950dd7f170c6394b6fdd73f989d9e729bd98907bcc4430ef080a72d06b77/jiter-0.11.1-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:0d4d6993edc83cf75e8c6828a8d6ce40a09ee87e38c7bfba6924f39e1337e21d", size = 302626, upload-time = "2025-10-17T11:31:09.645Z" }, - { url = "https://files.pythonhosted.org/packages/3a/65/43d7971ca82ee100b7b9b520573eeef7eabc0a45d490168ebb9a9b5bb8b2/jiter-0.11.1-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:f78d151c83a87a6cf5461d5ee55bc730dd9ae227377ac6f115b922989b95f838", size = 297034, upload-time = "2025-10-17T11:31:10.975Z" }, - { url = "https://files.pythonhosted.org/packages/19/4c/000e1e0c0c67e96557a279f8969487ea2732d6c7311698819f977abae837/jiter-0.11.1-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9022974781155cd5521d5cb10997a03ee5e31e8454c9d999dcdccd253f2353f", size = 337328, upload-time = "2025-10-17T11:31:12.399Z" }, - { url = "https://files.pythonhosted.org/packages/d9/71/71408b02c6133153336d29fa3ba53000f1e1a3f78bb2fc2d1a1865d2e743/jiter-0.11.1-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18c77aaa9117510d5bdc6a946baf21b1f0cfa58ef04d31c8d016f206f2118960", size = 343697, upload-time = "2025-10-17T11:31:13.773Z" }, + { url = "https://files.pythonhosted.org/packages/32/f9/eaca4633486b527ebe7e681c431f529b63fe2709e7c5242fc0f43f77ce63/jiter-0.12.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d8f8a7e317190b2c2d60eb2e8aa835270b008139562d70fe732e1c0020ec53c9", size = 316435, upload-time = "2025-11-09T20:47:02.087Z" }, + { url = "https://files.pythonhosted.org/packages/10/c1/40c9f7c22f5e6ff715f28113ebaba27ab85f9af2660ad6e1dd6425d14c19/jiter-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2218228a077e784c6c8f1a8e5d6b8cb1dea62ce25811c356364848554b2056cd", size = 320548, upload-time = "2025-11-09T20:47:03.409Z" }, + { url = "https://files.pythonhosted.org/packages/6b/1b/efbb68fe87e7711b00d2cfd1f26bb4bfc25a10539aefeaa7727329ffb9cb/jiter-0.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9354ccaa2982bf2188fd5f57f79f800ef622ec67beb8329903abf6b10da7d423", size = 351915, upload-time = "2025-11-09T20:47:05.171Z" }, + { url = "https://files.pythonhosted.org/packages/15/2d/c06e659888c128ad1e838123d0638f0efad90cc30860cb5f74dd3f2fc0b3/jiter-0.12.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8f2607185ea89b4af9a604d4c7ec40e45d3ad03ee66998b031134bc510232bb7", size = 368966, upload-time = "2025-11-09T20:47:06.508Z" }, + { url = "https://files.pythonhosted.org/packages/6b/20/058db4ae5fb07cf6a4ab2e9b9294416f606d8e467fb74c2184b2a1eeacba/jiter-0.12.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3a585a5e42d25f2e71db5f10b171f5e5ea641d3aa44f7df745aa965606111cc2", size = 482047, upload-time = "2025-11-09T20:47:08.382Z" }, + { url = "https://files.pythonhosted.org/packages/49/bb/dc2b1c122275e1de2eb12905015d61e8316b2f888bdaac34221c301495d6/jiter-0.12.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd9e21d34edff5a663c631f850edcb786719c960ce887a5661e9c828a53a95d9", size = 380835, upload-time = "2025-11-09T20:47:09.81Z" }, + { url = "https://files.pythonhosted.org/packages/23/7d/38f9cd337575349de16da575ee57ddb2d5a64d425c9367f5ef9e4612e32e/jiter-0.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a612534770470686cd5431478dc5a1b660eceb410abade6b1b74e320ca98de6", size = 364587, upload-time = "2025-11-09T20:47:11.529Z" }, + { url = "https://files.pythonhosted.org/packages/f0/a3/b13e8e61e70f0bb06085099c4e2462647f53cc2ca97614f7fedcaa2bb9f3/jiter-0.12.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3985aea37d40a908f887b34d05111e0aae822943796ebf8338877fee2ab67725", size = 390492, upload-time = "2025-11-09T20:47:12.993Z" }, + { url = "https://files.pythonhosted.org/packages/07/71/e0d11422ed027e21422f7bc1883c61deba2d9752b720538430c1deadfbca/jiter-0.12.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b1207af186495f48f72529f8d86671903c8c10127cac6381b11dddc4aaa52df6", size = 522046, upload-time = "2025-11-09T20:47:14.6Z" }, + { url = "https://files.pythonhosted.org/packages/9f/59/b968a9aa7102a8375dbbdfbd2aeebe563c7e5dddf0f47c9ef1588a97e224/jiter-0.12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ef2fb241de583934c9915a33120ecc06d94aa3381a134570f59eed784e87001e", size = 513392, upload-time = "2025-11-09T20:47:16.011Z" }, + { url = "https://files.pythonhosted.org/packages/ca/e4/7df62002499080dbd61b505c5cb351aa09e9959d176cac2aa8da6f93b13b/jiter-0.12.0-cp311-cp311-win32.whl", hash = "sha256:453b6035672fecce8007465896a25b28a6b59cfe8fbc974b2563a92f5a92a67c", size = 206096, upload-time = "2025-11-09T20:47:17.344Z" }, + { url = "https://files.pythonhosted.org/packages/bb/60/1032b30ae0572196b0de0e87dce3b6c26a1eff71aad5fe43dee3082d32e0/jiter-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:ca264b9603973c2ad9435c71a8ec8b49f8f715ab5ba421c85a51cde9887e421f", size = 204899, upload-time = "2025-11-09T20:47:19.365Z" }, + { url = "https://files.pythonhosted.org/packages/49/d5/c145e526fccdb834063fb45c071df78b0cc426bbaf6de38b0781f45d956f/jiter-0.12.0-cp311-cp311-win_arm64.whl", hash = "sha256:cb00ef392e7d684f2754598c02c409f376ddcef857aae796d559e6cacc2d78a5", size = 188070, upload-time = "2025-11-09T20:47:20.75Z" }, + { url = "https://files.pythonhosted.org/packages/92/c9/5b9f7b4983f1b542c64e84165075335e8a236fa9e2ea03a0c79780062be8/jiter-0.12.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:305e061fa82f4680607a775b2e8e0bcb071cd2205ac38e6ef48c8dd5ebe1cf37", size = 314449, upload-time = "2025-11-09T20:47:22.999Z" }, + { url = "https://files.pythonhosted.org/packages/98/6e/e8efa0e78de00db0aee82c0cf9e8b3f2027efd7f8a71f859d8f4be8e98ef/jiter-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5c1860627048e302a528333c9307c818c547f214d8659b0705d2195e1a94b274", size = 319855, upload-time = "2025-11-09T20:47:24.779Z" }, + { url = "https://files.pythonhosted.org/packages/20/26/894cd88e60b5d58af53bec5c6759d1292bd0b37a8b5f60f07abf7a63ae5f/jiter-0.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df37577a4f8408f7e0ec3205d2a8f87672af8f17008358063a4d6425b6081ce3", size = 350171, upload-time = "2025-11-09T20:47:26.469Z" }, + { url = "https://files.pythonhosted.org/packages/f5/27/a7b818b9979ac31b3763d25f3653ec3a954044d5e9f5d87f2f247d679fd1/jiter-0.12.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:75fdd787356c1c13a4f40b43c2156276ef7a71eb487d98472476476d803fb2cf", size = 365590, upload-time = "2025-11-09T20:47:27.918Z" }, + { url = "https://files.pythonhosted.org/packages/ba/7e/e46195801a97673a83746170b17984aa8ac4a455746354516d02ca5541b4/jiter-0.12.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1eb5db8d9c65b112aacf14fcd0faae9913d07a8afea5ed06ccdd12b724e966a1", size = 479462, upload-time = "2025-11-09T20:47:29.654Z" }, + { url = "https://files.pythonhosted.org/packages/ca/75/f833bfb009ab4bd11b1c9406d333e3b4357709ed0570bb48c7c06d78c7dd/jiter-0.12.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:73c568cc27c473f82480abc15d1301adf333a7ea4f2e813d6a2c7d8b6ba8d0df", size = 378983, upload-time = "2025-11-09T20:47:31.026Z" }, + { url = "https://files.pythonhosted.org/packages/71/b3/7a69d77943cc837d30165643db753471aff5df39692d598da880a6e51c24/jiter-0.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4321e8a3d868919bcb1abb1db550d41f2b5b326f72df29e53b2df8b006eb9403", size = 361328, upload-time = "2025-11-09T20:47:33.286Z" }, + { url = "https://files.pythonhosted.org/packages/b0/ac/a78f90caf48d65ba70d8c6efc6f23150bc39dc3389d65bbec2a95c7bc628/jiter-0.12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a51bad79f8cc9cac2b4b705039f814049142e0050f30d91695a2d9a6611f126", size = 386740, upload-time = "2025-11-09T20:47:34.703Z" }, + { url = "https://files.pythonhosted.org/packages/39/b6/5d31c2cc8e1b6a6bcf3c5721e4ca0a3633d1ab4754b09bc7084f6c4f5327/jiter-0.12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2a67b678f6a5f1dd6c36d642d7db83e456bc8b104788262aaefc11a22339f5a9", size = 520875, upload-time = "2025-11-09T20:47:36.058Z" }, + { url = "https://files.pythonhosted.org/packages/30/b5/4df540fae4e9f68c54b8dab004bd8c943a752f0b00efd6e7d64aa3850339/jiter-0.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efe1a211fe1fd14762adea941e3cfd6c611a136e28da6c39272dbb7a1bbe6a86", size = 511457, upload-time = "2025-11-09T20:47:37.932Z" }, + { url = "https://files.pythonhosted.org/packages/07/65/86b74010e450a1a77b2c1aabb91d4a91dd3cd5afce99f34d75fd1ac64b19/jiter-0.12.0-cp312-cp312-win32.whl", hash = "sha256:d779d97c834b4278276ec703dc3fc1735fca50af63eb7262f05bdb4e62203d44", size = 204546, upload-time = "2025-11-09T20:47:40.47Z" }, + { url = "https://files.pythonhosted.org/packages/1c/c7/6659f537f9562d963488e3e55573498a442503ced01f7e169e96a6110383/jiter-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e8269062060212b373316fe69236096aaf4c49022d267c6736eebd66bbbc60bb", size = 205196, upload-time = "2025-11-09T20:47:41.794Z" }, + { url = "https://files.pythonhosted.org/packages/21/f4/935304f5169edadfec7f9c01eacbce4c90bb9a82035ac1de1f3bd2d40be6/jiter-0.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:06cb970936c65de926d648af0ed3d21857f026b1cf5525cb2947aa5e01e05789", size = 186100, upload-time = "2025-11-09T20:47:43.007Z" }, + { url = "https://files.pythonhosted.org/packages/fe/54/5339ef1ecaa881c6948669956567a64d2670941925f245c434f494ffb0e5/jiter-0.12.0-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:4739a4657179ebf08f85914ce50332495811004cc1747852e8b2041ed2aab9b8", size = 311144, upload-time = "2025-11-09T20:49:10.503Z" }, + { url = "https://files.pythonhosted.org/packages/27/74/3446c652bffbd5e81ab354e388b1b5fc1d20daac34ee0ed11ff096b1b01a/jiter-0.12.0-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:41da8def934bf7bec16cb24bd33c0ca62126d2d45d81d17b864bd5ad721393c3", size = 305877, upload-time = "2025-11-09T20:49:12.269Z" }, + { url = "https://files.pythonhosted.org/packages/a1/f4/ed76ef9043450f57aac2d4fbeb27175aa0eb9c38f833be6ef6379b3b9a86/jiter-0.12.0-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c44ee814f499c082e69872d426b624987dbc5943ab06e9bbaa4f81989fdb79e", size = 340419, upload-time = "2025-11-09T20:49:13.803Z" }, + { url = "https://files.pythonhosted.org/packages/21/01/857d4608f5edb0664aa791a3d45702e1a5bcfff9934da74035e7b9803846/jiter-0.12.0-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd2097de91cf03eaa27b3cbdb969addf83f0179c6afc41bbc4513705e013c65d", size = 347212, upload-time = "2025-11-09T20:49:15.643Z" }, + { url = "https://files.pythonhosted.org/packages/cb/f5/12efb8ada5f5c9edc1d4555fe383c1fb2eac05ac5859258a72d61981d999/jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:e8547883d7b96ef2e5fe22b88f8a4c8725a56e7f4abafff20fd5272d634c7ecb", size = 309974, upload-time = "2025-11-09T20:49:17.187Z" }, + { url = "https://files.pythonhosted.org/packages/85/15/d6eb3b770f6a0d332675141ab3962fd4a7c270ede3515d9f3583e1d28276/jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:89163163c0934854a668ed783a2546a0617f71706a2551a4a0666d91ab365d6b", size = 304233, upload-time = "2025-11-09T20:49:18.734Z" }, + { url = "https://files.pythonhosted.org/packages/8c/3e/e7e06743294eea2cf02ced6aa0ff2ad237367394e37a0e2b4a1108c67a36/jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d96b264ab7d34bbb2312dedc47ce07cd53f06835eacbc16dde3761f47c3a9e7f", size = 338537, upload-time = "2025-11-09T20:49:20.317Z" }, + { url = "https://files.pythonhosted.org/packages/2f/9c/6753e6522b8d0ef07d3a3d239426669e984fb0eba15a315cdbc1253904e4/jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c24e864cb30ab82311c6425655b0cdab0a98c5d973b065c66a3f020740c2324c", size = 346110, upload-time = "2025-11-09T20:49:21.817Z" }, ] [[package]] @@ -3005,11 +2996,11 @@ wheels = [ [[package]] name = "json-repair" -version = "0.52.3" +version = "0.53.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7e/f4/30c46f60c2f87d02ae0ab40347cfb30dc4906b6c8aa8157730fe7d959041/json_repair-0.52.3.tar.gz", hash = "sha256:5e6a6e4df41eb4ad2d316312940095964964f98c5f754583a0ae6efa4d6a613d", size = 35572, upload-time = "2025-10-22T04:37:33.879Z" } +sdist = { url = "https://files.pythonhosted.org/packages/69/9c/be1d84106529aeacbe6151c1e1dc202f5a5cfa0d9bac748d4a1039ebb913/json_repair-0.53.0.tar.gz", hash = "sha256:97fcbf1eea0bbcf6d5cc94befc573623ab4bbba6abdc394cfd3b933a2571266d", size = 36204, upload-time = "2025-11-08T13:45:15.807Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8c/cd/20e2aa73c9b937fbad7fe42624e1bc2552a2d589be9f0adbd7d98d06ef2a/json_repair-0.52.3-py3-none-any.whl", hash = "sha256:cf8affd088ccd7a3c196dad7f6152c005f126f63a614b9ea5d0c1dfa7f5d7186", size = 26522, upload-time = "2025-10-22T04:37:32.44Z" }, + { url = "https://files.pythonhosted.org/packages/ba/49/e588ec59b64222c8d38585f9ceffbf71870c3cbfb2873e53297c4f4afd0b/json_repair-0.53.0-py3-none-any.whl", hash = "sha256:17f7439e41ae39964e1d678b1def38cb8ec43d607340564acf3e62d8ce47a727", size = 27404, upload-time = "2025-11-08T13:45:14.464Z" }, ] [[package]] @@ -3227,26 +3218,26 @@ wheels = [ [[package]] name = "lz4" -version = "4.4.4" +version = "4.4.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c6/5a/945f5086326d569f14c84ac6f7fcc3229f0b9b1e8cc536b951fd53dfb9e1/lz4-4.4.4.tar.gz", hash = "sha256:070fd0627ec4393011251a094e08ed9fdcc78cb4e7ab28f507638eee4e39abda", size = 171884, upload-time = "2025-04-01T22:55:58.62Z" } +sdist = { url = "https://files.pythonhosted.org/packages/57/51/f1b86d93029f418033dddf9b9f79c8d2641e7454080478ee2aab5123173e/lz4-4.4.5.tar.gz", hash = "sha256:5f0b9e53c1e82e88c10d7c180069363980136b9d7a8306c4dca4f760d60c39f0", size = 172886, upload-time = "2025-11-03T13:02:36.061Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/28/e8/63843dc5ecb1529eb38e1761ceed04a0ad52a9ad8929ab8b7930ea2e4976/lz4-4.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ddfc7194cd206496c445e9e5b0c47f970ce982c725c87bd22de028884125b68f", size = 220898, upload-time = "2025-04-01T22:55:23.085Z" }, - { url = "https://files.pythonhosted.org/packages/e4/94/c53de5f07c7dc11cf459aab2a1d754f5df5f693bfacbbe1e4914bfd02f1e/lz4-4.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:714f9298c86f8e7278f1c6af23e509044782fa8220eb0260f8f8f1632f820550", size = 189685, upload-time = "2025-04-01T22:55:24.413Z" }, - { url = "https://files.pythonhosted.org/packages/fe/59/c22d516dd0352f2a3415d1f665ccef2f3e74ecec3ca6a8f061a38f97d50d/lz4-4.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8474c91de47733856c6686df3c4aca33753741da7e757979369c2c0d32918ba", size = 1239225, upload-time = "2025-04-01T22:55:25.737Z" }, - { url = "https://files.pythonhosted.org/packages/81/af/665685072e71f3f0e626221b7922867ec249cd8376aca761078c8f11f5da/lz4-4.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80dd27d7d680ea02c261c226acf1d41de2fd77af4fb2da62b278a9376e380de0", size = 1265881, upload-time = "2025-04-01T22:55:26.817Z" }, - { url = "https://files.pythonhosted.org/packages/90/04/b4557ae381d3aa451388a29755cc410066f5e2f78c847f66f154f4520a68/lz4-4.4.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9b7d6dddfd01b49aedb940fdcaf32f41dc58c926ba35f4e31866aeec2f32f4f4", size = 1185593, upload-time = "2025-04-01T22:55:27.896Z" }, - { url = "https://files.pythonhosted.org/packages/7b/e4/03636979f4e8bf92c557f998ca98ee4e6ef92e92eaf0ed6d3c7f2524e790/lz4-4.4.4-cp311-cp311-win32.whl", hash = "sha256:4134b9fd70ac41954c080b772816bb1afe0c8354ee993015a83430031d686a4c", size = 88259, upload-time = "2025-04-01T22:55:29.03Z" }, - { url = "https://files.pythonhosted.org/packages/07/f0/9efe53b4945441a5d2790d455134843ad86739855b7e6199977bf6dc8898/lz4-4.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:f5024d3ca2383470f7c4ef4d0ed8eabad0b22b23eeefde1c192cf1a38d5e9f78", size = 99916, upload-time = "2025-04-01T22:55:29.933Z" }, - { url = "https://files.pythonhosted.org/packages/87/c8/1675527549ee174b9e1db089f7ddfbb962a97314657269b1e0344a5eaf56/lz4-4.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:6ea715bb3357ea1665f77874cf8f55385ff112553db06f3742d3cdcec08633f7", size = 89741, upload-time = "2025-04-01T22:55:31.184Z" }, - { url = "https://files.pythonhosted.org/packages/f7/2d/5523b4fabe11cd98f040f715728d1932eb7e696bfe94391872a823332b94/lz4-4.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:23ae267494fdd80f0d2a131beff890cf857f1b812ee72dbb96c3204aab725553", size = 220669, upload-time = "2025-04-01T22:55:32.032Z" }, - { url = "https://files.pythonhosted.org/packages/91/06/1a5bbcacbfb48d8ee5b6eb3fca6aa84143a81d92946bdb5cd6b005f1863e/lz4-4.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fff9f3a1ed63d45cb6514bfb8293005dc4141341ce3500abdfeb76124c0b9b2e", size = 189661, upload-time = "2025-04-01T22:55:33.413Z" }, - { url = "https://files.pythonhosted.org/packages/fa/08/39eb7ac907f73e11a69a11576a75a9e36406b3241c0ba41453a7eb842abb/lz4-4.4.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ea7f07329f85a8eda4d8cf937b87f27f0ac392c6400f18bea2c667c8b7f8ecc", size = 1238775, upload-time = "2025-04-01T22:55:34.835Z" }, - { url = "https://files.pythonhosted.org/packages/e9/26/05840fbd4233e8d23e88411a066ab19f1e9de332edddb8df2b6a95c7fddc/lz4-4.4.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ccab8f7f7b82f9fa9fc3b0ba584d353bd5aa818d5821d77d5b9447faad2aaad", size = 1265143, upload-time = "2025-04-01T22:55:35.933Z" }, - { url = "https://files.pythonhosted.org/packages/b7/5d/5f2db18c298a419932f3ab2023deb689863cf8fd7ed875b1c43492479af2/lz4-4.4.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e43e9d48b2daf80e486213128b0763deed35bbb7a59b66d1681e205e1702d735", size = 1185032, upload-time = "2025-04-01T22:55:37.454Z" }, - { url = "https://files.pythonhosted.org/packages/c4/e6/736ab5f128694b0f6aac58343bcf37163437ac95997276cd0be3ea4c3342/lz4-4.4.4-cp312-cp312-win32.whl", hash = "sha256:33e01e18e4561b0381b2c33d58e77ceee850a5067f0ece945064cbaac2176962", size = 88284, upload-time = "2025-04-01T22:55:38.536Z" }, - { url = "https://files.pythonhosted.org/packages/40/b8/243430cb62319175070e06e3a94c4c7bd186a812e474e22148ae1290d47d/lz4-4.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:d21d1a2892a2dcc193163dd13eaadabb2c1b803807a5117d8f8588b22eaf9f12", size = 99918, upload-time = "2025-04-01T22:55:39.628Z" }, - { url = "https://files.pythonhosted.org/packages/6c/e1/0686c91738f3e6c2e1a243e0fdd4371667c4d2e5009b0a3605806c2aa020/lz4-4.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:2f4f2965c98ab254feddf6b5072854a6935adab7bc81412ec4fe238f07b85f62", size = 89736, upload-time = "2025-04-01T22:55:40.5Z" }, + { url = "https://files.pythonhosted.org/packages/93/5b/6edcd23319d9e28b1bedf32768c3d1fd56eed8223960a2c47dacd2cec2af/lz4-4.4.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d6da84a26b3aa5da13a62e4b89ab36a396e9327de8cd48b436a3467077f8ccd4", size = 207391, upload-time = "2025-11-03T13:01:36.644Z" }, + { url = "https://files.pythonhosted.org/packages/34/36/5f9b772e85b3d5769367a79973b8030afad0d6b724444083bad09becd66f/lz4-4.4.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:61d0ee03e6c616f4a8b69987d03d514e8896c8b1b7cc7598ad029e5c6aedfd43", size = 207146, upload-time = "2025-11-03T13:01:37.928Z" }, + { url = "https://files.pythonhosted.org/packages/04/f4/f66da5647c0d72592081a37c8775feacc3d14d2625bbdaabd6307c274565/lz4-4.4.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:33dd86cea8375d8e5dd001e41f321d0a4b1eb7985f39be1b6a4f466cd480b8a7", size = 1292623, upload-time = "2025-11-03T13:01:39.341Z" }, + { url = "https://files.pythonhosted.org/packages/85/fc/5df0f17467cdda0cad464a9197a447027879197761b55faad7ca29c29a04/lz4-4.4.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:609a69c68e7cfcfa9d894dc06be13f2e00761485b62df4e2472f1b66f7b405fb", size = 1279982, upload-time = "2025-11-03T13:01:40.816Z" }, + { url = "https://files.pythonhosted.org/packages/25/3b/b55cb577aa148ed4e383e9700c36f70b651cd434e1c07568f0a86c9d5fbb/lz4-4.4.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:75419bb1a559af00250b8f1360d508444e80ed4b26d9d40ec5b09fe7875cb989", size = 1368674, upload-time = "2025-11-03T13:01:42.118Z" }, + { url = "https://files.pythonhosted.org/packages/fb/31/e97e8c74c59ea479598e5c55cbe0b1334f03ee74ca97726e872944ed42df/lz4-4.4.5-cp311-cp311-win32.whl", hash = "sha256:12233624f1bc2cebc414f9efb3113a03e89acce3ab6f72035577bc61b270d24d", size = 88168, upload-time = "2025-11-03T13:01:43.282Z" }, + { url = "https://files.pythonhosted.org/packages/18/47/715865a6c7071f417bef9b57c8644f29cb7a55b77742bd5d93a609274e7e/lz4-4.4.5-cp311-cp311-win_amd64.whl", hash = "sha256:8a842ead8ca7c0ee2f396ca5d878c4c40439a527ebad2b996b0444f0074ed004", size = 99491, upload-time = "2025-11-03T13:01:44.167Z" }, + { url = "https://files.pythonhosted.org/packages/14/e7/ac120c2ca8caec5c945e6356ada2aa5cfabd83a01e3170f264a5c42c8231/lz4-4.4.5-cp311-cp311-win_arm64.whl", hash = "sha256:83bc23ef65b6ae44f3287c38cbf82c269e2e96a26e560aa551735883388dcc4b", size = 91271, upload-time = "2025-11-03T13:01:45.016Z" }, + { url = "https://files.pythonhosted.org/packages/1b/ac/016e4f6de37d806f7cc8f13add0a46c9a7cfc41a5ddc2bc831d7954cf1ce/lz4-4.4.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:df5aa4cead2044bab83e0ebae56e0944cc7fcc1505c7787e9e1057d6d549897e", size = 207163, upload-time = "2025-11-03T13:01:45.895Z" }, + { url = "https://files.pythonhosted.org/packages/8d/df/0fadac6e5bd31b6f34a1a8dbd4db6a7606e70715387c27368586455b7fc9/lz4-4.4.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6d0bf51e7745484d2092b3a51ae6eb58c3bd3ce0300cf2b2c14f76c536d5697a", size = 207150, upload-time = "2025-11-03T13:01:47.205Z" }, + { url = "https://files.pythonhosted.org/packages/b7/17/34e36cc49bb16ca73fb57fbd4c5eaa61760c6b64bce91fcb4e0f4a97f852/lz4-4.4.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7b62f94b523c251cf32aa4ab555f14d39bd1a9df385b72443fd76d7c7fb051f5", size = 1292045, upload-time = "2025-11-03T13:01:48.667Z" }, + { url = "https://files.pythonhosted.org/packages/90/1c/b1d8e3741e9fc89ed3b5f7ef5f22586c07ed6bb04e8343c2e98f0fa7ff04/lz4-4.4.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2c3ea562c3af274264444819ae9b14dbbf1ab070aff214a05e97db6896c7597e", size = 1279546, upload-time = "2025-11-03T13:01:50.159Z" }, + { url = "https://files.pythonhosted.org/packages/55/d9/e3867222474f6c1b76e89f3bd914595af69f55bf2c1866e984c548afdc15/lz4-4.4.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:24092635f47538b392c4eaeff14c7270d2c8e806bf4be2a6446a378591c5e69e", size = 1368249, upload-time = "2025-11-03T13:01:51.273Z" }, + { url = "https://files.pythonhosted.org/packages/b2/e7/d667d337367686311c38b580d1ca3d5a23a6617e129f26becd4f5dc458df/lz4-4.4.5-cp312-cp312-win32.whl", hash = "sha256:214e37cfe270948ea7eb777229e211c601a3e0875541c1035ab408fbceaddf50", size = 88189, upload-time = "2025-11-03T13:01:52.605Z" }, + { url = "https://files.pythonhosted.org/packages/a5/0b/a54cd7406995ab097fceb907c7eb13a6ddd49e0b231e448f1a81a50af65c/lz4-4.4.5-cp312-cp312-win_amd64.whl", hash = "sha256:713a777de88a73425cf08eb11f742cd2c98628e79a8673d6a52e3c5f0c116f33", size = 99497, upload-time = "2025-11-03T13:01:53.477Z" }, + { url = "https://files.pythonhosted.org/packages/6a/7e/dc28a952e4bfa32ca16fa2eb026e7a6ce5d1411fcd5986cd08c74ec187b9/lz4-4.4.5-cp312-cp312-win_arm64.whl", hash = "sha256:a88cbb729cc333334ccfb52f070463c21560fca63afcf636a9f160a55fac3301", size = 91279, upload-time = "2025-11-03T13:01:54.419Z" }, ] [[package]] @@ -3509,14 +3500,14 @@ wheels = [ [[package]] name = "mypy-boto3-bedrock-runtime" -version = "1.40.41" +version = "1.40.62" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions", marker = "python_full_version < '3.12'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c7/38/79989f7bce998776ed1a01c17f3f58e7bc6f5fc2bcbdff929701526fa2f1/mypy_boto3_bedrock_runtime-1.40.41.tar.gz", hash = "sha256:ee9bda6d6d478c8d0995e84e884bdf1798e150d437974ae27c175774a58ffaa5", size = 28333, upload-time = "2025-09-29T19:26:04.804Z" } +sdist = { url = "https://files.pythonhosted.org/packages/51/d0/ca3c58a1284f9142959fb00889322d4889278c2e4b165350d8e294c07d9c/mypy_boto3_bedrock_runtime-1.40.62.tar.gz", hash = "sha256:5505a60e2b5f9c845ee4778366d49c93c3723f6790d0cec116d8fc5f5609d846", size = 28611, upload-time = "2025-10-29T21:43:02.599Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3d/6c/d3431dadf473bb76aa590b1ed8cc91726a48b029b542eff9d3024f2d70b9/mypy_boto3_bedrock_runtime-1.40.41-py3-none-any.whl", hash = "sha256:d65dff200986ff06c6b3579ddcea102555f2067c8987fca379bf4f9ed8ba3121", size = 34181, upload-time = "2025-09-29T19:26:01.898Z" }, + { url = "https://files.pythonhosted.org/packages/4b/c5/ad62e5f80684ce5fe878d320634189ef29d00ee294cd62a37f3e51719f47/mypy_boto3_bedrock_runtime-1.40.62-py3-none-any.whl", hash = "sha256:e383e70b5dffb0b335b49fc1b2772f0d35118f99994bc7e731445ba0ab237831", size = 34497, upload-time = "2025-10-29T21:43:01.591Z" }, ] [[package]] @@ -3547,15 +3538,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/e1/45373c06781340c7b74fe9b88b85278ac05321889a307eaa5be079a997d4/mysql_connector_python-9.5.0-py2.py3-none-any.whl", hash = "sha256:ace137b88eb6fdafa1e5b2e03ac76ce1b8b1844b3a4af1192a02ae7c1a45bdee", size = 479047, upload-time = "2025-10-22T09:02:27.809Z" }, ] -[[package]] -name = "nest-asyncio" -version = "1.6.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/83/f8/51569ac65d696c8ecbee95938f89d4abf00f47d58d48f6fbabfe8f0baefe/nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe", size = 7418, upload-time = "2024-01-21T14:25:19.227Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195, upload-time = "2024-01-21T14:25:17.223Z" }, -] - [[package]] name = "networkx" version = "3.5" @@ -3735,7 +3717,7 @@ wheels = [ [[package]] name = "openai" -version = "2.6.1" +version = "2.7.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -3747,9 +3729,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c4/44/303deb97be7c1c9b53118b52825cbd1557aeeff510f3a52566b1fa66f6a2/openai-2.6.1.tar.gz", hash = "sha256:27ae704d190615fca0c0fc2b796a38f8b5879645a3a52c9c453b23f97141bb49", size = 593043, upload-time = "2025-10-24T13:29:52.79Z" } +sdist = { url = "https://files.pythonhosted.org/packages/71/e3/cec27fa28ef36c4ccea71e9e8c20be9b8539618732989a82027575aab9d4/openai-2.7.2.tar.gz", hash = "sha256:082ef61163074d8efad0035dd08934cf5e3afd37254f70fc9165dd6a8c67dcbd", size = 595732, upload-time = "2025-11-10T16:42:31.108Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/15/0e/331df43df633e6105ff9cf45e0ce57762bd126a45ac16b25a43f6738d8a2/openai-2.6.1-py3-none-any.whl", hash = "sha256:904e4b5254a8416746a2f05649594fa41b19d799843cd134dac86167e094edef", size = 1005551, upload-time = "2025-10-24T13:29:50.973Z" }, + { url = "https://files.pythonhosted.org/packages/25/66/22cfe4b695b5fd042931b32c67d685e867bfd169ebf46036b95b57314c33/openai-2.7.2-py3-none-any.whl", hash = "sha256:116f522f4427f8a0a59b51655a356da85ce092f3ed6abeca65f03c8be6e073d9", size = 1008375, upload-time = "2025-11-10T16:42:28.574Z" }, ] [[package]] @@ -3770,7 +3752,7 @@ wheels = [ [[package]] name = "openinference-instrumentation" -version = "0.1.41" +version = "0.1.42" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "openinference-semantic-conventions" }, @@ -3778,18 +3760,18 @@ dependencies = [ { name = "opentelemetry-sdk" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fb/1d/e40227fb6fd0683bdf47c8fed93c15b42f878a0054ef3a09c30dee9b5496/openinference_instrumentation-0.1.41.tar.gz", hash = "sha256:95beaebf8cd2729a4681aaf96a607657c9ed4d4bab6a2fce25e2e1c20850e224", size = 23835, upload-time = "2025-10-20T13:56:00.935Z" } +sdist = { url = "https://files.pythonhosted.org/packages/00/d0/b19061a21fd6127d2857c77744a36073bba9c1502d1d5e8517b708eb8b7c/openinference_instrumentation-0.1.42.tar.gz", hash = "sha256:2275babc34022e151b5492cfba41d3b12e28377f8e08cb45e5d64fe2d9d7fe37", size = 23954, upload-time = "2025-11-05T01:37:46.869Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/9f/09b901e2987f2742cc449694514be3423fd5af71d2f6ffbc5c4e157328c9/openinference_instrumentation-0.1.41-py3-none-any.whl", hash = "sha256:bc206d561ae2b745383a3f18bfc4efba3fc40617e7fee67015924852942704f9", size = 29967, upload-time = "2025-10-20T13:56:00.09Z" }, + { url = "https://files.pythonhosted.org/packages/c3/71/43ee4616fc95dbd2f560550f199c6652a5eb93f84e8aa0039bc95c19cfe0/openinference_instrumentation-0.1.42-py3-none-any.whl", hash = "sha256:e7521ff90833ef7cc65db526a2f59b76a496180abeaaee30ec6abbbc0b43f8ec", size = 30086, upload-time = "2025-11-05T01:37:43.866Z" }, ] [[package]] name = "openinference-semantic-conventions" -version = "0.1.24" +version = "0.1.25" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/07/15/be7566a4bba4b57f7c70b088f42735f2005e2c0adce646a537f63dcf21de/openinference_semantic_conventions-0.1.24.tar.gz", hash = "sha256:3223b8c3958525457a369d58ebf0c56230a1f00567ae1e99f1c2049a8ac2cacd", size = 12741, upload-time = "2025-10-10T03:49:13.987Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/68/81c8a0b90334ff11e4f285e4934c57f30bea3ef0c0b9f99b65e7b80fae3b/openinference_semantic_conventions-0.1.25.tar.gz", hash = "sha256:f0a8c2cfbd00195d1f362b4803518341e80867d446c2959bf1743f1894fce31d", size = 12767, upload-time = "2025-11-05T01:37:45.89Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9f/c5/fa81b19042b387826151f984a91fa3d0b52b08374e4d5786521ac2d9e704/openinference_semantic_conventions-0.1.24-py3-none-any.whl", hash = "sha256:b2d650ca7e39c5fb02bf908b8049d6ece2a2657757448e1925a38b59548a80b3", size = 10373, upload-time = "2025-10-10T03:49:00.318Z" }, + { url = "https://files.pythonhosted.org/packages/fd/3d/dd14ee2eb8a3f3054249562e76b253a1545c76adbbfd43a294f71acde5c3/openinference_semantic_conventions-0.1.25-py3-none-any.whl", hash = "sha256:3814240f3bd61f05d9562b761de70ee793d55b03bca1634edf57d7a2735af238", size = 10395, upload-time = "2025-11-05T01:37:43.697Z" }, ] [[package]] @@ -4110,7 +4092,7 @@ wheels = [ [[package]] name = "opik" -version = "1.8.87" +version = "1.8.102" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "boto3-stubs", extra = ["bedrock-runtime"] }, @@ -4129,9 +4111,9 @@ dependencies = [ { name = "tqdm" }, { name = "uuid6" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/48/25/8193d792f851a5998ed590378dcf0acb3297b0b3aaefdfb265b72bb25010/opik-1.8.87.tar.gz", hash = "sha256:1fb5cf2b5fa5a7b99805892c3d7331de385fa146dff0846dd4ca3b885d95c729", size = 436750, upload-time = "2025-10-24T11:01:05.454Z" } +sdist = { url = "https://files.pythonhosted.org/packages/30/af/f6382cea86bdfbfd0f9571960a15301da4a6ecd1506070d9252a0c0a7564/opik-1.8.102.tar.gz", hash = "sha256:c836a113e8b7fdf90770a3854dcc859b3c30d6347383d7c11e52971a530ed2c3", size = 490462, upload-time = "2025-11-05T18:54:50.142Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/2e/95b470f15bcaa29fe80fd93044bc00b6b1cc6ac60b4d410c6283259439bb/opik-1.8.87-py3-none-any.whl", hash = "sha256:49811008273cdd841d7a11486ff1d9a4d3f59afa7f8a2dc7fde284e5647a435a", size = 807810, upload-time = "2025-10-24T11:01:03.909Z" }, + { url = "https://files.pythonhosted.org/packages/b9/8b/9b15a01f8360201100b9a5d3e0aeeeda57833fca2b16d34b9fada147fc4b/opik-1.8.102-py3-none-any.whl", hash = "sha256:d8501134bf62bf95443de036f6eaa4f66006f81f9b99e0a8a09e21d8be8c1628", size = 885834, upload-time = "2025-11-05T18:54:48.22Z" }, ] [[package]] @@ -4471,7 +4453,7 @@ wheels = [ [[package]] name = "posthog" -version = "6.7.10" +version = "7.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "backoff" }, @@ -4481,9 +4463,9 @@ dependencies = [ { name = "six" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9f/28/b81f52830efa1eb3e22b7b8c39ef60cf964dfc49b3d64ec060691b0d6df5/posthog-6.7.10.tar.gz", hash = "sha256:212512af756e7641e398bf36131ad589790a08cfa80c267895f46e0ec943e547", size = 120427, upload-time = "2025-10-24T13:50:50.565Z" } +sdist = { url = "https://files.pythonhosted.org/packages/15/4d/16d777528149cd0e06306973081b5b070506abcd0fe831c6cb6966260d59/posthog-7.0.0.tar.gz", hash = "sha256:94973227f5fe5e7d656d305ff48c8bff3d505fd1e78b6fcd7ccc9dfe8d3401c2", size = 126504, upload-time = "2025-11-11T18:13:06.986Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/af/c7/fdcd03a3eb71d968c6e8c20f0e011895086aac5d1502ae30b4c1345f8152/posthog-6.7.10-py3-none-any.whl", hash = "sha256:dc0a8dd8ddfa6f721f0066a4e3fb0062c2e19dce3b3ae8875d2a0984f9fd5d59", size = 138703, upload-time = "2025-10-24T13:50:49.285Z" }, + { url = "https://files.pythonhosted.org/packages/ca/9a/dc29b9ff4e5233a3c071b6b4c85dba96f4fcb9169c460bc81abd98555fb3/posthog-7.0.0-py3-none-any.whl", hash = "sha256:676d8a5197a17bf7bd00e31020a5f232988f249f57aab532f0d01c6243835934", size = 144727, upload-time = "2025-11-11T18:13:05.444Z" }, ] [[package]] @@ -4565,16 +4547,16 @@ wheels = [ [[package]] name = "psutil" -version = "7.1.2" +version = "7.1.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cd/ec/7b8e6b9b1d22708138630ef34c53ab2b61032c04f16adfdbb96791c8c70c/psutil-7.1.2.tar.gz", hash = "sha256:aa225cdde1335ff9684708ee8c72650f6598d5ed2114b9a7c5802030b1785018", size = 487424, upload-time = "2025-10-25T10:46:34.931Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e1/88/bdd0a41e5857d5d703287598cbf08dad90aed56774ea52ae071bae9071b6/psutil-7.1.3.tar.gz", hash = "sha256:6c86281738d77335af7aec228328e944b30930899ea760ecf33a4dba66be5e74", size = 489059, upload-time = "2025-11-02T12:25:54.619Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/89/b9f8d47ddbc52d7301fc868e8224e5f44ed3c7f55e6d0f54ecaf5dd9ff5e/psutil-7.1.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c9ba5c19f2d46203ee8c152c7b01df6eec87d883cfd8ee1af2ef2727f6b0f814", size = 237244, upload-time = "2025-10-25T10:47:07.086Z" }, - { url = "https://files.pythonhosted.org/packages/c8/7a/8628c2f6b240680a67d73d8742bb9ff39b1820a693740e43096d5dcb01e5/psutil-7.1.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:2a486030d2fe81bec023f703d3d155f4823a10a47c36784c84f1cc7f8d39bedb", size = 238101, upload-time = "2025-10-25T10:47:09.523Z" }, - { url = "https://files.pythonhosted.org/packages/30/28/5e27f4d5a0e347f8e3cc16cd7d35533dbce086c95807f1f0e9cd77e26c10/psutil-7.1.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3efd8fc791492e7808a51cb2b94889db7578bfaea22df931424f874468e389e3", size = 258675, upload-time = "2025-10-25T10:47:11.082Z" }, - { url = "https://files.pythonhosted.org/packages/e5/5c/79cf60c9acf36d087f0db0f82066fca4a780e97e5b3a2e4c38209c03d170/psutil-7.1.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e2aeb9b64f481b8eabfc633bd39e0016d4d8bbcd590d984af764d80bf0851b8a", size = 260203, upload-time = "2025-10-25T10:47:13.226Z" }, - { url = "https://files.pythonhosted.org/packages/f7/03/0a464404c51685dcb9329fdd660b1721e076ccd7b3d97dee066bcc9ffb15/psutil-7.1.2-cp37-abi3-win_amd64.whl", hash = "sha256:8e17852114c4e7996fe9da4745c2bdef001ebbf2f260dec406290e66628bdb91", size = 246714, upload-time = "2025-10-25T10:47:15.093Z" }, - { url = "https://files.pythonhosted.org/packages/6a/32/97ca2090f2f1b45b01b6aa7ae161cfe50671de097311975ca6eea3e7aabc/psutil-7.1.2-cp37-abi3-win_arm64.whl", hash = "sha256:3e988455e61c240cc879cb62a008c2699231bf3e3d061d7fce4234463fd2abb4", size = 243742, upload-time = "2025-10-25T10:47:17.302Z" }, + { url = "https://files.pythonhosted.org/packages/ef/94/46b9154a800253e7ecff5aaacdf8ebf43db99de4a2dfa18575b02548654e/psutil-7.1.3-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2bdbcd0e58ca14996a42adf3621a6244f1bb2e2e528886959c72cf1e326677ab", size = 238359, upload-time = "2025-11-02T12:26:25.284Z" }, + { url = "https://files.pythonhosted.org/packages/68/3a/9f93cff5c025029a36d9a92fef47220ab4692ee7f2be0fba9f92813d0cb8/psutil-7.1.3-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:bc31fa00f1fbc3c3802141eede66f3a2d51d89716a194bf2cd6fc68310a19880", size = 239171, upload-time = "2025-11-02T12:26:27.23Z" }, + { url = "https://files.pythonhosted.org/packages/ce/b1/5f49af514f76431ba4eea935b8ad3725cdeb397e9245ab919dbc1d1dc20f/psutil-7.1.3-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3bb428f9f05c1225a558f53e30ccbad9930b11c3fc206836242de1091d3e7dd3", size = 263261, upload-time = "2025-11-02T12:26:29.48Z" }, + { url = "https://files.pythonhosted.org/packages/e0/95/992c8816a74016eb095e73585d747e0a8ea21a061ed3689474fabb29a395/psutil-7.1.3-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56d974e02ca2c8eb4812c3f76c30e28836fffc311d55d979f1465c1feeb2b68b", size = 264635, upload-time = "2025-11-02T12:26:31.74Z" }, + { url = "https://files.pythonhosted.org/packages/55/4c/c3ed1a622b6ae2fd3c945a366e64eb35247a31e4db16cf5095e269e8eb3c/psutil-7.1.3-cp37-abi3-win_amd64.whl", hash = "sha256:f39c2c19fe824b47484b96f9692932248a54c43799a84282cfe58d05a6449efd", size = 247633, upload-time = "2025-11-02T12:26:33.887Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ad/33b2ccec09bf96c2b2ef3f9a6f66baac8253d7565d8839e024a6b905d45d/psutil-7.1.3-cp37-abi3-win_arm64.whl", hash = "sha256:bd0d69cee829226a761e92f28140bec9a5ee9d5b4fb4b0cc589068dbfff559b1", size = 244608, upload-time = "2025-11-02T12:26:36.136Z" }, ] [[package]] @@ -4594,8 +4576,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/89/3fdb5902bdab8868bbedc1c6e6023a4e08112ceac5db97fc2012060e0c9a/psycopg2_binary-2.9.11-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2e164359396576a3cc701ba8af4751ae68a07235d7a380c631184a611220d9a4", size = 4410955, upload-time = "2025-10-10T11:11:21.21Z" }, { url = "https://files.pythonhosted.org/packages/ce/24/e18339c407a13c72b336e0d9013fbbbde77b6fd13e853979019a1269519c/psycopg2_binary-2.9.11-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:d57c9c387660b8893093459738b6abddbb30a7eab058b77b0d0d1c7d521ddfd7", size = 4468007, upload-time = "2025-10-10T11:11:24.831Z" }, { url = "https://files.pythonhosted.org/packages/91/7e/b8441e831a0f16c159b5381698f9f7f7ed54b77d57bc9c5f99144cc78232/psycopg2_binary-2.9.11-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2c226ef95eb2250974bf6fa7a842082b31f68385c4f3268370e3f3870e7859ee", size = 4165012, upload-time = "2025-10-10T11:11:29.51Z" }, + { url = "https://files.pythonhosted.org/packages/0d/61/4aa89eeb6d751f05178a13da95516c036e27468c5d4d2509bb1e15341c81/psycopg2_binary-2.9.11-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a311f1edc9967723d3511ea7d2708e2c3592e3405677bf53d5c7246753591fbb", size = 3981881, upload-time = "2025-10-30T02:55:07.332Z" }, { url = "https://files.pythonhosted.org/packages/76/a1/2f5841cae4c635a9459fe7aca8ed771336e9383b6429e05c01267b0774cf/psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ebb415404821b6d1c47353ebe9c8645967a5235e6d88f914147e7fd411419e6f", size = 3650985, upload-time = "2025-10-10T11:11:34.975Z" }, { url = "https://files.pythonhosted.org/packages/84/74/4defcac9d002bca5709951b975173c8c2fa968e1a95dc713f61b3a8d3b6a/psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f07c9c4a5093258a03b28fab9b4f151aa376989e7f35f855088234e656ee6a94", size = 3296039, upload-time = "2025-10-10T11:11:40.432Z" }, + { url = "https://files.pythonhosted.org/packages/6d/c2/782a3c64403d8ce35b5c50e1b684412cf94f171dc18111be8c976abd2de1/psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:00ce1830d971f43b667abe4a56e42c1e2d594b32da4802e44a73bacacb25535f", size = 3043477, upload-time = "2025-10-30T02:55:11.182Z" }, { url = "https://files.pythonhosted.org/packages/c8/31/36a1d8e702aa35c38fc117c2b8be3f182613faa25d794b8aeaab948d4c03/psycopg2_binary-2.9.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cffe9d7697ae7456649617e8bb8d7a45afb71cd13f7ab22af3e5c61f04840908", size = 3345842, upload-time = "2025-10-10T11:11:45.366Z" }, { url = "https://files.pythonhosted.org/packages/6e/b4/a5375cda5b54cb95ee9b836930fea30ae5a8f14aa97da7821722323d979b/psycopg2_binary-2.9.11-cp311-cp311-win_amd64.whl", hash = "sha256:304fd7b7f97eef30e91b8f7e720b3db75fee010b520e434ea35ed1ff22501d03", size = 2713894, upload-time = "2025-10-10T11:11:48.775Z" }, { url = "https://files.pythonhosted.org/packages/d8/91/f870a02f51be4a65987b45a7de4c2e1897dd0d01051e2b559a38fa634e3e/psycopg2_binary-2.9.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:be9b840ac0525a283a96b556616f5b4820e0526addb8dcf6525a0fa162730be4", size = 3756603, upload-time = "2025-10-10T11:11:52.213Z" }, @@ -4603,8 +4587,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2d/75/364847b879eb630b3ac8293798e380e441a957c53657995053c5ec39a316/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ab8905b5dcb05bf3fb22e0cf90e10f469563486ffb6a96569e51f897c750a76a", size = 4411159, upload-time = "2025-10-10T11:12:00.49Z" }, { url = "https://files.pythonhosted.org/packages/6f/a0/567f7ea38b6e1c62aafd58375665a547c00c608a471620c0edc364733e13/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:bf940cd7e7fec19181fdbc29d76911741153d51cab52e5c21165f3262125685e", size = 4468234, upload-time = "2025-10-10T11:12:04.892Z" }, { url = "https://files.pythonhosted.org/packages/30/da/4e42788fb811bbbfd7b7f045570c062f49e350e1d1f3df056c3fb5763353/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fa0f693d3c68ae925966f0b14b8edda71696608039f4ed61b1fe9ffa468d16db", size = 4166236, upload-time = "2025-10-10T11:12:11.674Z" }, + { url = "https://files.pythonhosted.org/packages/3c/94/c1777c355bc560992af848d98216148be5f1be001af06e06fc49cbded578/psycopg2_binary-2.9.11-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a1cf393f1cdaf6a9b57c0a719a1068ba1069f022a59b8b1fe44b006745b59757", size = 3983083, upload-time = "2025-10-30T02:55:15.73Z" }, { url = "https://files.pythonhosted.org/packages/bd/42/c9a21edf0e3daa7825ed04a4a8588686c6c14904344344a039556d78aa58/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ef7a6beb4beaa62f88592ccc65df20328029d721db309cb3250b0aae0fa146c3", size = 3652281, upload-time = "2025-10-10T11:12:17.713Z" }, { url = "https://files.pythonhosted.org/packages/12/22/dedfbcfa97917982301496b6b5e5e6c5531d1f35dd2b488b08d1ebc52482/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:31b32c457a6025e74d233957cc9736742ac5a6cb196c6b68499f6bb51390bd6a", size = 3298010, upload-time = "2025-10-10T11:12:22.671Z" }, + { url = "https://files.pythonhosted.org/packages/66/ea/d3390e6696276078bd01b2ece417deac954dfdd552d2edc3d03204416c0c/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:edcb3aeb11cb4bf13a2af3c53a15b3d612edeb6409047ea0b5d6a21a9d744b34", size = 3044641, upload-time = "2025-10-30T02:55:19.929Z" }, { url = "https://files.pythonhosted.org/packages/12/9a/0402ded6cbd321da0c0ba7d34dc12b29b14f5764c2fc10750daa38e825fc/psycopg2_binary-2.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:62b6d93d7c0b61a1dd6197d208ab613eb7dcfdcca0a49c42ceb082257991de9d", size = 3347940, upload-time = "2025-10-10T11:12:26.529Z" }, { url = "https://files.pythonhosted.org/packages/b1/d2/99b55e85832ccde77b211738ff3925a5d73ad183c0b37bcbbe5a8ff04978/psycopg2_binary-2.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:b33fabeb1fde21180479b2d4667e994de7bbf0eec22832ba5d9b5e4cf65b6c6d", size = 2714147, upload-time = "2025-10-10T11:12:29.535Z" }, ] @@ -4815,7 +4801,7 @@ crypto = [ [[package]] name = "pymilvus" -version = "2.5.16" +version = "2.5.17" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "grpcio" }, @@ -4826,9 +4812,9 @@ dependencies = [ { name = "setuptools" }, { name = "ujson" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/24/e2/5613bc7b2af0ccd760177ca4255243c284cfc0f2cba3f10ff63325c4ca34/pymilvus-2.5.16.tar.gz", hash = "sha256:65f56b81806bc217cca3cf29b70a27d053dea4b1ffada910cf63a38f96381618", size = 1280614, upload-time = "2025-09-19T07:02:14.747Z" } +sdist = { url = "https://files.pythonhosted.org/packages/dc/85/91828a9282bb7f9b210c0a93831979c5829cba5533ac12e87014b6e2208b/pymilvus-2.5.17.tar.gz", hash = "sha256:48ff55db9598e1b4cc25f4fe645b00d64ebcfb03f79f9f741267fc2a35526d43", size = 1281485, upload-time = "2025-11-10T03:24:53.058Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/09/b67a55abee0a53ea50ba0de0cba6e1c0f7ca7ce2c15ffd6f40c059c25e88/pymilvus-2.5.16-py3-none-any.whl", hash = "sha256:76258a324f19c60fee247467e11cd7d6f35a64d2a9c753f5d7b1a5fa15dd6c8a", size = 243272, upload-time = "2025-09-19T07:02:12.443Z" }, + { url = "https://files.pythonhosted.org/packages/59/44/ee0c64617f58c123f570293f36b40f7b56fc123a2aa9573aa22e6ff0fb86/pymilvus-2.5.17-py3-none-any.whl", hash = "sha256:a43d36f2e5f793040917d35858d1ed2532307b7dfb03bc3eaf813aac085bc5a4", size = 244036, upload-time = "2025-11-10T03:24:51.496Z" }, ] [[package]] @@ -4856,7 +4842,7 @@ wheels = [ [[package]] name = "pyobvector" -version = "0.2.16" +version = "0.2.19" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiomysql" }, @@ -4866,18 +4852,17 @@ dependencies = [ { name = "sqlalchemy" }, { name = "sqlglot" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b4/c1/a418b1e10627d3b9d54c7bed460d90bd44c9e9c20be801d6606e9fa3fe01/pyobvector-0.2.16.tar.gz", hash = "sha256:de44588e75de616dee7a9cc5d5c016aeb3390a90fe52f99d9b8ad2476294f6c2", size = 39602, upload-time = "2025-09-03T08:52:23.932Z" } +sdist = { url = "https://files.pythonhosted.org/packages/19/9a/03da0d77f6033694ab7e7214abdd48c372102a185142db880ba00d6a6172/pyobvector-0.2.19.tar.gz", hash = "sha256:5e6847f08679cf6ded800b5b8ae89353173c33f5d90fd1392f55e5fafa4fb886", size = 46314, upload-time = "2025-11-10T08:30:10.186Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/83/7b/c103cca858de87476db5e7c7f0f386b429c3057a7291155c70560b15d951/pyobvector-0.2.16-py3-none-any.whl", hash = "sha256:0710272e5c807a6d0bdeee96972cdc9fdca04fc4b40c2d1260b08ff8b79190ef", size = 52664, upload-time = "2025-09-03T08:52:22.372Z" }, + { url = "https://files.pythonhosted.org/packages/72/48/d6b60ae86a2a2c0c607a33e0c8fc9e469500e06e5bb07ea7e9417910f458/pyobvector-0.2.19-py3-none-any.whl", hash = "sha256:0a6b93c950722ecbab72571e0ab81d0f8f4d1f52df9c25c00693392477e45e4b", size = 59886, upload-time = "2025-11-10T08:30:08.627Z" }, ] [[package]] name = "pypandoc" -version = "1.15" +version = "1.16" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e1/88/26e650d053df5f3874aa3c05901a14166ce3271f58bfe114fd776987efbd/pypandoc-1.15.tar.gz", hash = "sha256:ea25beebe712ae41d63f7410c08741a3cab0e420f6703f95bc9b3a749192ce13", size = 32940, upload-time = "2025-01-08T17:39:58.705Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/61/06/0763e0ccc81754d3eadb21b2cb86cf21bdedc9b52698c2ad6785db7f0a4e/pypandoc-1.15-py3-none-any.whl", hash = "sha256:4ededcc76c8770f27aaca6dff47724578428eca84212a31479403a9731fc2b16", size = 21321, upload-time = "2025-01-08T17:39:09.928Z" }, + { url = "https://files.pythonhosted.org/packages/24/77/af1fc54740a0712988f9518e629d38edc7b8ffccd7549203f19c3d8a2db6/pypandoc-1.16-py3-none-any.whl", hash = "sha256:868f390d48388743e7a5885915cbbaa005dea36a825ecdfd571f8c523416c822", size = 19425, upload-time = "2025-11-08T15:44:38.429Z" }, ] [[package]] @@ -4891,11 +4876,11 @@ wheels = [ [[package]] name = "pypdf" -version = "6.1.3" +version = "6.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/13/3d/b6ead84ee437444f96862beb68f9796da8c199793bed08e9397b77579f23/pypdf-6.1.3.tar.gz", hash = "sha256:8d420d1e79dc1743f31a57707cabb6dcd5b17e8b9a302af64b30202c5700ab9d", size = 5076271, upload-time = "2025-10-22T16:13:46.061Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4e/2b/8795ec0378384000b0a37a2b5e6d67fa3d84802945aa2c612a78a784d7d4/pypdf-6.2.0.tar.gz", hash = "sha256:46b4d8495d68ae9c818e7964853cd9984e6a04c19fe7112760195395992dce48", size = 5272001, upload-time = "2025-11-09T11:10:41.911Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/ed/494fd0cc1190a7c335e6958eeaee6f373a281869830255c2ed4785dac135/pypdf-6.1.3-py3-none-any.whl", hash = "sha256:eb049195e46f014fc155f566fa20e09d70d4646a9891164ac25fa0cbcfcdbcb5", size = 323863, upload-time = "2025-10-22T16:13:44.174Z" }, + { url = "https://files.pythonhosted.org/packages/de/ba/743ddcaf1a8fb439342399645921e2cf2c600464cba5531a11f1cc0822b6/pypdf-6.2.0-py3-none-any.whl", hash = "sha256:4c0f3e62677217a777ab79abe22bf1285442d70efabf552f61c7a03b6f5c569f", size = 326592, upload-time = "2025-11-09T11:10:39.941Z" }, ] [[package]] @@ -5108,11 +5093,11 @@ wheels = [ [[package]] name = "python-iso639" -version = "2025.2.18" +version = "2025.11.11" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d5/19/45aa1917c7b1f4eb71104795b9b0cbf97169b99ec46cd303445883536549/python_iso639-2025.2.18.tar.gz", hash = "sha256:34e31e8e76eb3fc839629e257b12bcfd957c6edcbd486bbf66ba5185d1f566e8", size = 173552, upload-time = "2025-02-18T13:48:08.607Z" } +sdist = { url = "https://files.pythonhosted.org/packages/89/6f/45bc5ae1c132ab7852a8642d66d25ffff6e4b398195127ac66158d3b5f4d/python_iso639-2025.11.11.tar.gz", hash = "sha256:75fab30f1a0f46b4e8161eafb84afe4ecd07eaada05e2c5364f14b0f9c864477", size = 173897, upload-time = "2025-11-11T15:23:00.893Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/54/a3/3ceaf89a17a1e1d5e7bbdfe5514aa3055d91285b37a5c8fed662969e3d56/python_iso639-2025.2.18-py3-none-any.whl", hash = "sha256:b2d471c37483a26f19248458b20e7bd96492e15368b01053b540126bcc23152f", size = 167631, upload-time = "2025-02-18T13:48:06.602Z" }, + { url = "https://files.pythonhosted.org/packages/03/69/081960288e4cd541cbdb90e1768373e1198b040bf2ae40cd25b9c9799205/python_iso639-2025.11.11-py3-none-any.whl", hash = "sha256:02ea4cfca2c189b5665e4e8adc8c17c62ab6e4910932541a23baddea33207ea2", size = 167723, upload-time = "2025-11-11T15:22:59.819Z" }, ] [[package]] @@ -5231,43 +5216,37 @@ wheels = [ [[package]] name = "rapidfuzz" -version = "3.14.1" +version = "3.14.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ed/fc/a98b616db9a42dcdda7c78c76bdfdf6fe290ac4c5ffbb186f73ec981ad5b/rapidfuzz-3.14.1.tar.gz", hash = "sha256:b02850e7f7152bd1edff27e9d584505b84968cacedee7a734ec4050c655a803c", size = 57869570, upload-time = "2025-09-08T21:08:15.922Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/28/9d808fe62375b9aab5ba92fa9b29371297b067c2790b2d7cda648b1e2f8d/rapidfuzz-3.14.3.tar.gz", hash = "sha256:2491937177868bc4b1e469087601d53f925e8d270ccc21e07404b4b5814b7b5f", size = 57863900, upload-time = "2025-11-01T11:54:52.321Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/c7/c3c860d512606225c11c8ee455b4dc0b0214dbcfac90a2c22dddf55320f3/rapidfuzz-3.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4d976701060886a791c8a9260b1d4139d14c1f1e9a6ab6116b45a1acf3baff67", size = 1938398, upload-time = "2025-09-08T21:05:44.031Z" }, - { url = "https://files.pythonhosted.org/packages/c0/f3/67f5c5cd4d728993c48c1dcb5da54338d77c03c34b4903cc7839a3b89faf/rapidfuzz-3.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5e6ba7e6eb2ab03870dcab441d707513db0b4264c12fba7b703e90e8b4296df2", size = 1392819, upload-time = "2025-09-08T21:05:45.549Z" }, - { url = "https://files.pythonhosted.org/packages/d5/06/400d44842f4603ce1bebeaeabe776f510e329e7dbf6c71b6f2805e377889/rapidfuzz-3.14.1-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1e532bf46de5fd3a1efde73a16a4d231d011bce401c72abe3c6ecf9de681003f", size = 1391798, upload-time = "2025-09-08T21:05:47.044Z" }, - { url = "https://files.pythonhosted.org/packages/90/97/a6944955713b47d88e8ca4305ca7484940d808c4e6c4e28b6fa0fcbff97e/rapidfuzz-3.14.1-cp311-cp311-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f9b6a6fb8ed9b951e5f3b82c1ce6b1665308ec1a0da87f799b16e24fc59e4662", size = 1699136, upload-time = "2025-09-08T21:05:48.919Z" }, - { url = "https://files.pythonhosted.org/packages/a8/1e/f311a5c95ddf922db6dd8666efeceb9ac69e1319ed098ac80068a4041732/rapidfuzz-3.14.1-cp311-cp311-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5b6ac3f9810949caef0e63380b11a3c32a92f26bacb9ced5e32c33560fcdf8d1", size = 2236238, upload-time = "2025-09-08T21:05:50.844Z" }, - { url = "https://files.pythonhosted.org/packages/85/27/e14e9830255db8a99200f7111b158ddef04372cf6332a415d053fe57cc9c/rapidfuzz-3.14.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e52e4c34fd567f77513e886b66029c1ae02f094380d10eba18ba1c68a46d8b90", size = 3183685, upload-time = "2025-09-08T21:05:52.362Z" }, - { url = "https://files.pythonhosted.org/packages/61/b2/42850c9616ddd2887904e5dd5377912cbabe2776fdc9fd4b25e6e12fba32/rapidfuzz-3.14.1-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:2ef72e41b1a110149f25b14637f1cedea6df192462120bea3433980fe9d8ac05", size = 1231523, upload-time = "2025-09-08T21:05:53.927Z" }, - { url = "https://files.pythonhosted.org/packages/de/b5/6b90ed7127a1732efef39db46dd0afc911f979f215b371c325a2eca9cb15/rapidfuzz-3.14.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:fb654a35b373d712a6b0aa2a496b2b5cdd9d32410cfbaecc402d7424a90ba72a", size = 2415209, upload-time = "2025-09-08T21:05:55.422Z" }, - { url = "https://files.pythonhosted.org/packages/70/60/af51c50d238c82f2179edc4b9f799cc5a50c2c0ebebdcfaa97ded7d02978/rapidfuzz-3.14.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:2b2c12e5b9eb8fe9a51b92fe69e9ca362c0970e960268188a6d295e1dec91e6d", size = 2532957, upload-time = "2025-09-08T21:05:57.048Z" }, - { url = "https://files.pythonhosted.org/packages/50/92/29811d2ba7c984251a342c4f9ccc7cc4aa09d43d800af71510cd51c36453/rapidfuzz-3.14.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4f069dec5c450bd987481e752f0a9979e8fdf8e21e5307f5058f5c4bb162fa56", size = 2815720, upload-time = "2025-09-08T21:05:58.618Z" }, - { url = "https://files.pythonhosted.org/packages/78/69/cedcdee16a49e49d4985eab73b59447f211736c5953a58f1b91b6c53a73f/rapidfuzz-3.14.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:4d0d9163725b7ad37a8c46988cae9ebab255984db95ad01bf1987ceb9e3058dd", size = 3323704, upload-time = "2025-09-08T21:06:00.576Z" }, - { url = "https://files.pythonhosted.org/packages/76/3e/5a3f9a5540f18e0126e36f86ecf600145344acb202d94b63ee45211a18b8/rapidfuzz-3.14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db656884b20b213d846f6bc990c053d1f4a60e6d4357f7211775b02092784ca1", size = 4287341, upload-time = "2025-09-08T21:06:02.301Z" }, - { url = "https://files.pythonhosted.org/packages/46/26/45db59195929dde5832852c9de8533b2ac97dcc0d852d1f18aca33828122/rapidfuzz-3.14.1-cp311-cp311-win32.whl", hash = "sha256:4b42f7b9c58cbcfbfaddc5a6278b4ca3b6cd8983e7fd6af70ca791dff7105fb9", size = 1726574, upload-time = "2025-09-08T21:06:04.357Z" }, - { url = "https://files.pythonhosted.org/packages/01/5c/a4caf76535f35fceab25b2aaaed0baecf15b3d1fd40746f71985d20f8c4b/rapidfuzz-3.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:e5847f30d7d4edefe0cb37294d956d3495dd127c1c56e9128af3c2258a520bb4", size = 1547124, upload-time = "2025-09-08T21:06:06.002Z" }, - { url = "https://files.pythonhosted.org/packages/c6/66/aa93b52f95a314584d71fa0b76df00bdd4158aafffa76a350f1ae416396c/rapidfuzz-3.14.1-cp311-cp311-win_arm64.whl", hash = "sha256:5087d8ad453092d80c042a08919b1cb20c8ad6047d772dc9312acd834da00f75", size = 816958, upload-time = "2025-09-08T21:06:07.509Z" }, - { url = "https://files.pythonhosted.org/packages/df/77/2f4887c9b786f203e50b816c1cde71f96642f194e6fa752acfa042cf53fd/rapidfuzz-3.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:809515194f628004aac1b1b280c3734c5ea0ccbd45938c9c9656a23ae8b8f553", size = 1932216, upload-time = "2025-09-08T21:06:09.342Z" }, - { url = "https://files.pythonhosted.org/packages/de/bd/b5e445d156cb1c2a87d36d8da53daf4d2a1d1729b4851660017898b49aa0/rapidfuzz-3.14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0afcf2d6cb633d0d4260d8df6a40de2d9c93e9546e2c6b317ab03f89aa120ad7", size = 1393414, upload-time = "2025-09-08T21:06:10.959Z" }, - { url = "https://files.pythonhosted.org/packages/de/bd/98d065dd0a4479a635df855616980eaae1a1a07a876db9400d421b5b6371/rapidfuzz-3.14.1-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5c1c3d07d53dcafee10599da8988d2b1f39df236aee501ecbd617bd883454fcd", size = 1377194, upload-time = "2025-09-08T21:06:12.471Z" }, - { url = "https://files.pythonhosted.org/packages/d3/8a/1265547b771128b686f3c431377ff1db2fa073397ed082a25998a7b06d4e/rapidfuzz-3.14.1-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6e9ee3e1eb0a027717ee72fe34dc9ac5b3e58119f1bd8dd15bc19ed54ae3e62b", size = 1669573, upload-time = "2025-09-08T21:06:14.016Z" }, - { url = "https://files.pythonhosted.org/packages/a8/57/e73755c52fb451f2054196404ccc468577f8da023b3a48c80bce29ee5d4a/rapidfuzz-3.14.1-cp312-cp312-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:70c845b64a033a20c44ed26bc890eeb851215148cc3e696499f5f65529afb6cb", size = 2217833, upload-time = "2025-09-08T21:06:15.666Z" }, - { url = "https://files.pythonhosted.org/packages/20/14/7399c18c460e72d1b754e80dafc9f65cb42a46cc8f29cd57d11c0c4acc94/rapidfuzz-3.14.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:26db0e815213d04234298dea0d884d92b9cb8d4ba954cab7cf67a35853128a33", size = 3159012, upload-time = "2025-09-08T21:06:17.631Z" }, - { url = "https://files.pythonhosted.org/packages/f8/5e/24f0226ddb5440cabd88605d2491f99ae3748a6b27b0bc9703772892ced7/rapidfuzz-3.14.1-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:6ad3395a416f8b126ff11c788531f157c7debeb626f9d897c153ff8980da10fb", size = 1227032, upload-time = "2025-09-08T21:06:21.06Z" }, - { url = "https://files.pythonhosted.org/packages/40/43/1d54a4ad1a5fac2394d5f28a3108e2bf73c26f4f23663535e3139cfede9b/rapidfuzz-3.14.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:61c5b9ab6f730e6478aa2def566223712d121c6f69a94c7cc002044799442afd", size = 2395054, upload-time = "2025-09-08T21:06:23.482Z" }, - { url = "https://files.pythonhosted.org/packages/0c/71/e9864cd5b0f086c4a03791f5dfe0155a1b132f789fe19b0c76fbabd20513/rapidfuzz-3.14.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:13e0ea3d0c533969158727d1bb7a08c2cc9a816ab83f8f0dcfde7e38938ce3e6", size = 2524741, upload-time = "2025-09-08T21:06:26.825Z" }, - { url = "https://files.pythonhosted.org/packages/b2/0c/53f88286b912faf4a3b2619a60df4f4a67bd0edcf5970d7b0c1143501f0c/rapidfuzz-3.14.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:6325ca435b99f4001aac919ab8922ac464999b100173317defb83eae34e82139", size = 2785311, upload-time = "2025-09-08T21:06:29.471Z" }, - { url = "https://files.pythonhosted.org/packages/53/9a/229c26dc4f91bad323f07304ee5ccbc28f0d21c76047a1e4f813187d0bad/rapidfuzz-3.14.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:07a9fad3247e68798424bdc116c1094e88ecfabc17b29edf42a777520347648e", size = 3303630, upload-time = "2025-09-08T21:06:31.094Z" }, - { url = "https://files.pythonhosted.org/packages/05/de/20e330d6d58cbf83da914accd9e303048b7abae2f198886f65a344b69695/rapidfuzz-3.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f8ff5dbe78db0a10c1f916368e21d328935896240f71f721e073cf6c4c8cdedd", size = 4262364, upload-time = "2025-09-08T21:06:32.877Z" }, - { url = "https://files.pythonhosted.org/packages/1f/10/2327f83fad3534a8d69fe9cd718f645ec1fe828b60c0e0e97efc03bf12f8/rapidfuzz-3.14.1-cp312-cp312-win32.whl", hash = "sha256:9c83270e44a6ae7a39fc1d7e72a27486bccc1fa5f34e01572b1b90b019e6b566", size = 1711927, upload-time = "2025-09-08T21:06:34.669Z" }, - { url = "https://files.pythonhosted.org/packages/78/8d/199df0370133fe9f35bc72f3c037b53c93c5c1fc1e8d915cf7c1f6bb8557/rapidfuzz-3.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:e06664c7fdb51c708e082df08a6888fce4c5c416d7e3cc2fa66dd80eb76a149d", size = 1542045, upload-time = "2025-09-08T21:06:36.364Z" }, - { url = "https://files.pythonhosted.org/packages/b3/c6/cc5d4bd1b16ea2657c80b745d8b1c788041a31fad52e7681496197b41562/rapidfuzz-3.14.1-cp312-cp312-win_arm64.whl", hash = "sha256:6c7c26025f7934a169a23dafea6807cfc3fb556f1dd49229faf2171e5d8101cc", size = 813170, upload-time = "2025-09-08T21:06:38.001Z" }, - { url = "https://files.pythonhosted.org/packages/05/c7/1b17347e30f2b50dd976c54641aa12003569acb1bdaabf45a5cc6f471c58/rapidfuzz-3.14.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4a21ccdf1bd7d57a1009030527ba8fae1c74bf832d0a08f6b67de8f5c506c96f", size = 1862602, upload-time = "2025-09-08T21:08:09.088Z" }, - { url = "https://files.pythonhosted.org/packages/09/cf/95d0dacac77eda22499991bd5f304c77c5965fb27348019a48ec3fe4a3f6/rapidfuzz-3.14.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:589fb0af91d3aff318750539c832ea1100dbac2c842fde24e42261df443845f6", size = 1339548, upload-time = "2025-09-08T21:08:11.059Z" }, - { url = "https://files.pythonhosted.org/packages/b6/58/f515c44ba8c6fa5daa35134b94b99661ced852628c5505ead07b905c3fc7/rapidfuzz-3.14.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a4f18092db4825f2517d135445015b40033ed809a41754918a03ef062abe88a0", size = 1513859, upload-time = "2025-09-08T21:08:13.07Z" }, + { url = "https://files.pythonhosted.org/packages/76/25/5b0a33ad3332ee1213068c66f7c14e9e221be90bab434f0cb4defa9d6660/rapidfuzz-3.14.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dea2d113e260a5da0c4003e0a5e9fdf24a9dc2bb9eaa43abd030a1e46ce7837d", size = 1953885, upload-time = "2025-11-01T11:52:47.75Z" }, + { url = "https://files.pythonhosted.org/packages/2d/ab/f1181f500c32c8fcf7c966f5920c7e56b9b1d03193386d19c956505c312d/rapidfuzz-3.14.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e6c31a4aa68cfa75d7eede8b0ed24b9e458447db604c2db53f358be9843d81d3", size = 1390200, upload-time = "2025-11-01T11:52:49.491Z" }, + { url = "https://files.pythonhosted.org/packages/14/2a/0f2de974ececad873865c6bb3ea3ad07c976ac293d5025b2d73325aac1d4/rapidfuzz-3.14.3-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02821366d928e68ddcb567fed8723dad7ea3a979fada6283e6914d5858674850", size = 1389319, upload-time = "2025-11-01T11:52:51.224Z" }, + { url = "https://files.pythonhosted.org/packages/ed/69/309d8f3a0bb3031fd9b667174cc4af56000645298af7c2931be5c3d14bb4/rapidfuzz-3.14.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cfe8df315ab4e6db4e1be72c5170f8e66021acde22cd2f9d04d2058a9fd8162e", size = 3178495, upload-time = "2025-11-01T11:52:53.005Z" }, + { url = "https://files.pythonhosted.org/packages/10/b7/f9c44a99269ea5bf6fd6a40b84e858414b6e241288b9f2b74af470d222b1/rapidfuzz-3.14.3-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:769f31c60cd79420188fcdb3c823227fc4a6deb35cafec9d14045c7f6743acae", size = 1228443, upload-time = "2025-11-01T11:52:54.991Z" }, + { url = "https://files.pythonhosted.org/packages/f2/0a/3b3137abac7f19c9220e14cd7ce993e35071a7655e7ef697785a3edfea1a/rapidfuzz-3.14.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:54fa03062124e73086dae66a3451c553c1e20a39c077fd704dc7154092c34c63", size = 2411998, upload-time = "2025-11-01T11:52:56.629Z" }, + { url = "https://files.pythonhosted.org/packages/f3/b6/983805a844d44670eaae63831024cdc97ada4e9c62abc6b20703e81e7f9b/rapidfuzz-3.14.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:834d1e818005ed0d4ae38f6b87b86fad9b0a74085467ece0727d20e15077c094", size = 2530120, upload-time = "2025-11-01T11:52:58.298Z" }, + { url = "https://files.pythonhosted.org/packages/b4/cc/2c97beb2b1be2d7595d805682472f1b1b844111027d5ad89b65e16bdbaaa/rapidfuzz-3.14.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:948b00e8476a91f510dd1ec07272efc7d78c275d83b630455559671d4e33b678", size = 4283129, upload-time = "2025-11-01T11:53:00.188Z" }, + { url = "https://files.pythonhosted.org/packages/4d/03/2f0e5e94941045aefe7eafab72320e61285c07b752df9884ce88d6b8b835/rapidfuzz-3.14.3-cp311-cp311-win32.whl", hash = "sha256:43d0305c36f504232f18ea04e55f2059bb89f169d3119c4ea96a0e15b59e2a91", size = 1724224, upload-time = "2025-11-01T11:53:02.149Z" }, + { url = "https://files.pythonhosted.org/packages/cf/99/5fa23e204435803875daefda73fd61baeabc3c36b8fc0e34c1705aab8c7b/rapidfuzz-3.14.3-cp311-cp311-win_amd64.whl", hash = "sha256:ef6bf930b947bd0735c550683939a032090f1d688dfd8861d6b45307b96fd5c5", size = 1544259, upload-time = "2025-11-01T11:53:03.66Z" }, + { url = "https://files.pythonhosted.org/packages/48/35/d657b85fcc615a42661b98ac90ce8e95bd32af474603a105643963749886/rapidfuzz-3.14.3-cp311-cp311-win_arm64.whl", hash = "sha256:f3eb0ff3b75d6fdccd40b55e7414bb859a1cda77c52762c9c82b85569f5088e7", size = 814734, upload-time = "2025-11-01T11:53:05.008Z" }, + { url = "https://files.pythonhosted.org/packages/fa/8e/3c215e860b458cfbedb3ed73bc72e98eb7e0ed72f6b48099604a7a3260c2/rapidfuzz-3.14.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:685c93ea961d135893b5984a5a9851637d23767feabe414ec974f43babbd8226", size = 1945306, upload-time = "2025-11-01T11:53:06.452Z" }, + { url = "https://files.pythonhosted.org/packages/36/d9/31b33512015c899f4a6e6af64df8dfe8acddf4c8b40a4b3e0e6e1bcd00e5/rapidfuzz-3.14.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fa7c8f26f009f8c673fbfb443792f0cf8cf50c4e18121ff1e285b5e08a94fbdb", size = 1390788, upload-time = "2025-11-01T11:53:08.721Z" }, + { url = "https://files.pythonhosted.org/packages/a9/67/2ee6f8de6e2081ccd560a571d9c9063184fe467f484a17fa90311a7f4a2e/rapidfuzz-3.14.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:57f878330c8d361b2ce76cebb8e3e1dc827293b6abf404e67d53260d27b5d941", size = 1374580, upload-time = "2025-11-01T11:53:10.164Z" }, + { url = "https://files.pythonhosted.org/packages/30/83/80d22997acd928eda7deadc19ccd15883904622396d6571e935993e0453a/rapidfuzz-3.14.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c5f545f454871e6af05753a0172849c82feaf0f521c5ca62ba09e1b382d6382", size = 3154947, upload-time = "2025-11-01T11:53:12.093Z" }, + { url = "https://files.pythonhosted.org/packages/5b/cf/9f49831085a16384695f9fb096b99662f589e30b89b4a589a1ebc1a19d34/rapidfuzz-3.14.3-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:07aa0b5d8863e3151e05026a28e0d924accf0a7a3b605da978f0359bb804df43", size = 1223872, upload-time = "2025-11-01T11:53:13.664Z" }, + { url = "https://files.pythonhosted.org/packages/c8/0f/41ee8034e744b871c2e071ef0d360686f5ccfe5659f4fd96c3ec406b3c8b/rapidfuzz-3.14.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73b07566bc7e010e7b5bd490fb04bb312e820970180df6b5655e9e6224c137db", size = 2392512, upload-time = "2025-11-01T11:53:15.109Z" }, + { url = "https://files.pythonhosted.org/packages/da/86/280038b6b0c2ccec54fb957c732ad6b41cc1fd03b288d76545b9cf98343f/rapidfuzz-3.14.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6de00eb84c71476af7d3110cf25d8fe7c792d7f5fa86764ef0b4ca97e78ca3ed", size = 2521398, upload-time = "2025-11-01T11:53:17.146Z" }, + { url = "https://files.pythonhosted.org/packages/fa/7b/05c26f939607dca0006505e3216248ae2de631e39ef94dd63dbbf0860021/rapidfuzz-3.14.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d7843a1abf0091773a530636fdd2a49a41bcae22f9910b86b4f903e76ddc82dc", size = 4259416, upload-time = "2025-11-01T11:53:19.34Z" }, + { url = "https://files.pythonhosted.org/packages/40/eb/9e3af4103d91788f81111af1b54a28de347cdbed8eaa6c91d5e98a889aab/rapidfuzz-3.14.3-cp312-cp312-win32.whl", hash = "sha256:dea97ac3ca18cd3ba8f3d04b5c1fe4aa60e58e8d9b7793d3bd595fdb04128d7a", size = 1709527, upload-time = "2025-11-01T11:53:20.949Z" }, + { url = "https://files.pythonhosted.org/packages/b8/63/d06ecce90e2cf1747e29aeab9f823d21e5877a4c51b79720b2d3be7848f8/rapidfuzz-3.14.3-cp312-cp312-win_amd64.whl", hash = "sha256:b5100fd6bcee4d27f28f4e0a1c6b5127bc8ba7c2a9959cad9eab0bf4a7ab3329", size = 1538989, upload-time = "2025-11-01T11:53:22.428Z" }, + { url = "https://files.pythonhosted.org/packages/fc/6d/beee32dcda64af8128aab3ace2ccb33d797ed58c434c6419eea015fec779/rapidfuzz-3.14.3-cp312-cp312-win_arm64.whl", hash = "sha256:4e49c9e992bc5fc873bd0fff7ef16a4405130ec42f2ce3d2b735ba5d3d4eb70f", size = 811161, upload-time = "2025-11-01T11:53:23.811Z" }, + { url = "https://files.pythonhosted.org/packages/c9/33/b5bd6475c7c27164b5becc9b0e3eb978f1e3640fea590dd3dced6006ee83/rapidfuzz-3.14.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7cf174b52cb3ef5d49e45d0a1133b7e7d0ecf770ed01f97ae9962c5c91d97d23", size = 1888499, upload-time = "2025-11-01T11:54:42.094Z" }, + { url = "https://files.pythonhosted.org/packages/30/d2/89d65d4db4bb931beade9121bc71ad916b5fa9396e807d11b33731494e8e/rapidfuzz-3.14.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:442cba39957a008dfc5bdef21a9c3f4379e30ffb4e41b8555dbaf4887eca9300", size = 1336747, upload-time = "2025-11-01T11:54:43.957Z" }, + { url = "https://files.pythonhosted.org/packages/85/33/cd87d92b23f0b06e8914a61cea6850c6d495ca027f669fab7a379041827a/rapidfuzz-3.14.3-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1faa0f8f76ba75fd7b142c984947c280ef6558b5067af2ae9b8729b0a0f99ede", size = 1352187, upload-time = "2025-11-01T11:54:45.518Z" }, + { url = "https://files.pythonhosted.org/packages/22/20/9d30b4a1ab26aac22fff17d21dec7e9089ccddfe25151d0a8bb57001dc3d/rapidfuzz-3.14.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e6eefec45625c634926a9fd46c9e4f31118ac8f3156fff9494422cee45207e6", size = 3101472, upload-time = "2025-11-01T11:54:47.255Z" }, + { url = "https://files.pythonhosted.org/packages/b1/ad/fa2d3e5c29a04ead7eaa731c7cd1f30f9ec3c77b3a578fdf90280797cbcb/rapidfuzz-3.14.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56fefb4382bb12250f164250240b9dd7772e41c5c8ae976fd598a32292449cc5", size = 1511361, upload-time = "2025-11-01T11:54:49.057Z" }, ] [[package]] @@ -5332,38 +5311,38 @@ wheels = [ [[package]] name = "regex" -version = "2025.10.23" +version = "2025.11.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f8/c8/1d2160d36b11fbe0a61acb7c3c81ab032d9ec8ad888ac9e0a61b85ab99dd/regex-2025.10.23.tar.gz", hash = "sha256:8cbaf8ceb88f96ae2356d01b9adf5e6306fa42fa6f7eab6b97794e37c959ac26", size = 401266, upload-time = "2025-10-21T15:58:20.23Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cc/a9/546676f25e573a4cf00fe8e119b78a37b6a8fe2dc95cda877b30889c9c45/regex-2025.11.3.tar.gz", hash = "sha256:1fedc720f9bb2494ce31a58a1631f9c82df6a09b49c19517ea5cc280b4541e01", size = 414669, upload-time = "2025-11-03T21:34:22.089Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/82/e5/74b7cd5cd76b4171f9793042045bb1726f7856dd56e582fc3e058a7a8a5e/regex-2025.10.23-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6c531155bf9179345e85032052a1e5fe1a696a6abf9cea54b97e8baefff970fd", size = 487960, upload-time = "2025-10-21T15:54:53.253Z" }, - { url = "https://files.pythonhosted.org/packages/b9/08/854fa4b3b20471d1df1c71e831b6a1aa480281e37791e52a2df9641ec5c6/regex-2025.10.23-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:912e9df4e89d383681268d38ad8f5780d7cccd94ba0e9aa09ca7ab7ab4f8e7eb", size = 290425, upload-time = "2025-10-21T15:54:55.21Z" }, - { url = "https://files.pythonhosted.org/packages/ab/d3/6272b1dd3ca1271661e168762b234ad3e00dbdf4ef0c7b9b72d2d159efa7/regex-2025.10.23-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4f375c61bfc3138b13e762fe0ae76e3bdca92497816936534a0177201666f44f", size = 288278, upload-time = "2025-10-21T15:54:56.862Z" }, - { url = "https://files.pythonhosted.org/packages/14/8f/c7b365dd9d9bc0a36e018cb96f2ffb60d2ba8deb589a712b437f67de2920/regex-2025.10.23-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e248cc9446081119128ed002a3801f8031e0c219b5d3c64d3cc627da29ac0a33", size = 793289, upload-time = "2025-10-21T15:54:58.352Z" }, - { url = "https://files.pythonhosted.org/packages/d4/fb/b8fbe9aa16cf0c21f45ec5a6c74b4cecbf1a1c0deb7089d4a6f83a9c1caa/regex-2025.10.23-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b52bf9282fdf401e4f4e721f0f61fc4b159b1307244517789702407dd74e38ca", size = 860321, upload-time = "2025-10-21T15:54:59.813Z" }, - { url = "https://files.pythonhosted.org/packages/b0/81/bf41405c772324926a9bd8a640dedaa42da0e929241834dfce0733070437/regex-2025.10.23-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5c084889ab2c59765a0d5ac602fd1c3c244f9b3fcc9a65fdc7ba6b74c5287490", size = 907011, upload-time = "2025-10-21T15:55:01.968Z" }, - { url = "https://files.pythonhosted.org/packages/a4/fb/5ad6a8b92d3f88f3797b51bb4ef47499acc2d0b53d2fbe4487a892f37a73/regex-2025.10.23-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d80e8eb79009bdb0936658c44ca06e2fbbca67792013e3818eea3f5f228971c2", size = 800312, upload-time = "2025-10-21T15:55:04.15Z" }, - { url = "https://files.pythonhosted.org/packages/42/48/b4efba0168a2b57f944205d823f8e8a3a1ae6211a34508f014ec2c712f4f/regex-2025.10.23-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b6f259118ba87b814a8ec475380aee5f5ae97a75852a3507cf31d055b01b5b40", size = 782839, upload-time = "2025-10-21T15:55:05.641Z" }, - { url = "https://files.pythonhosted.org/packages/13/2a/c9efb4c6c535b0559c1fa8e431e0574d229707c9ca718600366fcfef6801/regex-2025.10.23-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9b8c72a242683dcc72d37595c4f1278dfd7642b769e46700a8df11eab19dfd82", size = 854270, upload-time = "2025-10-21T15:55:07.27Z" }, - { url = "https://files.pythonhosted.org/packages/34/2d/68eecc1bdaee020e8ba549502291c9450d90d8590d0552247c9b543ebf7b/regex-2025.10.23-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:a8d7b7a0a3df9952f9965342159e0c1f05384c0f056a47ce8b61034f8cecbe83", size = 845771, upload-time = "2025-10-21T15:55:09.477Z" }, - { url = "https://files.pythonhosted.org/packages/a5/cd/a1ae499cf9b87afb47a67316bbf1037a7c681ffe447c510ed98c0aa2c01c/regex-2025.10.23-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:413bfea20a484c524858125e92b9ce6ffdd0a4b97d4ff96b5859aa119b0f1bdd", size = 788778, upload-time = "2025-10-21T15:55:11.396Z" }, - { url = "https://files.pythonhosted.org/packages/38/f9/70765e63f5ea7d43b2b6cd4ee9d3323f16267e530fb2a420d92d991cf0fc/regex-2025.10.23-cp311-cp311-win32.whl", hash = "sha256:f76deef1f1019a17dad98f408b8f7afc4bd007cbe835ae77b737e8c7f19ae575", size = 265666, upload-time = "2025-10-21T15:55:13.306Z" }, - { url = "https://files.pythonhosted.org/packages/9c/1a/18e9476ee1b63aaec3844d8e1cb21842dc19272c7e86d879bfc0dcc60db3/regex-2025.10.23-cp311-cp311-win_amd64.whl", hash = "sha256:59bba9f7125536f23fdab5deeea08da0c287a64c1d3acc1c7e99515809824de8", size = 277600, upload-time = "2025-10-21T15:55:15.087Z" }, - { url = "https://files.pythonhosted.org/packages/1d/1b/c019167b1f7a8ec77251457e3ff0339ed74ca8bce1ea13138dc98309c923/regex-2025.10.23-cp311-cp311-win_arm64.whl", hash = "sha256:b103a752b6f1632ca420225718d6ed83f6a6ced3016dd0a4ab9a6825312de566", size = 269974, upload-time = "2025-10-21T15:55:16.841Z" }, - { url = "https://files.pythonhosted.org/packages/f6/57/eeb274d83ab189d02d778851b1ac478477522a92b52edfa6e2ae9ff84679/regex-2025.10.23-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:7a44d9c00f7a0a02d3b777429281376370f3d13d2c75ae74eb94e11ebcf4a7fc", size = 489187, upload-time = "2025-10-21T15:55:18.322Z" }, - { url = "https://files.pythonhosted.org/packages/55/5c/7dad43a9b6ea88bf77e0b8b7729a4c36978e1043165034212fd2702880c6/regex-2025.10.23-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b83601f84fde939ae3478bb32a3aef36f61b58c3208d825c7e8ce1a735f143f2", size = 291122, upload-time = "2025-10-21T15:55:20.2Z" }, - { url = "https://files.pythonhosted.org/packages/66/21/38b71e6f2818f0f4b281c8fba8d9d57cfca7b032a648fa59696e0a54376a/regex-2025.10.23-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ec13647907bb9d15fd192bbfe89ff06612e098a5709e7d6ecabbdd8f7908fc45", size = 288797, upload-time = "2025-10-21T15:55:21.932Z" }, - { url = "https://files.pythonhosted.org/packages/be/95/888f069c89e7729732a6d7cca37f76b44bfb53a1e35dda8a2c7b65c1b992/regex-2025.10.23-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78d76dd2957d62501084e7012ddafc5fcd406dd982b7a9ca1ea76e8eaaf73e7e", size = 798442, upload-time = "2025-10-21T15:55:23.747Z" }, - { url = "https://files.pythonhosted.org/packages/76/70/4f903c608faf786627a8ee17c06e0067b5acade473678b69c8094b248705/regex-2025.10.23-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8668e5f067e31a47699ebb354f43aeb9c0ef136f915bd864243098524482ac43", size = 864039, upload-time = "2025-10-21T15:55:25.656Z" }, - { url = "https://files.pythonhosted.org/packages/62/19/2df67b526bf25756c7f447dde554fc10a220fd839cc642f50857d01e4a7b/regex-2025.10.23-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a32433fe3deb4b2d8eda88790d2808fed0dc097e84f5e683b4cd4f42edef6cca", size = 912057, upload-time = "2025-10-21T15:55:27.309Z" }, - { url = "https://files.pythonhosted.org/packages/99/14/9a39b7c9e007968411bc3c843cc14cf15437510c0a9991f080cab654fd16/regex-2025.10.23-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d97d73818c642c938db14c0668167f8d39520ca9d983604575ade3fda193afcc", size = 803374, upload-time = "2025-10-21T15:55:28.9Z" }, - { url = "https://files.pythonhosted.org/packages/d4/f7/3495151dd3ca79949599b6d069b72a61a2c5e24fc441dccc79dcaf708fe6/regex-2025.10.23-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bca7feecc72ee33579e9f6ddf8babbe473045717a0e7dbc347099530f96e8b9a", size = 787714, upload-time = "2025-10-21T15:55:30.628Z" }, - { url = "https://files.pythonhosted.org/packages/28/65/ee882455e051131869957ee8597faea45188c9a98c0dad724cfb302d4580/regex-2025.10.23-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7e24af51e907d7457cc4a72691ec458320b9ae67dc492f63209f01eecb09de32", size = 858392, upload-time = "2025-10-21T15:55:32.322Z" }, - { url = "https://files.pythonhosted.org/packages/53/25/9287fef5be97529ebd3ac79d256159cb709a07eb58d4be780d1ca3885da8/regex-2025.10.23-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:d10bcde58bbdf18146f3a69ec46dd03233b94a4a5632af97aa5378da3a47d288", size = 850484, upload-time = "2025-10-21T15:55:34.037Z" }, - { url = "https://files.pythonhosted.org/packages/f3/b4/b49b88b4fea2f14dc73e5b5842755e782fc2e52f74423d6f4adc130d5880/regex-2025.10.23-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:44383bc0c933388516c2692c9a7503e1f4a67e982f20b9a29d2fb70c6494f147", size = 789634, upload-time = "2025-10-21T15:55:35.958Z" }, - { url = "https://files.pythonhosted.org/packages/b6/3c/2f8d199d0e84e78bcd6bdc2be9b62410624f6b796e2893d1837ae738b160/regex-2025.10.23-cp312-cp312-win32.whl", hash = "sha256:6040a86f95438a0114bba16e51dfe27f1bc004fd29fe725f54a586f6d522b079", size = 266060, upload-time = "2025-10-21T15:55:37.902Z" }, - { url = "https://files.pythonhosted.org/packages/d7/67/c35e80969f6ded306ad70b0698863310bdf36aca57ad792f45ddc0e2271f/regex-2025.10.23-cp312-cp312-win_amd64.whl", hash = "sha256:436b4c4352fe0762e3bfa34a5567079baa2ef22aa9c37cf4d128979ccfcad842", size = 276931, upload-time = "2025-10-21T15:55:39.502Z" }, - { url = "https://files.pythonhosted.org/packages/f5/a1/4ed147de7d2b60174f758412c87fa51ada15cd3296a0ff047f4280aaa7ca/regex-2025.10.23-cp312-cp312-win_arm64.whl", hash = "sha256:f4b1b1991617055b46aff6f6db24888c1f05f4db9801349d23f09ed0714a9335", size = 270103, upload-time = "2025-10-21T15:55:41.24Z" }, + { url = "https://files.pythonhosted.org/packages/f7/90/4fb5056e5f03a7048abd2b11f598d464f0c167de4f2a51aa868c376b8c70/regex-2025.11.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:eadade04221641516fa25139273505a1c19f9bf97589a05bc4cfcd8b4a618031", size = 488081, upload-time = "2025-11-03T21:31:11.946Z" }, + { url = "https://files.pythonhosted.org/packages/85/23/63e481293fac8b069d84fba0299b6666df720d875110efd0338406b5d360/regex-2025.11.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:feff9e54ec0dd3833d659257f5c3f5322a12eee58ffa360984b716f8b92983f4", size = 290554, upload-time = "2025-11-03T21:31:13.387Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9d/b101d0262ea293a0066b4522dfb722eb6a8785a8c3e084396a5f2c431a46/regex-2025.11.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3b30bc921d50365775c09a7ed446359e5c0179e9e2512beec4a60cbcef6ddd50", size = 288407, upload-time = "2025-11-03T21:31:14.809Z" }, + { url = "https://files.pythonhosted.org/packages/0c/64/79241c8209d5b7e00577ec9dca35cd493cc6be35b7d147eda367d6179f6d/regex-2025.11.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f99be08cfead2020c7ca6e396c13543baea32343b7a9a5780c462e323bd8872f", size = 793418, upload-time = "2025-11-03T21:31:16.556Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e2/23cd5d3573901ce8f9757c92ca4db4d09600b865919b6d3e7f69f03b1afd/regex-2025.11.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6dd329a1b61c0ee95ba95385fb0c07ea0d3fe1a21e1349fa2bec272636217118", size = 860448, upload-time = "2025-11-03T21:31:18.12Z" }, + { url = "https://files.pythonhosted.org/packages/2a/4c/aecf31beeaa416d0ae4ecb852148d38db35391aac19c687b5d56aedf3a8b/regex-2025.11.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4c5238d32f3c5269d9e87be0cf096437b7622b6920f5eac4fd202468aaeb34d2", size = 907139, upload-time = "2025-11-03T21:31:20.753Z" }, + { url = "https://files.pythonhosted.org/packages/61/22/b8cb00df7d2b5e0875f60628594d44dba283e951b1ae17c12f99e332cc0a/regex-2025.11.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10483eefbfb0adb18ee9474498c9a32fcf4e594fbca0543bb94c48bac6183e2e", size = 800439, upload-time = "2025-11-03T21:31:22.069Z" }, + { url = "https://files.pythonhosted.org/packages/02/a8/c4b20330a5cdc7a8eb265f9ce593f389a6a88a0c5f280cf4d978f33966bc/regex-2025.11.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:78c2d02bb6e1da0720eedc0bad578049cad3f71050ef8cd065ecc87691bed2b0", size = 782965, upload-time = "2025-11-03T21:31:23.598Z" }, + { url = "https://files.pythonhosted.org/packages/b4/4c/ae3e52988ae74af4b04d2af32fee4e8077f26e51b62ec2d12d246876bea2/regex-2025.11.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e6b49cd2aad93a1790ce9cffb18964f6d3a4b0b3dbdbd5de094b65296fce6e58", size = 854398, upload-time = "2025-11-03T21:31:25.008Z" }, + { url = "https://files.pythonhosted.org/packages/06/d1/a8b9cf45874eda14b2e275157ce3b304c87e10fb38d9fc26a6e14eb18227/regex-2025.11.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:885b26aa3ee56433b630502dc3d36ba78d186a00cc535d3806e6bfd9ed3c70ab", size = 845897, upload-time = "2025-11-03T21:31:26.427Z" }, + { url = "https://files.pythonhosted.org/packages/ea/fe/1830eb0236be93d9b145e0bd8ab499f31602fe0999b1f19e99955aa8fe20/regex-2025.11.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ddd76a9f58e6a00f8772e72cff8ebcff78e022be95edf018766707c730593e1e", size = 788906, upload-time = "2025-11-03T21:31:28.078Z" }, + { url = "https://files.pythonhosted.org/packages/66/47/dc2577c1f95f188c1e13e2e69d8825a5ac582ac709942f8a03af42ed6e93/regex-2025.11.3-cp311-cp311-win32.whl", hash = "sha256:3e816cc9aac1cd3cc9a4ec4d860f06d40f994b5c7b4d03b93345f44e08cc68bf", size = 265812, upload-time = "2025-11-03T21:31:29.72Z" }, + { url = "https://files.pythonhosted.org/packages/50/1e/15f08b2f82a9bbb510621ec9042547b54d11e83cb620643ebb54e4eb7d71/regex-2025.11.3-cp311-cp311-win_amd64.whl", hash = "sha256:087511f5c8b7dfbe3a03f5d5ad0c2a33861b1fc387f21f6f60825a44865a385a", size = 277737, upload-time = "2025-11-03T21:31:31.422Z" }, + { url = "https://files.pythonhosted.org/packages/f4/fc/6500eb39f5f76c5e47a398df82e6b535a5e345f839581012a418b16f9cc3/regex-2025.11.3-cp311-cp311-win_arm64.whl", hash = "sha256:1ff0d190c7f68ae7769cd0313fe45820ba07ffebfddfaa89cc1eb70827ba0ddc", size = 270290, upload-time = "2025-11-03T21:31:33.041Z" }, + { url = "https://files.pythonhosted.org/packages/e8/74/18f04cb53e58e3fb107439699bd8375cf5a835eec81084e0bddbd122e4c2/regex-2025.11.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bc8ab71e2e31b16e40868a40a69007bc305e1109bd4658eb6cad007e0bf67c41", size = 489312, upload-time = "2025-11-03T21:31:34.343Z" }, + { url = "https://files.pythonhosted.org/packages/78/3f/37fcdd0d2b1e78909108a876580485ea37c91e1acf66d3bb8e736348f441/regex-2025.11.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:22b29dda7e1f7062a52359fca6e58e548e28c6686f205e780b02ad8ef710de36", size = 291256, upload-time = "2025-11-03T21:31:35.675Z" }, + { url = "https://files.pythonhosted.org/packages/bf/26/0a575f58eb23b7ebd67a45fccbc02ac030b737b896b7e7a909ffe43ffd6a/regex-2025.11.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3a91e4a29938bc1a082cc28fdea44be420bf2bebe2665343029723892eb073e1", size = 288921, upload-time = "2025-11-03T21:31:37.07Z" }, + { url = "https://files.pythonhosted.org/packages/ea/98/6a8dff667d1af907150432cf5abc05a17ccd32c72a3615410d5365ac167a/regex-2025.11.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:08b884f4226602ad40c5d55f52bf91a9df30f513864e0054bad40c0e9cf1afb7", size = 798568, upload-time = "2025-11-03T21:31:38.784Z" }, + { url = "https://files.pythonhosted.org/packages/64/15/92c1db4fa4e12733dd5a526c2dd2b6edcbfe13257e135fc0f6c57f34c173/regex-2025.11.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3e0b11b2b2433d1c39c7c7a30e3f3d0aeeea44c2a8d0bae28f6b95f639927a69", size = 864165, upload-time = "2025-11-03T21:31:40.559Z" }, + { url = "https://files.pythonhosted.org/packages/f9/e7/3ad7da8cdee1ce66c7cd37ab5ab05c463a86ffeb52b1a25fe7bd9293b36c/regex-2025.11.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:87eb52a81ef58c7ba4d45c3ca74e12aa4b4e77816f72ca25258a85b3ea96cb48", size = 912182, upload-time = "2025-11-03T21:31:42.002Z" }, + { url = "https://files.pythonhosted.org/packages/84/bd/9ce9f629fcb714ffc2c3faf62b6766ecb7a585e1e885eb699bcf130a5209/regex-2025.11.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a12ab1f5c29b4e93db518f5e3872116b7e9b1646c9f9f426f777b50d44a09e8c", size = 803501, upload-time = "2025-11-03T21:31:43.815Z" }, + { url = "https://files.pythonhosted.org/packages/7c/0f/8dc2e4349d8e877283e6edd6c12bdcebc20f03744e86f197ab6e4492bf08/regex-2025.11.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7521684c8c7c4f6e88e35ec89680ee1aa8358d3f09d27dfbdf62c446f5d4c695", size = 787842, upload-time = "2025-11-03T21:31:45.353Z" }, + { url = "https://files.pythonhosted.org/packages/f9/73/cff02702960bc185164d5619c0c62a2f598a6abff6695d391b096237d4ab/regex-2025.11.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7fe6e5440584e94cc4b3f5f4d98a25e29ca12dccf8873679a635638349831b98", size = 858519, upload-time = "2025-11-03T21:31:46.814Z" }, + { url = "https://files.pythonhosted.org/packages/61/83/0e8d1ae71e15bc1dc36231c90b46ee35f9d52fab2e226b0e039e7ea9c10a/regex-2025.11.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:8e026094aa12b43f4fd74576714e987803a315c76edb6b098b9809db5de58f74", size = 850611, upload-time = "2025-11-03T21:31:48.289Z" }, + { url = "https://files.pythonhosted.org/packages/c8/f5/70a5cdd781dcfaa12556f2955bf170cd603cb1c96a1827479f8faea2df97/regex-2025.11.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:435bbad13e57eb5606a68443af62bed3556de2f46deb9f7d4237bc2f1c9fb3a0", size = 789759, upload-time = "2025-11-03T21:31:49.759Z" }, + { url = "https://files.pythonhosted.org/packages/59/9b/7c29be7903c318488983e7d97abcf8ebd3830e4c956c4c540005fcfb0462/regex-2025.11.3-cp312-cp312-win32.whl", hash = "sha256:3839967cf4dc4b985e1570fd8d91078f0c519f30491c60f9ac42a8db039be204", size = 266194, upload-time = "2025-11-03T21:31:51.53Z" }, + { url = "https://files.pythonhosted.org/packages/1a/67/3b92df89f179d7c367be654ab5626ae311cb28f7d5c237b6bb976cd5fbbb/regex-2025.11.3-cp312-cp312-win_amd64.whl", hash = "sha256:e721d1b46e25c481dc5ded6f4b3f66c897c58d2e8cfdf77bbced84339108b0b9", size = 277069, upload-time = "2025-11-03T21:31:53.151Z" }, + { url = "https://files.pythonhosted.org/packages/d7/55/85ba4c066fe5094d35b249c3ce8df0ba623cfd35afb22d6764f23a52a1c5/regex-2025.11.3-cp312-cp312-win_arm64.whl", hash = "sha256:64350685ff08b1d3a6fff33f45a9ca183dc1d58bbfe4981604e70ec9801bbc26", size = 270330, upload-time = "2025-11-03T21:31:54.514Z" }, ] [[package]] @@ -5509,28 +5488,28 @@ wheels = [ [[package]] name = "ruff" -version = "0.14.2" +version = "0.14.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ee/34/8218a19b2055b80601e8fd201ec723c74c7fe1ca06d525a43ed07b6d8e85/ruff-0.14.2.tar.gz", hash = "sha256:98da787668f239313d9c902ca7c523fe11b8ec3f39345553a51b25abc4629c96", size = 5539663, upload-time = "2025-10-23T19:37:00.956Z" } +sdist = { url = "https://files.pythonhosted.org/packages/df/55/cccfca45157a2031dcbb5a462a67f7cf27f8b37d4b3b1cd7438f0f5c1df6/ruff-0.14.4.tar.gz", hash = "sha256:f459a49fe1085a749f15414ca76f61595f1a2cc8778ed7c279b6ca2e1fd19df3", size = 5587844, upload-time = "2025-11-06T22:07:45.033Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/16/dd/23eb2db5ad9acae7c845700493b72d3ae214dce0b226f27df89216110f2b/ruff-0.14.2-py3-none-linux_armv6l.whl", hash = "sha256:7cbe4e593505bdec5884c2d0a4d791a90301bc23e49a6b1eb642dd85ef9c64f1", size = 12533390, upload-time = "2025-10-23T19:36:18.044Z" }, - { url = "https://files.pythonhosted.org/packages/5a/8c/5f9acff43ddcf3f85130d0146d0477e28ccecc495f9f684f8f7119b74c0d/ruff-0.14.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:8d54b561729cee92f8d89c316ad7a3f9705533f5903b042399b6ae0ddfc62e11", size = 12887187, upload-time = "2025-10-23T19:36:22.664Z" }, - { url = "https://files.pythonhosted.org/packages/99/fa/047646491479074029665022e9f3dc6f0515797f40a4b6014ea8474c539d/ruff-0.14.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5c8753dfa44ebb2cde10ce5b4d2ef55a41fb9d9b16732a2c5df64620dbda44a3", size = 11925177, upload-time = "2025-10-23T19:36:24.778Z" }, - { url = "https://files.pythonhosted.org/packages/15/8b/c44cf7fe6e59ab24a9d939493a11030b503bdc2a16622cede8b7b1df0114/ruff-0.14.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d0bbeffb8d9f4fccf7b5198d566d0bad99a9cb622f1fc3467af96cb8773c9e3", size = 12358285, upload-time = "2025-10-23T19:36:26.979Z" }, - { url = "https://files.pythonhosted.org/packages/45/01/47701b26254267ef40369aea3acb62a7b23e921c27372d127e0f3af48092/ruff-0.14.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7047f0c5a713a401e43a88d36843d9c83a19c584e63d664474675620aaa634a8", size = 12303832, upload-time = "2025-10-23T19:36:29.192Z" }, - { url = "https://files.pythonhosted.org/packages/2d/5c/ae7244ca4fbdf2bee9d6405dcd5bc6ae51ee1df66eb7a9884b77b8af856d/ruff-0.14.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bf8d2f9aa1602599217d82e8e0af7fd33e5878c4d98f37906b7c93f46f9a839", size = 13036995, upload-time = "2025-10-23T19:36:31.861Z" }, - { url = "https://files.pythonhosted.org/packages/27/4c/0860a79ce6fd4c709ac01173f76f929d53f59748d0dcdd662519835dae43/ruff-0.14.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1c505b389e19c57a317cf4b42db824e2fca96ffb3d86766c1c9f8b96d32048a7", size = 14512649, upload-time = "2025-10-23T19:36:33.915Z" }, - { url = "https://files.pythonhosted.org/packages/7f/7f/d365de998069720a3abfc250ddd876fc4b81a403a766c74ff9bde15b5378/ruff-0.14.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a307fc45ebd887b3f26b36d9326bb70bf69b01561950cdcc6c0bdf7bb8e0f7cc", size = 14088182, upload-time = "2025-10-23T19:36:36.983Z" }, - { url = "https://files.pythonhosted.org/packages/6c/ea/d8e3e6b209162000a7be1faa41b0a0c16a133010311edc3329753cc6596a/ruff-0.14.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:61ae91a32c853172f832c2f40bd05fd69f491db7289fb85a9b941ebdd549781a", size = 13599516, upload-time = "2025-10-23T19:36:39.208Z" }, - { url = "https://files.pythonhosted.org/packages/fa/ea/c7810322086db68989fb20a8d5221dd3b79e49e396b01badca07b433ab45/ruff-0.14.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1967e40286f63ee23c615e8e7e98098dedc7301568bd88991f6e544d8ae096", size = 13272690, upload-time = "2025-10-23T19:36:41.453Z" }, - { url = "https://files.pythonhosted.org/packages/a9/39/10b05acf8c45786ef501d454e00937e1b97964f846bf28883d1f9619928a/ruff-0.14.2-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:2877f02119cdebf52a632d743a2e302dea422bfae152ebe2f193d3285a3a65df", size = 13496497, upload-time = "2025-10-23T19:36:43.61Z" }, - { url = "https://files.pythonhosted.org/packages/59/a1/1f25f8301e13751c30895092485fada29076e5e14264bdacc37202e85d24/ruff-0.14.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e681c5bc777de5af898decdcb6ba3321d0d466f4cb43c3e7cc2c3b4e7b843a05", size = 12266116, upload-time = "2025-10-23T19:36:45.625Z" }, - { url = "https://files.pythonhosted.org/packages/5c/fa/0029bfc9ce16ae78164e6923ef392e5f173b793b26cc39aa1d8b366cf9dc/ruff-0.14.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e21be42d72e224736f0c992cdb9959a2fa53c7e943b97ef5d081e13170e3ffc5", size = 12281345, upload-time = "2025-10-23T19:36:47.618Z" }, - { url = "https://files.pythonhosted.org/packages/a5/ab/ece7baa3c0f29b7683be868c024f0838770c16607bea6852e46b202f1ff6/ruff-0.14.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:b8264016f6f209fac16262882dbebf3f8be1629777cf0f37e7aff071b3e9b92e", size = 12629296, upload-time = "2025-10-23T19:36:49.789Z" }, - { url = "https://files.pythonhosted.org/packages/a4/7f/638f54b43f3d4e48c6a68062794e5b367ddac778051806b9e235dfb7aa81/ruff-0.14.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5ca36b4cb4db3067a3b24444463ceea5565ea78b95fe9a07ca7cb7fd16948770", size = 13371610, upload-time = "2025-10-23T19:36:51.882Z" }, - { url = "https://files.pythonhosted.org/packages/8d/35/3654a973ebe5b32e1fd4a08ed2d46755af7267da7ac710d97420d7b8657d/ruff-0.14.2-py3-none-win32.whl", hash = "sha256:41775927d287685e08f48d8eb3f765625ab0b7042cc9377e20e64f4eb0056ee9", size = 12415318, upload-time = "2025-10-23T19:36:53.961Z" }, - { url = "https://files.pythonhosted.org/packages/71/30/3758bcf9e0b6a4193a6f51abf84254aba00887dfa8c20aba18aa366c5f57/ruff-0.14.2-py3-none-win_amd64.whl", hash = "sha256:0df3424aa5c3c08b34ed8ce099df1021e3adaca6e90229273496b839e5a7e1af", size = 13565279, upload-time = "2025-10-23T19:36:56.578Z" }, - { url = "https://files.pythonhosted.org/packages/2e/5d/aa883766f8ef9ffbe6aa24f7192fb71632f31a30e77eb39aa2b0dc4290ac/ruff-0.14.2-py3-none-win_arm64.whl", hash = "sha256:ea9d635e83ba21569fbacda7e78afbfeb94911c9434aff06192d9bc23fd5495a", size = 12554956, upload-time = "2025-10-23T19:36:58.714Z" }, + { url = "https://files.pythonhosted.org/packages/17/b9/67240254166ae1eaa38dec32265e9153ac53645a6c6670ed36ad00722af8/ruff-0.14.4-py3-none-linux_armv6l.whl", hash = "sha256:e6604613ffbcf2297cd5dcba0e0ac9bd0c11dc026442dfbb614504e87c349518", size = 12606781, upload-time = "2025-11-06T22:07:01.841Z" }, + { url = "https://files.pythonhosted.org/packages/46/c8/09b3ab245d8652eafe5256ab59718641429f68681ee713ff06c5c549f156/ruff-0.14.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d99c0b52b6f0598acede45ee78288e5e9b4409d1ce7f661f0fa36d4cbeadf9a4", size = 12946765, upload-time = "2025-11-06T22:07:05.858Z" }, + { url = "https://files.pythonhosted.org/packages/14/bb/1564b000219144bf5eed2359edc94c3590dd49d510751dad26202c18a17d/ruff-0.14.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9358d490ec030f1b51d048a7fd6ead418ed0826daf6149e95e30aa67c168af33", size = 11928120, upload-time = "2025-11-06T22:07:08.023Z" }, + { url = "https://files.pythonhosted.org/packages/a3/92/d5f1770e9988cc0742fefaa351e840d9aef04ec24ae1be36f333f96d5704/ruff-0.14.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81b40d27924f1f02dfa827b9c0712a13c0e4b108421665322218fc38caf615c2", size = 12370877, upload-time = "2025-11-06T22:07:10.015Z" }, + { url = "https://files.pythonhosted.org/packages/e2/29/e9282efa55f1973d109faf839a63235575519c8ad278cc87a182a366810e/ruff-0.14.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f5e649052a294fe00818650712083cddc6cc02744afaf37202c65df9ea52efa5", size = 12408538, upload-time = "2025-11-06T22:07:13.085Z" }, + { url = "https://files.pythonhosted.org/packages/8e/01/930ed6ecfce130144b32d77d8d69f5c610e6d23e6857927150adf5d7379a/ruff-0.14.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa082a8f878deeba955531f975881828fd6afd90dfa757c2b0808aadb437136e", size = 13141942, upload-time = "2025-11-06T22:07:15.386Z" }, + { url = "https://files.pythonhosted.org/packages/6a/46/a9c89b42b231a9f487233f17a89cbef9d5acd538d9488687a02ad288fa6b/ruff-0.14.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1043c6811c2419e39011890f14d0a30470f19d47d197c4858b2787dfa698f6c8", size = 14544306, upload-time = "2025-11-06T22:07:17.631Z" }, + { url = "https://files.pythonhosted.org/packages/78/96/9c6cf86491f2a6d52758b830b89b78c2ae61e8ca66b86bf5a20af73d20e6/ruff-0.14.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a9f3a936ac27fb7c2a93e4f4b943a662775879ac579a433291a6f69428722649", size = 14210427, upload-time = "2025-11-06T22:07:19.832Z" }, + { url = "https://files.pythonhosted.org/packages/71/f4/0666fe7769a54f63e66404e8ff698de1dcde733e12e2fd1c9c6efb689cb5/ruff-0.14.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:95643ffd209ce78bc113266b88fba3d39e0461f0cbc8b55fb92505030fb4a850", size = 13658488, upload-time = "2025-11-06T22:07:22.32Z" }, + { url = "https://files.pythonhosted.org/packages/ee/79/6ad4dda2cfd55e41ac9ed6d73ef9ab9475b1eef69f3a85957210c74ba12c/ruff-0.14.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:456daa2fa1021bc86ca857f43fe29d5d8b3f0e55e9f90c58c317c1dcc2afc7b5", size = 13354908, upload-time = "2025-11-06T22:07:24.347Z" }, + { url = "https://files.pythonhosted.org/packages/b5/60/f0b6990f740bb15c1588601d19d21bcc1bd5de4330a07222041678a8e04f/ruff-0.14.4-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:f911bba769e4a9f51af6e70037bb72b70b45a16db5ce73e1f72aefe6f6d62132", size = 13587803, upload-time = "2025-11-06T22:07:26.327Z" }, + { url = "https://files.pythonhosted.org/packages/c9/da/eaaada586f80068728338e0ef7f29ab3e4a08a692f92eb901a4f06bbff24/ruff-0.14.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:76158a7369b3979fa878612c623a7e5430c18b2fd1c73b214945c2d06337db67", size = 12279654, upload-time = "2025-11-06T22:07:28.46Z" }, + { url = "https://files.pythonhosted.org/packages/66/d4/b1d0e82cf9bf8aed10a6d45be47b3f402730aa2c438164424783ac88c0ed/ruff-0.14.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f3b8f3b442d2b14c246e7aeca2e75915159e06a3540e2f4bed9f50d062d24469", size = 12357520, upload-time = "2025-11-06T22:07:31.468Z" }, + { url = "https://files.pythonhosted.org/packages/04/f4/53e2b42cc82804617e5c7950b7079d79996c27e99c4652131c6a1100657f/ruff-0.14.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c62da9a06779deecf4d17ed04939ae8b31b517643b26370c3be1d26f3ef7dbde", size = 12719431, upload-time = "2025-11-06T22:07:33.831Z" }, + { url = "https://files.pythonhosted.org/packages/a2/94/80e3d74ed9a72d64e94a7b7706b1c1ebaa315ef2076fd33581f6a1cd2f95/ruff-0.14.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5a443a83a1506c684e98acb8cb55abaf3ef725078be40237463dae4463366349", size = 13464394, upload-time = "2025-11-06T22:07:35.905Z" }, + { url = "https://files.pythonhosted.org/packages/54/1a/a49f071f04c42345c793d22f6cf5e0920095e286119ee53a64a3a3004825/ruff-0.14.4-py3-none-win32.whl", hash = "sha256:643b69cb63cd996f1fc7229da726d07ac307eae442dd8974dbc7cf22c1e18fff", size = 12493429, upload-time = "2025-11-06T22:07:38.43Z" }, + { url = "https://files.pythonhosted.org/packages/bc/22/e58c43e641145a2b670328fb98bc384e20679b5774258b1e540207580266/ruff-0.14.4-py3-none-win_amd64.whl", hash = "sha256:26673da283b96fe35fa0c939bf8411abec47111644aa9f7cfbd3c573fb125d2c", size = 13635380, upload-time = "2025-11-06T22:07:40.496Z" }, + { url = "https://files.pythonhosted.org/packages/30/bd/4168a751ddbbf43e86544b4de8b5c3b7be8d7167a2a5cb977d274e04f0a1/ruff-0.14.4-py3-none-win_arm64.whl", hash = "sha256:dd09c292479596b0e6fec8cd95c65c3a6dc68e9ad17b8f2382130f87ff6a75bb", size = 12663065, upload-time = "2025-11-06T22:07:42.603Z" }, ] [[package]] @@ -5569,14 +5548,14 @@ wheels = [ [[package]] name = "scipy-stubs" -version = "1.16.2.4" +version = "1.16.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "optype", extra = ["numpy"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1f/b1/c806d700fb442f8b04037b1272be303e9b55dea17237002958bd4dd48c47/scipy_stubs-1.16.2.4.tar.gz", hash = "sha256:dc303e0ba2272aa3832660f0e55f7b461ab32e98f452090f3e28a338f3920e67", size = 356403, upload-time = "2025-10-17T03:53:11.714Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bd/68/c53c3bce6bd069a164015be1be2671c968b526be4af1e85db64c88f04546/scipy_stubs-1.16.3.0.tar.gz", hash = "sha256:d6943c085e47a1ed431309f9ca582b6a206a9db808a036132a0bf01ebc34b506", size = 356462, upload-time = "2025-10-28T22:05:31.198Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f5/d2/596b5f7439c96e6e636db81a2e39a24738ccc6a1363b97e254643070c9c2/scipy_stubs-1.16.2.4-py3-none-any.whl", hash = "sha256:8e47684fe5f8b823e06ec6513e4dbb5ae43a5a064d10d8228b7e3c3d243ec673", size = 557679, upload-time = "2025-10-17T03:53:10.007Z" }, + { url = "https://files.pythonhosted.org/packages/86/1c/0ba7305fa01cfe7a6f1b8c86ccdd1b7a0d43fa9bd769c059995311e291a2/scipy_stubs-1.16.3.0-py3-none-any.whl", hash = "sha256:90e5d82ced2183ef3c5c0a28a77df8cc227458624364fa0ff975ad24fa89d6ad", size = 557713, upload-time = "2025-10-28T22:05:29.454Z" }, ] [[package]] @@ -5743,11 +5722,11 @@ wheels = [ [[package]] name = "sqlglot" -version = "26.33.0" +version = "27.29.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/25/9d/fcd59b4612d5ad1e2257c67c478107f073b19e1097d3bfde2fb517884416/sqlglot-26.33.0.tar.gz", hash = "sha256:2817278779fa51d6def43aa0d70690b93a25c83eb18ec97130fdaf707abc0d73", size = 5353340, upload-time = "2025-07-01T13:09:06.311Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d1/50/766692a83468adb1bde9e09ea524a01719912f6bc4fdb47ec18368320f6e/sqlglot-27.29.0.tar.gz", hash = "sha256:2270899694663acef94fa93497971837e6fadd712f4a98b32aee1e980bc82722", size = 5503507, upload-time = "2025-10-29T13:50:24.594Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/31/8d/f1d9cb5b18e06aa45689fbeaaea6ebab66d5f01d1e65029a8f7657c06be5/sqlglot-26.33.0-py3-none-any.whl", hash = "sha256:031cee20c0c796a83d26d079a47fdce667604df430598c7eabfa4e4dfd147033", size = 477610, upload-time = "2025-07-01T13:09:03.926Z" }, + { url = "https://files.pythonhosted.org/packages/9b/70/20c1912bc0bfebf516d59d618209443b136c58a7cff141afa7cf30969988/sqlglot-27.29.0-py3-none-any.whl", hash = "sha256:9a5ea8ac61826a7763de10cad45a35f0aa9bfcf7b96ee74afb2314de9089e1cb", size = 526060, upload-time = "2025-10-29T13:50:22.061Z" }, ] [[package]] @@ -5761,15 +5740,15 @@ wheels = [ [[package]] name = "starlette" -version = "0.47.2" +version = "0.49.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/04/57/d062573f391d062710d4088fa1369428c38d51460ab6fedff920efef932e/starlette-0.47.2.tar.gz", hash = "sha256:6ae9aa5db235e4846decc1e7b79c4f346adf41e9777aebeb49dfd09bbd7023d8", size = 2583948, upload-time = "2025-07-20T17:31:58.522Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/3f/507c21db33b66fb027a332f2cb3abbbe924cc3a79ced12f01ed8645955c9/starlette-0.49.1.tar.gz", hash = "sha256:481a43b71e24ed8c43b11ea02f5353d77840e01480881b8cb5a26b8cae64a8cb", size = 2654703, upload-time = "2025-10-28T17:34:10.928Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/1f/b876b1f83aef204198a42dc101613fefccb32258e5428b5f9259677864b4/starlette-0.47.2-py3-none-any.whl", hash = "sha256:c5847e96134e5c5371ee9fac6fdf1a67336d5815e09eb2a01fdb57a351ef915b", size = 72984, upload-time = "2025-07-20T17:31:56.738Z" }, + { url = "https://files.pythonhosted.org/packages/51/da/545b75d420bb23b5d494b0517757b351963e974e79933f01e05c929f20a6/starlette-0.49.1-py3-none-any.whl", hash = "sha256:d92ce9f07e4a3caa3ac13a79523bd18e3bc0042bb8ff2d759a8e7dd0e1859875", size = 74175, upload-time = "2025-10-28T17:34:09.13Z" }, ] [[package]] @@ -5862,9 +5841,10 @@ wheels = [ [[package]] name = "tablestore" -version = "6.2.0" +version = "6.3.7" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "aiohttp" }, { name = "certifi" }, { name = "crc32c" }, { name = "flatbuffers" }, @@ -5874,9 +5854,9 @@ dependencies = [ { name = "six" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a1/58/48d65d181a69f7db19f7cdee01d252168fbfbad2d1bb25abed03e6df3b05/tablestore-6.2.0.tar.gz", hash = "sha256:0773e77c00542be1bfebbc3c7a85f72a881c63e4e7df7c5a9793a54144590e68", size = 85942, upload-time = "2025-04-15T12:11:20.655Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/39/47a3ec8e42fe74dd05af1dfed9c3b02b8f8adfdd8656b2c5d4f95f975c9f/tablestore-6.3.7.tar.gz", hash = "sha256:990682dbf6b602f317a2d359b4281dcd054b4326081e7a67b73dbbe95407be51", size = 117440, upload-time = "2025-10-29T02:57:57.415Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9c/da/30451712a769bcf417add8e81163d478a4d668b0e8d489a9d667260d55df/tablestore-6.2.0-py3-none-any.whl", hash = "sha256:6af496d841ab1ff3f78b46abbd87b95a08d89605c51664d2b30933b1d1c5583a", size = 106297, upload-time = "2025-04-15T12:11:17.476Z" }, + { url = "https://files.pythonhosted.org/packages/fe/55/1b24d8c369204a855ac652712f815e88a4909802094e613fe3742a2d80e3/tablestore-6.3.7-py3-none-any.whl", hash = "sha256:38dcc55085912ab2515e183afd4532a58bb628a763590a99fc1bd2a4aba6855c", size = 139041, upload-time = "2025-10-29T02:57:55.727Z" }, ] [[package]] @@ -6088,27 +6068,27 @@ wheels = [ [[package]] name = "ty" -version = "0.0.1a24" +version = "0.0.1a26" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fc/71/a1db0d604be8d0067342e7aad74ab0c7fec6bea20eb33b6a6324baabf45f/ty-0.0.1a24.tar.gz", hash = "sha256:3273c514df5b9954c9928ee93b6a0872d12310ea8de42249a6c197720853e096", size = 4386721, upload-time = "2025-10-23T13:33:29.729Z" } +sdist = { url = "https://files.pythonhosted.org/packages/39/39/b4b4ecb6ca6d7e937fa56f0b92a8f48d7719af8fe55bdbf667638e9f93e2/ty-0.0.1a26.tar.gz", hash = "sha256:65143f8efeb2da1644821b710bf6b702a31ddcf60a639d5a576db08bded91db4", size = 4432154, upload-time = "2025-11-10T18:02:30.142Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/89/21fb275cb676d3480b67fbbf6eb162aec200b4dcb10c7885bffc754dc73f/ty-0.0.1a24-py3-none-linux_armv6l.whl", hash = "sha256:d478cd02278b988d5767df5821a0f03b99ef848f6fc29e8c77f30e859b89c779", size = 8833903, upload-time = "2025-10-23T13:32:53.552Z" }, - { url = "https://files.pythonhosted.org/packages/a2/22/beb127bce67fc2a1f3704b6b39505d77a7078a61becfbe10c5ee7ed9f5d8/ty-0.0.1a24-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:de758790f05f0a3bb396da4c75f770c85ab3a46095ec188b830c916bd5a5bc10", size = 8691210, upload-time = "2025-10-23T13:32:55.706Z" }, - { url = "https://files.pythonhosted.org/packages/39/bd/190f5e934339669191179fa01c60f5a140822dc465f0d4d312985903d109/ty-0.0.1a24-py3-none-macosx_11_0_arm64.whl", hash = "sha256:68f325ddc8cfb7a7883501e5e22f01284c5d5912aaa901d21e477f38edf4e625", size = 8138421, upload-time = "2025-10-23T13:32:58.718Z" }, - { url = "https://files.pythonhosted.org/packages/40/84/f08020dabad1e660957bb641b2ba42fe1e1e87192c234b1fc1fd6fb42cf2/ty-0.0.1a24-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:49a52bbb1f8b0b29ad717d3fd70bd2afe752e991072fd13ff2fc14f03945c849", size = 8419861, upload-time = "2025-10-23T13:33:00.068Z" }, - { url = "https://files.pythonhosted.org/packages/e5/cc/e3812f7c1c2a0dcfb1bf8a5d6a7e5aa807a483a632c0d5734ea50a60a9ae/ty-0.0.1a24-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:12945fe358fb0f73acf0b72a29efcc80da73f8d95cfe7f11a81e4d8d730e7b18", size = 8641443, upload-time = "2025-10-23T13:33:01.887Z" }, - { url = "https://files.pythonhosted.org/packages/e3/8b/3fc047d04afbba4780aba031dc80e06f6e95d888bbddb8fd6da502975cfb/ty-0.0.1a24-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6231e190989798b0860d15a8f225e3a06a6ce442a7083d743eb84f5b4b83b980", size = 8997853, upload-time = "2025-10-23T13:33:03.951Z" }, - { url = "https://files.pythonhosted.org/packages/e0/d9/ae1475d9200ecf6b196a59357ea3e4f4aa00e1d38c9237ca3f267a4a3ef7/ty-0.0.1a24-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:7c6401f4a7532eab63dd7fe015c875792a701ca4b1a44fc0c490df32594e071f", size = 9676864, upload-time = "2025-10-23T13:33:05.744Z" }, - { url = "https://files.pythonhosted.org/packages/cc/d9/abd6849f0601b24d5d5098e47b00dfbdfe44a4f6776f2e54a21005739bdf/ty-0.0.1a24-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:83c69759bfa2a00278aa94210eded35aea599215d16460445cbbf5b36f77c454", size = 9351386, upload-time = "2025-10-23T13:33:07.807Z" }, - { url = "https://files.pythonhosted.org/packages/63/5c/639e0fe3b489c65b12b38385fe5032024756bc07f96cd994d7df3ab579ef/ty-0.0.1a24-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:71146713cb8f804aad2b2e87a8efa7e7df0a5a25aed551af34498bcc2721ae03", size = 9517674, upload-time = "2025-10-23T13:33:09.641Z" }, - { url = "https://files.pythonhosted.org/packages/78/ae/323f373fcf54a883e39ea3fb6f83ed6d1eda6dfd8246462d0cfd81dac781/ty-0.0.1a24-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4836854411059de592f0ecc62193f2b24fc3acbfe6ce6ce0bf2c6d1a5ea9de7", size = 9000468, upload-time = "2025-10-23T13:33:11.51Z" }, - { url = "https://files.pythonhosted.org/packages/14/26/1a4be005aa4326264f0e7ce554844d5ef8afc4c5600b9a38b05671e9ed18/ty-0.0.1a24-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a7f0b8546d27605e09cd0fe08dc28c1d177bf7498316dd11c3bb8ef9440bf2e1", size = 8377164, upload-time = "2025-10-23T13:33:13.504Z" }, - { url = "https://files.pythonhosted.org/packages/73/2f/dcd6b449084e53a2beb536d8721a2517143a2353413b5b323d6eb9a31705/ty-0.0.1a24-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4e2fbf7dce2311127748824e03d9de2279e96ab5713029c3fa58acbaf19b2f51", size = 8672709, upload-time = "2025-10-23T13:33:15.213Z" }, - { url = "https://files.pythonhosted.org/packages/dc/2e/8b3b45d46085a79547e6db5295f42c6b798a0240d34454181e2ca947183c/ty-0.0.1a24-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f35b7f0a65f7e34e59f34173164946c89a4c4b1d1c18cabe662356a35f33efcd", size = 8788732, upload-time = "2025-10-23T13:33:17.347Z" }, - { url = "https://files.pythonhosted.org/packages/cf/c5/7675ff8693ad13044d86d8d4c824caf6bbb00340df05ad93d0e9d1e0338b/ty-0.0.1a24-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:120fe95eaf2a200f531f949e3dd0a9d95ab38915ce388412873eae28c499c0b9", size = 9095693, upload-time = "2025-10-23T13:33:19.836Z" }, - { url = "https://files.pythonhosted.org/packages/62/0b/bdba5d31aa3f0298900675fd355eec63a9c682aa46ef743dbac8f28b4608/ty-0.0.1a24-py3-none-win32.whl", hash = "sha256:d8d8379264a8c14e1f4ca9e117e72df3bf0a0b0ca64c5fd18affbb6142d8662a", size = 8361302, upload-time = "2025-10-23T13:33:21.572Z" }, - { url = "https://files.pythonhosted.org/packages/b4/48/127a45e16c49563df82829542ca64b0bc387591a777df450972bc85957e6/ty-0.0.1a24-py3-none-win_amd64.whl", hash = "sha256:2e826d75bddd958643128c309f6c47673ed6cef2ea5f2b3cd1a1159a1392971a", size = 9039221, upload-time = "2025-10-23T13:33:23.055Z" }, - { url = "https://files.pythonhosted.org/packages/31/67/9161fbb8c1a2005938bdb5ccd4e4c98ee4bea2d262afb777a4b69aa15eb5/ty-0.0.1a24-py3-none-win_arm64.whl", hash = "sha256:2efbfcdc94d306f0d25f3efe2a90c0f953132ca41a1a47d0bae679d11cdb15aa", size = 8514044, upload-time = "2025-10-23T13:33:27.816Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/661833ecacc4d994f7e30a7f1307bfd3a4a91392a6b03fb6a018723e75b8/ty-0.0.1a26-py3-none-linux_armv6l.whl", hash = "sha256:09208dca99bb548e9200136d4d42618476bfe1f4d2066511f2c8e2e4dfeced5e", size = 9173869, upload-time = "2025-11-10T18:01:46.012Z" }, + { url = "https://files.pythonhosted.org/packages/66/a8/32ea50f064342de391a7267f84349287e2f1c2eb0ad4811d6110916179d6/ty-0.0.1a26-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:91d12b66c91a1b82e698a2aa73fe043a1a9da83ff0dfd60b970500bee0963b91", size = 8973420, upload-time = "2025-11-10T18:01:49.32Z" }, + { url = "https://files.pythonhosted.org/packages/d1/f6/6659d55940cd5158a6740ae46a65be84a7ee9167738033a9b1259c36eef5/ty-0.0.1a26-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c5bc6dfcea5477c81ad01d6a29ebc9bfcbdb21c34664f79c9e1b84be7aa8f289", size = 8528888, upload-time = "2025-11-10T18:01:51.511Z" }, + { url = "https://files.pythonhosted.org/packages/79/c9/4cbe7295013cc412b4f100b509aaa21982c08c59764a2efa537ead049345/ty-0.0.1a26-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40e5d15635e9918924138e8d3fb1cbf80822dfb8dc36ea8f3e72df598c0c4bea", size = 8801867, upload-time = "2025-11-10T18:01:53.888Z" }, + { url = "https://files.pythonhosted.org/packages/ed/b3/25099b219a6444c4b29f175784a275510c1cd85a23a926d687ab56915027/ty-0.0.1a26-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:86dc147ed0790c7c8fd3f0d6c16c3c5135b01e99c440e89c6ca1e0e592bb6682", size = 8975519, upload-time = "2025-11-10T18:01:56.231Z" }, + { url = "https://files.pythonhosted.org/packages/73/3e/3ad570f4f592cb1d11982dd2c426c90d2aa9f3d38bf77a7e2ce8aa614302/ty-0.0.1a26-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fbe0e07c9d5e624edfc79a468f2ef191f9435581546a5bb6b92713ddc86ad4a6", size = 9331932, upload-time = "2025-11-10T18:01:58.476Z" }, + { url = "https://files.pythonhosted.org/packages/04/fa/62c72eead0302787f9cc0d613fc671107afeecdaf76ebb04db8f91bb9f7e/ty-0.0.1a26-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:0dcebbfe9f24b43d98a078f4a41321ae7b08bea40f5c27d81394b3f54e9f7fb5", size = 9921353, upload-time = "2025-11-10T18:02:00.749Z" }, + { url = "https://files.pythonhosted.org/packages/6c/1f/3b329c4b60d878704e09eb9d05467f911f188e699961c044b75932893e0a/ty-0.0.1a26-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0901b75afc7738224ffc98bbc8ea03a20f167a2a83a4b23a6550115e8b3ddbc6", size = 9700800, upload-time = "2025-11-10T18:02:03.544Z" }, + { url = "https://files.pythonhosted.org/packages/92/24/13fcba20dd86a7c3f83c814279aa3eb6a29c5f1b38a3b3a4a0fd22159189/ty-0.0.1a26-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4788f34d384c132977958d76fef7f274f8d181b22e33933c4d16cff2bb5ca3b9", size = 9728289, upload-time = "2025-11-10T18:02:06.386Z" }, + { url = "https://files.pythonhosted.org/packages/40/7a/798894ff0b948425570b969be35e672693beeb6b852815b7340bc8de1575/ty-0.0.1a26-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b98851c11c560ce63cd972ed9728aa079d9cf40483f2cdcf3626a55849bfe107", size = 9279735, upload-time = "2025-11-10T18:02:09.425Z" }, + { url = "https://files.pythonhosted.org/packages/1a/54/71261cc1b8dc7d3c4ad92a83b4d1681f5cb7ea5965ebcbc53311ae8c6424/ty-0.0.1a26-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c20b4625a20059adecd86fe2c4df87cd6115fea28caee45d3bdcf8fb83d29510", size = 8767428, upload-time = "2025-11-10T18:02:11.956Z" }, + { url = "https://files.pythonhosted.org/packages/8e/07/b248b73a640badba2b301e6845699b7dd241f40a321b9b1bce684d440f70/ty-0.0.1a26-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d9909e96276f8d16382d285db92ae902174cae842aa953003ec0c06642db2f8a", size = 9009170, upload-time = "2025-11-10T18:02:14.878Z" }, + { url = "https://files.pythonhosted.org/packages/f8/35/ec8353f2bb7fd2f41bca6070b29ecb58e2de9af043e649678b8c132d5439/ty-0.0.1a26-py3-none-musllinux_1_2_i686.whl", hash = "sha256:a76d649ceefe9baa9bbae97d217bee076fd8eeb2a961f66f1dff73cc70af4ac8", size = 9119215, upload-time = "2025-11-10T18:02:18.329Z" }, + { url = "https://files.pythonhosted.org/packages/70/48/db49fe1b7e66edf90dc285869043f99c12aacf7a99c36ee760e297bac6d5/ty-0.0.1a26-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:a0ee0f6366bcf70fae114e714d45335cacc8daa936037441e02998a9110b7a29", size = 9398655, upload-time = "2025-11-10T18:02:21.031Z" }, + { url = "https://files.pythonhosted.org/packages/10/f8/d869492bdbb21ae8cf4c99b02f20812bbbf49aa187cfeb387dfaa03036a8/ty-0.0.1a26-py3-none-win32.whl", hash = "sha256:86689b90024810cac7750bf0c6e1652e4b4175a9de7b82b8b1583202aeb47287", size = 8645669, upload-time = "2025-11-10T18:02:23.23Z" }, + { url = "https://files.pythonhosted.org/packages/b4/18/8a907575d2b335afee7556cb92233ebb5efcefe17752fc9dcab21cffb23b/ty-0.0.1a26-py3-none-win_amd64.whl", hash = "sha256:829e6e6dbd7d9d370f97b2398b4804552554bdcc2d298114fed5e2ea06cbc05c", size = 9442975, upload-time = "2025-11-10T18:02:25.68Z" }, + { url = "https://files.pythonhosted.org/packages/e9/22/af92dcfdd84b78dd97ac6b7154d6a763781f04a400140444885c297cc213/ty-0.0.1a26-py3-none-win_arm64.whl", hash = "sha256:b8f431c784d4cf5b4195a3521b2eca9c15902f239b91154cb920da33f943c62b", size = 8958958, upload-time = "2025-11-10T18:02:28.071Z" }, ] [[package]] @@ -6137,11 +6117,11 @@ wheels = [ [[package]] name = "types-awscrt" -version = "0.28.2" +version = "0.28.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/86/65/f92debc7c9ff9e6e51cf1495248f0edd2fa7123461acf5d07ec1688d8ac1/types_awscrt-0.28.2.tar.gz", hash = "sha256:4349b6fc7b1cd9c9eb782701fb213875db89ab1781219c0e947dd7c4d9dcd65e", size = 17438, upload-time = "2025-10-19T06:39:11.202Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f7/6f/d4f2adb086e8f5cd2ae83cf8dbb192057d8b5025120e5b372468292db67f/types_awscrt-0.28.4.tar.gz", hash = "sha256:15929da84802f27019ee8e4484fb1c102e1f6d4cf22eb48688c34a5a86d02eb6", size = 17692, upload-time = "2025-11-11T02:56:53.516Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/27/23/535c2b3492fb31286a6adad45af3367eba3c23edc2fa24824d9526626012/types_awscrt-0.28.2-py3-none-any.whl", hash = "sha256:d08916fa735cfc032e6a8cfdac92785f1c4e88623999b224ea4e6267d5de5fcb", size = 41929, upload-time = "2025-10-19T06:39:10.042Z" }, + { url = "https://files.pythonhosted.org/packages/5e/ae/9acc4adf1d5d7bb7d09b6f9ff5d4d04a72eb64700d104106dd517665cd57/types_awscrt-0.28.4-py3-none-any.whl", hash = "sha256:2d453f9e27583fcc333771b69a5255a5a4e2c52f86e70f65f3c5a6789d3443d0", size = 42307, upload-time = "2025-11-11T02:56:52.231Z" }, ] [[package]] @@ -6355,11 +6335,11 @@ wheels = [ [[package]] name = "types-psutil" -version = "7.0.0.20251001" +version = "7.0.0.20251111" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9e/91/b020f9100b196a1f247cd12575f68dcdad94f032c1e0c42987d7632142ce/types_psutil-7.0.0.20251001.tar.gz", hash = "sha256:60d696200ddae28677e7d88cdebd6e960294e85adefbaafe0f6e5d0e7b4c1963", size = 20469, upload-time = "2025-10-01T03:04:21.292Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1a/ba/4f48c927f38c7a4d6f7ff65cde91c49d28a95a56e00ec19b2813e1e0b1c1/types_psutil-7.0.0.20251111.tar.gz", hash = "sha256:d109ee2da4c0a9b69b8cefc46e195db8cf0fc0200b6641480df71e7f3f51a239", size = 20287, upload-time = "2025-11-11T03:06:37.482Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c0/99/50f30e0b648e6f583165cb2e535b0256a02a03efa4868cb2f017ad25b3d8/types_psutil-7.0.0.20251001-py3-none-any.whl", hash = "sha256:adc31de8386d31c61bd4123112fd51e2c700c7502a001cad72a3d56ba6b463d1", size = 23164, upload-time = "2025-10-01T03:04:20.089Z" }, + { url = "https://files.pythonhosted.org/packages/fb/bc/b081d10fbd933cdf839109707a693c668a174e2276d64159a582a9cebd3f/types_psutil-7.0.0.20251111-py3-none-any.whl", hash = "sha256:85ba00205dcfa3c73685122e5a360205d2fbc9b56f942b591027bf401ce0cc47", size = 23052, upload-time = "2025-11-11T03:06:36.011Z" }, ] [[package]] @@ -6407,11 +6387,11 @@ wheels = [ [[package]] name = "types-python-dateutil" -version = "2.9.0.20251008" +version = "2.9.0.20251108" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fc/83/24ed25dd0c6277a1a170c180ad9eef5879ecc9a4745b58d7905a4588c80d/types_python_dateutil-2.9.0.20251008.tar.gz", hash = "sha256:c3826289c170c93ebd8360c3485311187df740166dbab9dd3b792e69f2bc1f9c", size = 16128, upload-time = "2025-10-08T02:51:34.93Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/42/18dff855130c3551d2b5159165bd24466f374dcb78670e5259d2ed51f55c/types_python_dateutil-2.9.0.20251108.tar.gz", hash = "sha256:d8a6687e197f2fa71779ce36176c666841f811368710ab8d274b876424ebfcaa", size = 16220, upload-time = "2025-11-08T02:55:53.393Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/da/af/5d24b8d49ef358468ecfdff5c556adf37f4fd28e336b96f923661a808329/types_python_dateutil-2.9.0.20251008-py3-none-any.whl", hash = "sha256:b9a5232c8921cf7661b29c163ccc56055c418ab2c6eabe8f917cbcc73a4c4157", size = 17934, upload-time = "2025-10-08T02:51:33.55Z" }, + { url = "https://files.pythonhosted.org/packages/25/dd/9fb1f5ef742cab1ea390582f407c967677704d2f5362b48c09de0d0dc8d4/types_python_dateutil-2.9.0.20251108-py3-none-any.whl", hash = "sha256:a4a537f0ea7126f8ccc2763eec9aa31ac8609e3c8e530eb2ddc5ee234b3cd764", size = 18127, upload-time = "2025-11-08T02:55:52.291Z" }, ] [[package]] @@ -6425,11 +6405,11 @@ wheels = [ [[package]] name = "types-pytz" -version = "2025.2.0.20250809" +version = "2025.2.0.20251108" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/07/e2/c774f754de26848f53f05defff5bb21dd9375a059d1ba5b5ea943cf8206e/types_pytz-2025.2.0.20250809.tar.gz", hash = "sha256:222e32e6a29bb28871f8834e8785e3801f2dc4441c715cd2082b271eecbe21e5", size = 10876, upload-time = "2025-08-09T03:14:17.453Z" } +sdist = { url = "https://files.pythonhosted.org/packages/40/ff/c047ddc68c803b46470a357454ef76f4acd8c1088f5cc4891cdd909bfcf6/types_pytz-2025.2.0.20251108.tar.gz", hash = "sha256:fca87917836ae843f07129567b74c1929f1870610681b4c92cb86a3df5817bdb", size = 10961, upload-time = "2025-11-08T02:55:57.001Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/db/d0/91c24fe54e565f2344d7a6821e6c6bb099841ef09007ea6321a0bac0f808/types_pytz-2025.2.0.20250809-py3-none-any.whl", hash = "sha256:4f55ed1b43e925cf851a756fe1707e0f5deeb1976e15bf844bcaa025e8fbd0db", size = 10095, upload-time = "2025-08-09T03:14:16.674Z" }, + { url = "https://files.pythonhosted.org/packages/e7/c1/56ef16bf5dcd255155cc736d276efa6ae0a5c26fd685e28f0412a4013c01/types_pytz-2025.2.0.20251108-py3-none-any.whl", hash = "sha256:0f1c9792cab4eb0e46c52f8845c8f77cf1e313cb3d68bf826aa867fe4717d91c", size = 10116, upload-time = "2025-11-08T02:55:56.194Z" }, ] [[package]] @@ -6838,7 +6818,7 @@ wheels = [ [[package]] name = "wandb" -version = "0.22.2" +version = "0.23.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -6852,17 +6832,17 @@ dependencies = [ { name = "sentry-sdk" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c1/a8/680bd77e11a278e6c14a2cb4646e8ab9525b2baaa81c3d12dc0f616aa4aa/wandb-0.22.2.tar.gz", hash = "sha256:510f5a1ac30d16921c36c3b932da852f046641d4aee98a86a7f5ec03a6e95bda", size = 41401439, upload-time = "2025-10-07T19:54:21.88Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/8b/db2d44395c967cd452517311fd6ede5d1e07310769f448358d4874248512/wandb-0.23.0.tar.gz", hash = "sha256:e5f98c61a8acc3ee84583ca78057f64344162ce026b9f71cb06eea44aec27c93", size = 44413921, upload-time = "2025-11-11T21:06:30.737Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/b3/8c637fb594cfd574ce9c9f7d0ac2f2d12742eb38ec59dcbb713beae95343/wandb-0.22.2-py3-none-macosx_12_0_arm64.whl", hash = "sha256:2e29c9fa4462b5411b2cd2175ae33eff4309c91de7c426bca6bc8e7abc7e5dec", size = 18677549, upload-time = "2025-10-07T19:54:00.839Z" }, - { url = "https://files.pythonhosted.org/packages/d3/f3/e309a726eaebddad6b8d9a73a50891e5796962ec8a091bb6a61d31692d1e/wandb-0.22.2-py3-none-macosx_12_0_x86_64.whl", hash = "sha256:c42d594cd7a9da4fd39ecdb0abbc081b61f304123277b2b6c4ba84283956fd21", size = 19715188, upload-time = "2025-10-07T19:54:03.805Z" }, - { url = "https://files.pythonhosted.org/packages/f9/73/fad59910215876008f4781b57d828d1b19b3677c9b46af615e7229746435/wandb-0.22.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5188d84e66d3fd584f3b3ae4d2a70e78f29403c0528e6aecaa4188a1fcf54d8", size = 18463148, upload-time = "2025-10-07T19:54:05.676Z" }, - { url = "https://files.pythonhosted.org/packages/87/11/572c1913b5b92e4c519f735adfae572b46f2d79d99ede63eec0d6a272d6e/wandb-0.22.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88ccd484af9f21cfc127976793c3cf66cfe1acd75bd8cd650086a64e88bac4bf", size = 19908645, upload-time = "2025-10-07T19:54:07.693Z" }, - { url = "https://files.pythonhosted.org/packages/6d/0d/133aa82f5a505ba638b4fda5014cefddfe7f1f6238ef4afc0871ec61c41f/wandb-0.22.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:abf0ed175e791af64110e0a0b99ce02bbbbd1017722bc32d3bc328efb86450cd", size = 18501348, upload-time = "2025-10-07T19:54:10.234Z" }, - { url = "https://files.pythonhosted.org/packages/d0/d5/776203be2601872f01dacc6a5b4274106ec0db7cd3bf2cdb3b741f8fc932/wandb-0.22.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:44e77c56403b90bf3473a7ca3bfc4d42c636b7c0e31a5fb9cd0382f08302f74b", size = 20001756, upload-time = "2025-10-07T19:54:12.452Z" }, - { url = "https://files.pythonhosted.org/packages/30/43/ae3fa46e20b1d9a6508dd9abe716d57205c038ed4661c5c98ace48a60eac/wandb-0.22.2-py3-none-win32.whl", hash = "sha256:44d12bd379dbe15be5ceed6bdf23803d42f648ba0dd111297b4c47a3c7be6dbd", size = 19075950, upload-time = "2025-10-07T19:54:14.892Z" }, - { url = "https://files.pythonhosted.org/packages/09/59/c174321e868205f7a659d1e5ec51f546e62267296d6f4179bb9119294964/wandb-0.22.2-py3-none-win_amd64.whl", hash = "sha256:c95eb221bf316c0872f7ac55071856b9f25f95a2de983ada48acf653ce259386", size = 19075953, upload-time = "2025-10-07T19:54:16.837Z" }, - { url = "https://files.pythonhosted.org/packages/7a/a2/c7c24fda78513cab5686949d8cb36459dbbccbbb4b2b6fc67237ece31a00/wandb-0.22.2-py3-none-win_arm64.whl", hash = "sha256:20d2ab9aa10445aab3d60914a980f002a4f66566e28b0cd156b1e462f0080a0d", size = 17383217, upload-time = "2025-10-07T19:54:19.384Z" }, + { url = "https://files.pythonhosted.org/packages/41/61/a3220c7fa4cadfb2b2a5c09e3fa401787326584ade86d7c1f58bf1cd43bd/wandb-0.23.0-py3-none-macosx_12_0_arm64.whl", hash = "sha256:b682ec5e38fc97bd2e868ac7615a0ab4fc6a15220ee1159e87270a5ebb7a816d", size = 18992250, upload-time = "2025-11-11T21:06:03.412Z" }, + { url = "https://files.pythonhosted.org/packages/90/16/e69333cf3d11e7847f424afc6c8ae325e1f6061b2e5118d7a17f41b6525d/wandb-0.23.0-py3-none-macosx_12_0_x86_64.whl", hash = "sha256:ec094eb71b778e77db8c188da19e52c4f96cb9d5b4421d7dc05028afc66fd7e7", size = 20045616, upload-time = "2025-11-11T21:06:07.109Z" }, + { url = "https://files.pythonhosted.org/packages/62/79/42dc6c7bb0b425775fe77f1a3f1a22d75d392841a06b43e150a3a7f2553a/wandb-0.23.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e43f1f04b98c34f407dcd2744cec0a590abce39bed14a61358287f817514a7b", size = 18758848, upload-time = "2025-11-11T21:06:09.832Z" }, + { url = "https://files.pythonhosted.org/packages/b8/94/d6ddb78334996ccfc1179444bfcfc0f37ffd07ee79bb98940466da6f68f8/wandb-0.23.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e5847f98cbb3175caf5291932374410141f5bb3b7c25f9c5e562c1988ce0bf5", size = 20231493, upload-time = "2025-11-11T21:06:12.323Z" }, + { url = "https://files.pythonhosted.org/packages/52/4d/0ad6df0e750c19dabd24d2cecad0938964f69a072f05fbdab7281bec2b64/wandb-0.23.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:6151355fd922539926e870be811474238c9614b96541773b990f1ce53368aef6", size = 18793473, upload-time = "2025-11-11T21:06:14.967Z" }, + { url = "https://files.pythonhosted.org/packages/f8/da/c2ba49c5573dff93dafc0acce691bb1c3d57361bf834b2f2c58e6193439b/wandb-0.23.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:df62e426e448ebc44269140deb7240df474e743b12d4b1f53b753afde4aa06d4", size = 20332882, upload-time = "2025-11-11T21:06:17.865Z" }, + { url = "https://files.pythonhosted.org/packages/40/65/21bfb10ee5cd93fbcaf794958863c7e05bac4bbeb1cc1b652094aa3743a5/wandb-0.23.0-py3-none-win32.whl", hash = "sha256:6c21d3eadda17aef7df6febdffdddfb0b4835c7754435fc4fe27631724269f5c", size = 19433198, upload-time = "2025-11-11T21:06:21.913Z" }, + { url = "https://files.pythonhosted.org/packages/f1/33/cbe79e66c171204e32cf940c7fdfb8b5f7d2af7a00f301c632f3a38aa84b/wandb-0.23.0-py3-none-win_amd64.whl", hash = "sha256:b50635fa0e16e528bde25715bf446e9153368428634ca7a5dbd7a22c8ae4e915", size = 19433201, upload-time = "2025-11-11T21:06:24.607Z" }, + { url = "https://files.pythonhosted.org/packages/1c/a0/5ecfae12d78ea036a746c071e4c13b54b28d641efbba61d2947c73b3e6f9/wandb-0.23.0-py3-none-win_arm64.whl", hash = "sha256:fa0181b02ce4d1993588f4a728d8b73ae487eb3cb341e6ce01c156be7a98ec72", size = 17678649, upload-time = "2025-11-11T21:06:27.289Z" }, ] [[package]] @@ -6917,7 +6897,7 @@ wheels = [ [[package]] name = "weave" -version = "0.51.59" +version = "0.52.16" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -6925,18 +6905,17 @@ dependencies = [ { name = "eval-type-backport" }, { name = "gql", extra = ["aiohttp", "requests"] }, { name = "jsonschema" }, - { name = "nest-asyncio" }, { name = "packaging" }, { name = "polyfile-weave" }, { name = "pydantic" }, - { name = "rich" }, { name = "sentry-sdk" }, { name = "tenacity" }, + { name = "tzdata", marker = "sys_platform == 'win32'" }, { name = "wandb" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0e/53/1b0350a64837df3e29eda6149a542f3a51e706122086f82547153820e982/weave-0.51.59.tar.gz", hash = "sha256:fad34c0478f3470401274cba8fa2bfd45d14a187db0a5724bd507e356761b349", size = 480572, upload-time = "2025-07-25T22:05:07.458Z" } +sdist = { url = "https://files.pythonhosted.org/packages/be/30/b795b5a857e8a908e68f3ed969587bb2bc63527ef2260f72ac1a6fd983e8/weave-0.52.16.tar.gz", hash = "sha256:7bb8fdce0393007e9c40fb1769d0606bfe55401c4cd13146457ccac4b49c695d", size = 607024, upload-time = "2025-11-07T19:45:30.898Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/bc/fa5ffb887a1ee28109b29c62416c9e0f41da8e75e6871671208b3d42b392/weave-0.51.59-py3-none-any.whl", hash = "sha256:2238578574ecdf6285efdf028c78987769720242ac75b7b84b1dbc59060468ce", size = 612468, upload-time = "2025-07-25T22:05:05.088Z" }, + { url = "https://files.pythonhosted.org/packages/e5/87/a54513796605dfaef2c3c23c2733bcb4b24866a623635c057b2ffdb74052/weave-0.52.16-py3-none-any.whl", hash = "sha256:85985b8cf233032c6d915dfac95b3bcccb1304444d99a6b4a61f3666b58146ce", size = 764366, upload-time = "2025-11-07T19:45:28.878Z" }, ] [[package]] @@ -7154,34 +7133,29 @@ wheels = [ [[package]] name = "zope-event" -version = "6.0" +version = "6.1" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "setuptools" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c2/d8/9c8b0c6bb1db09725395618f68d3b8a08089fca0aed28437500caaf713ee/zope_event-6.0.tar.gz", hash = "sha256:0ebac894fa7c5f8b7a89141c272133d8c1de6ddc75ea4b1f327f00d1f890df92", size = 18731, upload-time = "2025-09-12T07:10:13.551Z" } +sdist = { url = "https://files.pythonhosted.org/packages/46/33/d3eeac228fc14de76615612ee208be2d8a5b5b0fada36bf9b62d6b40600c/zope_event-6.1.tar.gz", hash = "sha256:6052a3e0cb8565d3d4ef1a3a7809336ac519bc4fe38398cb8d466db09adef4f0", size = 18739, upload-time = "2025-11-07T08:05:49.934Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/b5/1abb5a8b443314c978617bf46d5d9ad648bdf21058074e817d7efbb257db/zope_event-6.0-py3-none-any.whl", hash = "sha256:6f0922593407cc673e7d8766b492c519f91bdc99f3080fe43dcec0a800d682a3", size = 6409, upload-time = "2025-09-12T07:10:12.316Z" }, + { url = "https://files.pythonhosted.org/packages/c2/b0/956902e5e1302f8c5d124e219c6bf214e2649f92ad5fce85b05c039a04c9/zope_event-6.1-py3-none-any.whl", hash = "sha256:0ca78b6391b694272b23ec1335c0294cc471065ed10f7f606858fc54566c25a0", size = 6414, upload-time = "2025-11-07T08:05:48.874Z" }, ] [[package]] name = "zope-interface" -version = "8.0.1" +version = "8.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/88/3a/7fcf02178b8fad0a51e67e32765cd039ae505d054d744d76b8c2bbcba5ba/zope_interface-8.0.1.tar.gz", hash = "sha256:eba5610d042c3704a48222f7f7c6ab5b243ed26f917e2bc69379456b115e02d1", size = 253746, upload-time = "2025-09-25T05:55:51.285Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/7d/b5b85e09f87be5f33decde2626347123696fc6d9d655cb16f5a986b60a97/zope_interface-8.1.tar.gz", hash = "sha256:a02ee40770c6a2f3d168a8f71f09b62aec3e4fb366da83f8e849dbaa5b38d12f", size = 253831, upload-time = "2025-11-10T07:56:24.825Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f2/2f/c10c739bcb9b072090c97c2e08533777497190daa19d190d72b4cce9c7cb/zope_interface-8.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4bd01022d2e1bce4a4a4ed9549edb25393c92e607d7daa6deff843f1f68b479d", size = 207903, upload-time = "2025-09-25T05:58:21.671Z" }, - { url = "https://files.pythonhosted.org/packages/b5/e1/9845ac3697f108d9a1af6912170c59a23732090bbfb35955fe77e5544955/zope_interface-8.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:29be8db8b712d94f1c05e24ea230a879271d787205ba1c9a6100d1d81f06c69a", size = 208345, upload-time = "2025-09-25T05:58:24.217Z" }, - { url = "https://files.pythonhosted.org/packages/f2/49/6573bc8b841cfab18e80c8e8259f1abdbbf716140011370de30231be79ad/zope_interface-8.0.1-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:51ae1b856565b30455b7879fdf0a56a88763b401d3f814fa9f9542d7410dbd7e", size = 255027, upload-time = "2025-09-25T05:58:19.975Z" }, - { url = "https://files.pythonhosted.org/packages/e2/fd/908b0fd4b1ab6e412dfac9bd2b606f2893ef9ba3dd36d643f5e5b94c57b3/zope_interface-8.0.1-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d2e7596149cb1acd1d4d41b9f8fe2ffc0e9e29e2e91d026311814181d0d9efaf", size = 259800, upload-time = "2025-09-25T05:58:11.487Z" }, - { url = "https://files.pythonhosted.org/packages/dc/78/8419a2b4e88410520ed4b7f93bbd25a6d4ae66c4e2b131320f2b90f43077/zope_interface-8.0.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b2737c11c34fb9128816759864752d007ec4f987b571c934c30723ed881a7a4f", size = 260978, upload-time = "2025-09-25T06:26:24.483Z" }, - { url = "https://files.pythonhosted.org/packages/e5/90/caf68152c292f1810e2bd3acd2177badf08a740aa8a348714617d6c9ad0b/zope_interface-8.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:cf66e4bf731aa7e0ced855bb3670e8cda772f6515a475c6a107bad5cb6604103", size = 212155, upload-time = "2025-09-25T05:59:40.318Z" }, - { url = "https://files.pythonhosted.org/packages/dc/a6/0f08713ddda834c428ebf97b2a7fd8dea50c0100065a8955924dbd94dae8/zope_interface-8.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:115f27c1cc95ce7a517d960ef381beedb0a7ce9489645e80b9ab3cbf8a78799c", size = 208609, upload-time = "2025-09-25T05:58:53.698Z" }, - { url = "https://files.pythonhosted.org/packages/e9/5e/d423045f54dc81e0991ec655041e7a0eccf6b2642535839dd364b35f4d7f/zope_interface-8.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:af655c573b84e3cb6a4f6fd3fbe04e4dc91c63c6b6f99019b3713ef964e589bc", size = 208797, upload-time = "2025-09-25T05:58:56.258Z" }, - { url = "https://files.pythonhosted.org/packages/c6/43/39d4bb3f7a80ebd261446792493cfa4e198badd47107224f5b6fe1997ad9/zope_interface-8.0.1-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:23f82ef9b2d5370750cc1bf883c3b94c33d098ce08557922a3fbc7ff3b63dfe1", size = 259242, upload-time = "2025-09-25T05:58:21.602Z" }, - { url = "https://files.pythonhosted.org/packages/da/29/49effcff64ef30731e35520a152a9dfcafec86cf114b4c2aff942e8264ba/zope_interface-8.0.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:35a1565d5244997f2e629c5c68715b3d9d9036e8df23c4068b08d9316dcb2822", size = 264696, upload-time = "2025-09-25T05:58:13.351Z" }, - { url = "https://files.pythonhosted.org/packages/c7/39/b947673ec9a258eeaa20208dd2f6127d9fbb3e5071272a674ebe02063a78/zope_interface-8.0.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:029ea1db7e855a475bf88d9910baab4e94d007a054810e9007ac037a91c67c6f", size = 264229, upload-time = "2025-09-25T06:26:26.226Z" }, - { url = "https://files.pythonhosted.org/packages/8f/ee/eed6efd1fc3788d1bef7a814e0592d8173b7fe601c699b935009df035fc2/zope_interface-8.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0beb3e7f7dc153944076fcaf717a935f68d39efa9fce96ec97bafcc0c2ea6cab", size = 212270, upload-time = "2025-09-25T05:58:53.584Z" }, + { url = "https://files.pythonhosted.org/packages/dd/a5/92e53d4d67c127d3ed0e002b90e758a28b4dacb9d81da617c3bae28d2907/zope_interface-8.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:db263a60c728c86e6a74945f3f74cfe0ede252e726cf71e05a0c7aca8d9d5432", size = 207891, upload-time = "2025-11-10T07:58:53.189Z" }, + { url = "https://files.pythonhosted.org/packages/b3/76/a100cc050aa76df9bcf8bbd51000724465e2336fd4c786b5904c6c6dfc55/zope_interface-8.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cfa89e5b05b7a79ab34e368293ad008321231e321b3ce4430487407b4fe3450a", size = 208335, upload-time = "2025-11-10T07:58:54.232Z" }, + { url = "https://files.pythonhosted.org/packages/ab/ae/37c3e964c599c57323e02ca92a6bf81b4bc9848b88fb5eb3f6fc26320af2/zope_interface-8.1-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:87eaf011912a06ef86da70aba2ca0ddb68b8ab84a7d1da6b144a586b70a61bca", size = 255011, upload-time = "2025-11-10T07:58:30.304Z" }, + { url = "https://files.pythonhosted.org/packages/b6/9b/b693b6021d83177db2f5237fc3917921c7f497bac9a062eba422435ee172/zope_interface-8.1-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10f06d128f1c181ded3af08c5004abcb3719c13a976ce9163124e7eeded6899a", size = 259780, upload-time = "2025-11-10T07:58:33.306Z" }, + { url = "https://files.pythonhosted.org/packages/c3/e2/0d1783563892ad46fedd0b1369e8d60ff8fcec0cd6859ab2d07e36f4f0ff/zope_interface-8.1-cp311-cp311-win_amd64.whl", hash = "sha256:17fb5382a4b9bd2ea05648a457c583e5a69f0bfa3076ed1963d48bc42a2da81f", size = 212143, upload-time = "2025-11-10T07:59:56.744Z" }, + { url = "https://files.pythonhosted.org/packages/02/6f/0bfb2beb373b7ca1c3d12807678f20bac1a07f62892f84305a1b544da785/zope_interface-8.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a8aee385282ab2a9813171b15f41317e22ab0a96cf05e9e9e16b29f4af8b6feb", size = 208596, upload-time = "2025-11-10T07:58:09.945Z" }, + { url = "https://files.pythonhosted.org/packages/49/50/169981a42812a2e21bc33fb48640ad01a790ed93c179a9854fe66f901641/zope_interface-8.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:af651a87f950a13e45fd49510111f582717fb106a63d6a0c2d3ba86b29734f07", size = 208787, upload-time = "2025-11-10T07:58:11.4Z" }, + { url = "https://files.pythonhosted.org/packages/f8/fb/cb9cb9591a7c78d0878b280b3d3cea42ec17c69c2219b655521b9daa36e8/zope_interface-8.1-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:80ed7683cf337f3b295e4b96153e2e87f12595c218323dc237c0147a6cc9da26", size = 259224, upload-time = "2025-11-10T07:58:31.882Z" }, + { url = "https://files.pythonhosted.org/packages/18/28/aa89afcefbb93b26934bb5cf030774804b267de2d9300f8bd8e0c6f20bc4/zope_interface-8.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fb9a7a45944b28c16d25df7a91bf2b9bdb919fa2b9e11782366a1e737d266ec1", size = 264671, upload-time = "2025-11-10T07:58:36.283Z" }, + { url = "https://files.pythonhosted.org/packages/de/7a/9cea2b9e64d74f27484c59b9a42d6854506673eb0b90c3b8cd088f652d5b/zope_interface-8.1-cp312-cp312-win_amd64.whl", hash = "sha256:fc5e120e3618741714c474b2427d08d36bd292855208b4397e325bd50d81439d", size = 212257, upload-time = "2025-11-10T07:59:54.691Z" }, ] [[package]] diff --git a/dev/basedpyright-check b/dev/basedpyright-check index 1c87b27d6f..1b3d1df7ad 100755 --- a/dev/basedpyright-check +++ b/dev/basedpyright-check @@ -8,9 +8,14 @@ cd "$SCRIPT_DIR/.." # Get the path argument if provided PATH_TO_CHECK="$1" -# run basedpyright checks -if [ -n "$PATH_TO_CHECK" ]; then - uv run --directory api --dev -- basedpyright --threads $(nproc) "$PATH_TO_CHECK" -else - uv run --directory api --dev -- basedpyright --threads $(nproc) -fi +# Determine CPU core count based on OS +CPU_CORES=$( + if [[ "$(uname -s)" == "Darwin" ]]; then + sysctl -n hw.ncpu 2>/dev/null + else + nproc + fi +) + +# Run basedpyright checks +uv run --directory api --dev -- basedpyright --threads "$CPU_CORES" $PATH_TO_CHECK diff --git a/dev/start-web b/dev/start-web new file mode 100755 index 0000000000..dc06d6a59f --- /dev/null +++ b/dev/start-web @@ -0,0 +1,8 @@ +#!/bin/bash + +set -x + +SCRIPT_DIR="$(dirname "$(realpath "$0")")" +cd "$SCRIPT_DIR/../web" + +pnpm install && pnpm build && pnpm start diff --git a/dev/start-worker b/dev/start-worker index 0c9b55a6fb..a01da11d86 100755 --- a/dev/start-worker +++ b/dev/start-worker @@ -11,6 +11,7 @@ show_help() { echo " -c, --concurrency NUM Number of worker processes (default: 1)" echo " -P, --pool POOL Pool implementation (default: gevent)" echo " --loglevel LEVEL Log level (default: INFO)" + echo " -e, --env-file FILE Path to an env file to source before starting" echo " -h, --help Show this help message" echo "" echo "Examples:" @@ -26,12 +27,16 @@ show_help() { echo " workflow_sandbox - Sandbox tier workflows (cloud edition)" echo " schedule_poller - Schedule polling tasks" echo " schedule_executor - Schedule execution tasks" - echo " generation - Content generation tasks" echo " mail - Email notifications" echo " ops_trace - Operations tracing" echo " app_deletion - Application cleanup" echo " plugin - Plugin operations" echo " workflow_storage - Workflow storage tasks" + echo " conversation - Conversation tasks" + echo " priority_pipeline - High priority pipeline tasks" + echo " pipeline - Standard pipeline tasks" + echo " triggered_workflow_dispatcher - Trigger dispatcher tasks" + echo " trigger_refresh_executor - Trigger refresh tasks" } # Parse command line arguments @@ -40,6 +45,8 @@ CONCURRENCY=1 POOL="gevent" LOGLEVEL="INFO" +ENV_FILE="" + while [[ $# -gt 0 ]]; do case $1 in -q|--queues) @@ -58,6 +65,10 @@ while [[ $# -gt 0 ]]; do LOGLEVEL="$2" shift 2 ;; + -e|--env-file) + ENV_FILE="$2" + shift 2 + ;; -h|--help) show_help exit 0 @@ -73,6 +84,19 @@ done SCRIPT_DIR="$(dirname "$(realpath "$0")")" cd "$SCRIPT_DIR/.." +if [[ -n "${ENV_FILE}" ]]; then + if [[ ! -f "${ENV_FILE}" ]]; then + echo "Env file ${ENV_FILE} not found" + exit 1 + fi + + echo "Loading environment variables from ${ENV_FILE}" + # Export everything sourced from the env file + set -a + source "${ENV_FILE}" + set +a +fi + # If no queues specified, use edition-based defaults if [[ -z "${QUEUES}" ]]; then # Get EDITION from environment, default to SELF_HOSTED (community edition) @@ -81,12 +105,12 @@ if [[ -z "${QUEUES}" ]]; then # Configure queues based on edition if [[ "${EDITION}" == "CLOUD" ]]; then # Cloud edition: separate queues for dataset and trigger tasks - QUEUES="dataset,generation,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow_professional,workflow_team,workflow_sandbox,schedule_poller,schedule_executor,priority_pipeline,pipeline" + QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow_professional,workflow_team,workflow_sandbox,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor" else # Community edition (SELF_HOSTED): dataset and workflow have separate queues - QUEUES="dataset,generation,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,priority_pipeline,pipeline" + QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor" fi - + echo "No queues specified, using edition-based defaults: ${QUEUES}" else echo "Using specified queues: ${QUEUES}" diff --git a/docker/.env.example b/docker/.env.example index 53363932af..7e2e9aa26d 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -24,6 +24,11 @@ CONSOLE_WEB_URL= # Example: https://api.dify.ai SERVICE_API_URL= +# Trigger external URL +# used to display trigger endpoint API Base URL to the front-end. +# Example: https://api.dify.ai +TRIGGER_URL=http://localhost + # WebApp API backend Url, # used to declare the back-end URL for the front-end API. # If empty, it is the same domain. @@ -149,6 +154,12 @@ DIFY_PORT=5001 SERVER_WORKER_AMOUNT=1 # Defaults to gevent. If using windows, it can be switched to sync or solo. +# +# Warning: Changing this parameter requires disabling patching for +# psycopg2 and gRPC (see `gunicorn.conf.py` and `celery_entrypoint.py`). +# Modifying it may also decrease throughput. +# +# It is strongly discouraged to change this parameter. SERVER_WORKER_CLASS=gevent # Default number of worker connections, the default is 10. @@ -156,6 +167,12 @@ SERVER_WORKER_CONNECTIONS=10 # Similar to SERVER_WORKER_CLASS. # If using windows, it can be switched to sync or solo. +# +# Warning: Changing this parameter requires disabling patching for +# psycopg2 and gRPC (see `gunicorn_conf.py` and `celery_entrypoint.py`). +# Modifying it may also decrease throughput. +# +# It is strongly discouraged to change this parameter. CELERY_WORKER_CLASS= # Request handling timeout. The default is 200, @@ -207,15 +224,20 @@ NEXT_PUBLIC_ENABLE_SINGLE_DOLLAR_LATEX=false # ------------------------------ # Database Configuration -# The database uses PostgreSQL. Please use the public schema. -# It is consistent with the configuration in the 'db' service below. +# The database uses PostgreSQL or MySQL. OceanBase and seekdb are also supported. Please use the public schema. +# It is consistent with the configuration in the database service below. +# You can adjust the database configuration according to your needs. # ------------------------------ +# Database type, supported values are `postgresql` and `mysql` +DB_TYPE=postgresql + DB_USERNAME=postgres DB_PASSWORD=difyai123456 -DB_HOST=db +DB_HOST=db_postgres DB_PORT=5432 DB_DATABASE=dify + # The size of the database connection pool. # The default is 30 connections, which can be appropriately increased. SQLALCHEMY_POOL_SIZE=30 @@ -277,6 +299,29 @@ POSTGRES_STATEMENT_TIMEOUT=0 # A value of 0 prevents the server from terminating idle sessions. POSTGRES_IDLE_IN_TRANSACTION_SESSION_TIMEOUT=0 +# MySQL Performance Configuration +# Maximum number of connections to MySQL +# +# Default is 1000 +MYSQL_MAX_CONNECTIONS=1000 + +# InnoDB buffer pool size +# Default is 512M +# Recommended value: 70-80% of available memory for dedicated MySQL server +# Reference: https://dev.mysql.com/doc/refman/8.0/en/innodb-parameters.html#sysvar_innodb_buffer_pool_size +MYSQL_INNODB_BUFFER_POOL_SIZE=512M + +# InnoDB log file size +# Default is 128M +# Reference: https://dev.mysql.com/doc/refman/8.0/en/innodb-parameters.html#sysvar_innodb_log_file_size +MYSQL_INNODB_LOG_FILE_SIZE=128M + +# InnoDB flush log at transaction commit +# Default is 2 (flush to OS cache, sync every second) +# Options: 0 (no flush), 1 (flush and sync), 2 (flush to OS cache) +# Reference: https://dev.mysql.com/doc/refman/8.0/en/innodb-parameters.html#sysvar_innodb_flush_log_at_trx_commit +MYSQL_INNODB_FLUSH_LOG_AT_TRX_COMMIT=2 + # ------------------------------ # Redis Configuration # This Redis configuration is used for caching and for pub/sub during conversation. @@ -348,6 +393,10 @@ WEB_API_CORS_ALLOW_ORIGINS=* # Specifies the allowed origins for cross-origin requests to the console API, # e.g. https://cloud.dify.ai or * for all origins. CONSOLE_CORS_ALLOW_ORIGINS=* +# When the frontend and backend run on different subdomains, set COOKIE_DOMAIN to the site’s top-level domain (e.g., `example.com`). Leading dots are optional. +COOKIE_DOMAIN= +# When the frontend and backend run on different subdomains, set NEXT_PUBLIC_COOKIE_DOMAIN=1. +NEXT_PUBLIC_COOKIE_DOMAIN= # ------------------------------ # File Storage Configuration @@ -467,7 +516,7 @@ SUPABASE_URL=your-server-url # ------------------------------ # The type of vector store to use. -# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`, `clickzetta`, `alibabacloud_mysql`. +# Supported values are `weaviate`, `oceanbase`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`, `clickzetta`, `alibabacloud_mysql`. VECTOR_STORE=weaviate # Prefix used to create collection name in vector database VECTOR_INDEX_NAME_PREFIX=Vector_index @@ -475,6 +524,24 @@ VECTOR_INDEX_NAME_PREFIX=Vector_index # The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`. WEAVIATE_ENDPOINT=http://weaviate:8080 WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih +WEAVIATE_GRPC_ENDPOINT=grpc://weaviate:50051 + +# For OceanBase metadata database configuration, available when `DB_TYPE` is `mysql` and `COMPOSE_PROFILES` includes `oceanbase`. +# For OceanBase vector database configuration, available when `VECTOR_STORE` is `oceanbase` +# If you want to use OceanBase as both vector database and metadata database, you need to set `DB_TYPE` to `mysql`, `COMPOSE_PROFILES` is `oceanbase`, and set Database Configuration is the same as the vector database. +# seekdb is the lite version of OceanBase and shares the connection configuration with OceanBase. +OCEANBASE_VECTOR_HOST=oceanbase +OCEANBASE_VECTOR_PORT=2881 +OCEANBASE_VECTOR_USER=root@test +OCEANBASE_VECTOR_PASSWORD=difyai123456 +OCEANBASE_VECTOR_DATABASE=test +OCEANBASE_CLUSTER_NAME=difyai +OCEANBASE_MEMORY_LIMIT=6G +OCEANBASE_ENABLE_HYBRID_SEARCH=false +# For OceanBase vector database, built-in fulltext parsers are `ngram`, `beng`, `space`, `ngram2`, `ik` +# For OceanBase vector database, external fulltext parsers (require plugin installation) are `japanese_ftparser`, `thai_ftparser` +OCEANBASE_FULLTEXT_PARSER=ik +SEEKDB_MEMORY_LIMIT=2G # The Qdrant endpoint URL. Only available when VECTOR_STORE is `qdrant`. QDRANT_URL=http://qdrant:6333 @@ -681,19 +748,6 @@ LINDORM_PASSWORD=admin LINDORM_USING_UGC=True LINDORM_QUERY_TIMEOUT=1 -# OceanBase Vector configuration, only available when VECTOR_STORE is `oceanbase` -# Built-in fulltext parsers are `ngram`, `beng`, `space`, `ngram2`, `ik` -# External fulltext parsers (require plugin installation) are `japanese_ftparser`, `thai_ftparser` -OCEANBASE_VECTOR_HOST=oceanbase -OCEANBASE_VECTOR_PORT=2881 -OCEANBASE_VECTOR_USER=root@test -OCEANBASE_VECTOR_PASSWORD=difyai123456 -OCEANBASE_VECTOR_DATABASE=test -OCEANBASE_CLUSTER_NAME=difyai -OCEANBASE_MEMORY_LIMIT=6G -OCEANBASE_ENABLE_HYBRID_SEARCH=false -OCEANBASE_FULLTEXT_PARSER=ik - # opengauss configurations, only available when VECTOR_STORE is `opengauss` OPENGAUSS_HOST=opengauss OPENGAUSS_PORT=6600 @@ -745,6 +799,12 @@ UPLOAD_FILE_SIZE_LIMIT=15 # The maximum number of files that can be uploaded at a time, default 5. UPLOAD_FILE_BATCH_LIMIT=5 +# Comma-separated list of file extensions blocked from upload for security reasons. +# Extensions should be lowercase without dots (e.g., exe,bat,sh,dll). +# Empty by default to allow all file types. +# Recommended: exe,bat,cmd,com,scr,vbs,ps1,msi,dll +UPLOAD_FILE_EXTENSION_BLACKLIST= + # ETL type, support: `dify`, `Unstructured` # `dify` Dify's proprietary file extraction scheme # `Unstructured` Unstructured.io file extraction scheme @@ -1011,7 +1071,7 @@ ALLOW_UNSAFE_DATA_SCHEME=false MAX_TREE_DEPTH=50 # ------------------------------ -# Environment Variables for db Service +# Environment Variables for database Service # ------------------------------ # The name of the default postgres user. @@ -1020,9 +1080,19 @@ POSTGRES_USER=${DB_USERNAME} POSTGRES_PASSWORD=${DB_PASSWORD} # The name of the default postgres database. POSTGRES_DB=${DB_DATABASE} -# postgres data directory +# Postgres data directory PGDATA=/var/lib/postgresql/data/pgdata +# MySQL Default Configuration +# The name of the default mysql user. +MYSQL_USERNAME=${DB_USERNAME} +# The password for the default mysql user. +MYSQL_PASSWORD=${DB_PASSWORD} +# The name of the default mysql database. +MYSQL_DATABASE=${DB_DATABASE} +# MySQL data directory +MYSQL_HOST_VOLUME=./volumes/mysql/data + # ------------------------------ # Environment Variables for sandbox Service # ------------------------------ @@ -1182,12 +1252,12 @@ SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS=20 SSRF_POOL_KEEPALIVE_EXPIRY=5.0 # ------------------------------ -# docker env var for specifying vector db type at startup -# (based on the vector db type, the corresponding docker +# docker env var for specifying vector db and metadata db type at startup +# (based on the vector db and metadata db type, the corresponding docker # compose profile will be used) # if you want to use unstructured, add ',unstructured' to the end # ------------------------------ -COMPOSE_PROFILES=${VECTOR_STORE:-weaviate} +COMPOSE_PROFILES=${VECTOR_STORE:-weaviate},${DB_TYPE:-postgresql} # ------------------------------ # Docker Compose Service Expose Host Port Configurations @@ -1253,6 +1323,7 @@ MARKETPLACE_ENABLED=true MARKETPLACE_API_URL=https://marketplace.dify.ai FORCE_VERIFYING_SIGNATURE=true +ENFORCE_LANGGENIUS_PLUGIN_SIGNATURES=true PLUGIN_STDIO_BUFFER_SIZE=1024 PLUGIN_STDIO_MAX_BUFFER_SIZE=5242880 @@ -1336,6 +1407,9 @@ SWAGGER_UI_PATH=/swagger-ui.html # Set to false to export dataset IDs as plain text for easier cross-environment import DSL_EXPORT_ENCRYPT_DATASET_ID=true +# Maximum number of segments for dataset segments API (0 for unlimited) +DATASET_MAX_SEGMENTS_PER_REQUEST=0 + # Celery schedule tasks configuration ENABLE_CLEAN_EMBEDDING_CACHE_TASK=false ENABLE_CLEAN_UNUSED_DATASETS_TASK=false @@ -1349,3 +1423,6 @@ ENABLE_WORKFLOW_SCHEDULE_POLLER_TASK=true WORKFLOW_SCHEDULE_POLLER_INTERVAL=1 WORKFLOW_SCHEDULE_POLLER_BATCH_SIZE=100 WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK=0 + +# Tenant isolated task queue configuration +TENANT_ISOLATED_TASK_CONCURRENCY=1 \ No newline at end of file diff --git a/docker/docker-compose-template.yaml b/docker/docker-compose-template.yaml index 3c39343adf..eb0733e414 100644 --- a/docker/docker-compose-template.yaml +++ b/docker/docker-compose-template.yaml @@ -2,7 +2,7 @@ x-shared-env: &shared-api-worker-env services: # API service api: - image: langgenius/dify-api:1.9.2 + image: langgenius/dify-api:1.10.0 restart: always environment: # Use the shared environment variables. @@ -17,8 +17,18 @@ services: PLUGIN_MAX_PACKAGE_SIZE: ${PLUGIN_MAX_PACKAGE_SIZE:-52428800} INNER_API_KEY_FOR_PLUGIN: ${PLUGIN_DIFY_INNER_API_KEY:-QaHbTe77CtuXmsfyhR7+vRjI/+XbV1AaFy691iy+kGDv2Jvy0/eAh8Y1} depends_on: - db: + db_postgres: condition: service_healthy + required: false + db_mysql: + condition: service_healthy + required: false + oceanbase: + condition: service_healthy + required: false + seekdb: + condition: service_healthy + required: false redis: condition: service_started volumes: @@ -31,7 +41,7 @@ services: # worker service # The Celery worker for processing all queues (dataset, workflow, mail, etc.) worker: - image: langgenius/dify-api:1.9.2 + image: langgenius/dify-api:1.10.0 restart: always environment: # Use the shared environment variables. @@ -44,8 +54,18 @@ services: PLUGIN_MAX_PACKAGE_SIZE: ${PLUGIN_MAX_PACKAGE_SIZE:-52428800} INNER_API_KEY_FOR_PLUGIN: ${PLUGIN_DIFY_INNER_API_KEY:-QaHbTe77CtuXmsfyhR7+vRjI/+XbV1AaFy691iy+kGDv2Jvy0/eAh8Y1} depends_on: - db: + db_postgres: condition: service_healthy + required: false + db_mysql: + condition: service_healthy + required: false + oceanbase: + condition: service_healthy + required: false + seekdb: + condition: service_healthy + required: false redis: condition: service_started volumes: @@ -58,7 +78,7 @@ services: # worker_beat service # Celery beat for scheduling periodic tasks. worker_beat: - image: langgenius/dify-api:1.9.2 + image: langgenius/dify-api:1.10.0 restart: always environment: # Use the shared environment variables. @@ -66,8 +86,18 @@ services: # Startup mode, 'worker_beat' starts the Celery beat for scheduling periodic tasks. MODE: beat depends_on: - db: + db_postgres: condition: service_healthy + required: false + db_mysql: + condition: service_healthy + required: false + oceanbase: + condition: service_healthy + required: false + seekdb: + condition: service_healthy + required: false redis: condition: service_started networks: @@ -76,11 +106,12 @@ services: # Frontend web application. web: - image: langgenius/dify-web:1.9.2 + image: langgenius/dify-web:1.10.0 restart: always environment: CONSOLE_API_URL: ${CONSOLE_API_URL:-} APP_API_URL: ${APP_API_URL:-} + NEXT_PUBLIC_COOKIE_DOMAIN: ${NEXT_PUBLIC_COOKIE_DOMAIN:-} SENTRY_DSN: ${WEB_SENTRY_DSN:-} NEXT_TELEMETRY_DISABLED: ${NEXT_TELEMETRY_DISABLED:-0} TEXT_GENERATION_TIMEOUT_MS: ${TEXT_GENERATION_TIMEOUT_MS:-60000} @@ -100,9 +131,12 @@ services: ENABLE_WEBSITE_JINAREADER: ${ENABLE_WEBSITE_JINAREADER:-true} ENABLE_WEBSITE_FIRECRAWL: ${ENABLE_WEBSITE_FIRECRAWL:-true} ENABLE_WEBSITE_WATERCRAWL: ${ENABLE_WEBSITE_WATERCRAWL:-true} - # The postgres database. - db: + + # The PostgreSQL database. + db_postgres: image: postgres:15-alpine + profiles: + - postgresql restart: always environment: POSTGRES_USER: ${POSTGRES_USER:-postgres} @@ -125,16 +159,46 @@ services: "CMD", "pg_isready", "-h", - "db", + "db_postgres", "-U", "${PGUSER:-postgres}", "-d", - "${POSTGRES_DB:-dify}", + "${DB_DATABASE:-dify}", ] interval: 1s timeout: 3s retries: 60 + # The mysql database. + db_mysql: + image: mysql:8.0 + profiles: + - mysql + restart: always + environment: + MYSQL_ROOT_PASSWORD: ${MYSQL_PASSWORD:-difyai123456} + MYSQL_DATABASE: ${MYSQL_DATABASE:-dify} + command: > + --max_connections=1000 + --innodb_buffer_pool_size=${MYSQL_INNODB_BUFFER_POOL_SIZE:-512M} + --innodb_log_file_size=${MYSQL_INNODB_LOG_FILE_SIZE:-128M} + --innodb_flush_log_at_trx_commit=${MYSQL_INNODB_FLUSH_LOG_AT_TRX_COMMIT:-2} + volumes: + - ${MYSQL_HOST_VOLUME:-./volumes/mysql/data}:/var/lib/mysql + healthcheck: + test: + [ + "CMD", + "mysqladmin", + "ping", + "-u", + "root", + "-p${MYSQL_PASSWORD:-difyai123456}", + ] + interval: 1s + timeout: 3s + retries: 30 + # The redis cache. redis: image: redis:6-alpine @@ -179,7 +243,7 @@ services: # plugin daemon plugin_daemon: - image: langgenius/dify-plugin-daemon:0.3.3-local + image: langgenius/dify-plugin-daemon:0.4.1-local restart: always environment: # Use the shared environment variables. @@ -235,8 +299,18 @@ services: volumes: - ./volumes/plugin_daemon:/app/storage depends_on: - db: + db_postgres: condition: service_healthy + required: false + db_mysql: + condition: service_healthy + required: false + oceanbase: + condition: service_healthy + required: false + seekdb: + condition: service_healthy + required: false # ssrf_proxy server # for more information, please refer to @@ -352,6 +426,63 @@ services: AUTHORIZATION_ADMINLIST_ENABLED: ${WEAVIATE_AUTHORIZATION_ADMINLIST_ENABLED:-true} AUTHORIZATION_ADMINLIST_USERS: ${WEAVIATE_AUTHORIZATION_ADMINLIST_USERS:-hello@dify.ai} + # OceanBase vector database + oceanbase: + image: oceanbase/oceanbase-ce:4.3.5-lts + container_name: oceanbase + profiles: + - oceanbase + restart: always + volumes: + - ./volumes/oceanbase/data:/root/ob + - ./volumes/oceanbase/conf:/root/.obd/cluster + - ./volumes/oceanbase/init.d:/root/boot/init.d + environment: + OB_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G} + OB_SYS_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} + OB_TENANT_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} + OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai} + OB_SERVER_IP: 127.0.0.1 + MODE: mini + LANG: en_US.UTF-8 + ports: + - "${OCEANBASE_VECTOR_PORT:-2881}:2881" + healthcheck: + test: + [ + "CMD-SHELL", + 'obclient -h127.0.0.1 -P2881 -uroot@test -p${OCEANBASE_VECTOR_PASSWORD:-difyai123456} -e "SELECT 1;"', + ] + interval: 10s + retries: 30 + start_period: 30s + timeout: 10s + + # seekdb vector database + seekdb: + image: oceanbase/seekdb:latest + container_name: seekdb + profiles: + - seekdb + restart: always + volumes: + - ./volumes/seekdb:/var/lib/oceanbase + environment: + ROOT_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} + MEMORY_LIMIT: ${SEEKDB_MEMORY_LIMIT:-2G} + REPORTER: dify-ai-seekdb + ports: + - "${OCEANBASE_VECTOR_PORT:-2881}:2881" + healthcheck: + test: + [ + "CMD-SHELL", + 'mysql -h127.0.0.1 -P2881 -uroot -p${OCEANBASE_VECTOR_PASSWORD:-difyai123456} -e "SELECT 1;"', + ] + interval: 5s + retries: 60 + timeout: 5s + # Qdrant vector store. # (if used, you need to set VECTOR_STORE to qdrant in the api & worker service.) qdrant: @@ -487,38 +618,6 @@ services: CHROMA_SERVER_AUTHN_PROVIDER: ${CHROMA_SERVER_AUTHN_PROVIDER:-chromadb.auth.token_authn.TokenAuthenticationServerProvider} IS_PERSISTENT: ${CHROMA_IS_PERSISTENT:-TRUE} - # OceanBase vector database - oceanbase: - image: oceanbase/oceanbase-ce:4.3.5-lts - container_name: oceanbase - profiles: - - oceanbase - restart: always - volumes: - - ./volumes/oceanbase/data:/root/ob - - ./volumes/oceanbase/conf:/root/.obd/cluster - - ./volumes/oceanbase/init.d:/root/boot/init.d - environment: - OB_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G} - OB_SYS_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} - OB_TENANT_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} - OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai} - OB_SERVER_IP: 127.0.0.1 - MODE: mini - LANG: en_US.UTF-8 - ports: - - "${OCEANBASE_VECTOR_PORT:-2881}:2881" - healthcheck: - test: - [ - "CMD-SHELL", - 'obclient -h127.0.0.1 -P2881 -uroot@test -p$${OB_TENANT_PASSWORD} -e "SELECT 1;"', - ] - interval: 10s - retries: 30 - start_period: 30s - timeout: 10s - # Oracle vector database oracle: image: container-registry.oracle.com/database/free:latest diff --git a/docker/docker-compose.middleware.yaml b/docker/docker-compose.middleware.yaml index 0497e9d1f6..b409e3d26d 100644 --- a/docker/docker-compose.middleware.yaml +++ b/docker/docker-compose.middleware.yaml @@ -1,7 +1,10 @@ services: # The postgres database. - db: + db_postgres: image: postgres:15-alpine + profiles: + - "" + - postgresql restart: always env_file: - ./middleware.env @@ -27,7 +30,7 @@ services: "CMD", "pg_isready", "-h", - "db", + "db_postgres", "-U", "${PGUSER:-postgres}", "-d", @@ -37,6 +40,39 @@ services: timeout: 3s retries: 30 + db_mysql: + image: mysql:8.0 + profiles: + - mysql + restart: always + env_file: + - ./middleware.env + environment: + MYSQL_ROOT_PASSWORD: ${MYSQL_PASSWORD:-difyai123456} + MYSQL_DATABASE: ${MYSQL_DATABASE:-dify} + command: > + --max_connections=1000 + --innodb_buffer_pool_size=${MYSQL_INNODB_BUFFER_POOL_SIZE:-512M} + --innodb_log_file_size=${MYSQL_INNODB_LOG_FILE_SIZE:-128M} + --innodb_flush_log_at_trx_commit=${MYSQL_INNODB_FLUSH_LOG_AT_TRX_COMMIT:-2} + volumes: + - ${MYSQL_HOST_VOLUME:-./volumes/mysql/data}:/var/lib/mysql + ports: + - "${EXPOSE_MYSQL_PORT:-3306}:3306" + healthcheck: + test: + [ + "CMD", + "mysqladmin", + "ping", + "-u", + "root", + "-p${MYSQL_PASSWORD:-difyai123456}", + ] + interval: 1s + timeout: 3s + retries: 30 + # The redis cache. redis: image: redis:6-alpine @@ -87,16 +123,12 @@ services: # plugin daemon plugin_daemon: - image: langgenius/dify-plugin-daemon:0.3.3-local + image: langgenius/dify-plugin-daemon:0.4.0-local restart: always env_file: - ./middleware.env environment: # Use the shared environment variables. - DB_HOST: ${DB_HOST:-db} - DB_PORT: ${DB_PORT:-5432} - DB_USERNAME: ${DB_USER:-postgres} - DB_PASSWORD: ${DB_PASSWORD:-difyai123456} DB_DATABASE: ${DB_PLUGIN_DATABASE:-dify_plugin} REDIS_HOST: ${REDIS_HOST:-redis} REDIS_PORT: ${REDIS_PORT:-6379} diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 0ca23667e0..d1e970719c 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -8,6 +8,7 @@ x-shared-env: &shared-api-worker-env CONSOLE_API_URL: ${CONSOLE_API_URL:-} CONSOLE_WEB_URL: ${CONSOLE_WEB_URL:-} SERVICE_API_URL: ${SERVICE_API_URL:-} + TRIGGER_URL: ${TRIGGER_URL:-http://localhost} APP_API_URL: ${APP_API_URL:-} APP_WEB_URL: ${APP_WEB_URL:-} FILES_URL: ${FILES_URL:-} @@ -52,9 +53,10 @@ x-shared-env: &shared-api-worker-env ENABLE_WEBSITE_FIRECRAWL: ${ENABLE_WEBSITE_FIRECRAWL:-true} ENABLE_WEBSITE_WATERCRAWL: ${ENABLE_WEBSITE_WATERCRAWL:-true} NEXT_PUBLIC_ENABLE_SINGLE_DOLLAR_LATEX: ${NEXT_PUBLIC_ENABLE_SINGLE_DOLLAR_LATEX:-false} + DB_TYPE: ${DB_TYPE:-postgresql} DB_USERNAME: ${DB_USERNAME:-postgres} DB_PASSWORD: ${DB_PASSWORD:-difyai123456} - DB_HOST: ${DB_HOST:-db} + DB_HOST: ${DB_HOST:-db_postgres} DB_PORT: ${DB_PORT:-5432} DB_DATABASE: ${DB_DATABASE:-dify} SQLALCHEMY_POOL_SIZE: ${SQLALCHEMY_POOL_SIZE:-30} @@ -71,6 +73,10 @@ x-shared-env: &shared-api-worker-env POSTGRES_EFFECTIVE_CACHE_SIZE: ${POSTGRES_EFFECTIVE_CACHE_SIZE:-4096MB} POSTGRES_STATEMENT_TIMEOUT: ${POSTGRES_STATEMENT_TIMEOUT:-0} POSTGRES_IDLE_IN_TRANSACTION_SESSION_TIMEOUT: ${POSTGRES_IDLE_IN_TRANSACTION_SESSION_TIMEOUT:-0} + MYSQL_MAX_CONNECTIONS: ${MYSQL_MAX_CONNECTIONS:-1000} + MYSQL_INNODB_BUFFER_POOL_SIZE: ${MYSQL_INNODB_BUFFER_POOL_SIZE:-512M} + MYSQL_INNODB_LOG_FILE_SIZE: ${MYSQL_INNODB_LOG_FILE_SIZE:-128M} + MYSQL_INNODB_FLUSH_LOG_AT_TRX_COMMIT: ${MYSQL_INNODB_FLUSH_LOG_AT_TRX_COMMIT:-2} REDIS_HOST: ${REDIS_HOST:-redis} REDIS_PORT: ${REDIS_PORT:-6379} REDIS_USERNAME: ${REDIS_USERNAME:-} @@ -99,6 +105,8 @@ x-shared-env: &shared-api-worker-env CELERY_SENTINEL_SOCKET_TIMEOUT: ${CELERY_SENTINEL_SOCKET_TIMEOUT:-0.1} WEB_API_CORS_ALLOW_ORIGINS: ${WEB_API_CORS_ALLOW_ORIGINS:-*} CONSOLE_CORS_ALLOW_ORIGINS: ${CONSOLE_CORS_ALLOW_ORIGINS:-*} + COOKIE_DOMAIN: ${COOKIE_DOMAIN:-} + NEXT_PUBLIC_COOKIE_DOMAIN: ${NEXT_PUBLIC_COOKIE_DOMAIN:-} STORAGE_TYPE: ${STORAGE_TYPE:-opendal} OPENDAL_SCHEME: ${OPENDAL_SCHEME:-fs} OPENDAL_FS_ROOT: ${OPENDAL_FS_ROOT:-storage} @@ -155,6 +163,17 @@ x-shared-env: &shared-api-worker-env VECTOR_INDEX_NAME_PREFIX: ${VECTOR_INDEX_NAME_PREFIX:-Vector_index} WEAVIATE_ENDPOINT: ${WEAVIATE_ENDPOINT:-http://weaviate:8080} WEAVIATE_API_KEY: ${WEAVIATE_API_KEY:-WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih} + WEAVIATE_GRPC_ENDPOINT: ${WEAVIATE_GRPC_ENDPOINT:-grpc://weaviate:50051} + OCEANBASE_VECTOR_HOST: ${OCEANBASE_VECTOR_HOST:-oceanbase} + OCEANBASE_VECTOR_PORT: ${OCEANBASE_VECTOR_PORT:-2881} + OCEANBASE_VECTOR_USER: ${OCEANBASE_VECTOR_USER:-root@test} + OCEANBASE_VECTOR_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} + OCEANBASE_VECTOR_DATABASE: ${OCEANBASE_VECTOR_DATABASE:-test} + OCEANBASE_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai} + OCEANBASE_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G} + OCEANBASE_ENABLE_HYBRID_SEARCH: ${OCEANBASE_ENABLE_HYBRID_SEARCH:-false} + OCEANBASE_FULLTEXT_PARSER: ${OCEANBASE_FULLTEXT_PARSER:-ik} + SEEKDB_MEMORY_LIMIT: ${SEEKDB_MEMORY_LIMIT:-2G} QDRANT_URL: ${QDRANT_URL:-http://qdrant:6333} QDRANT_API_KEY: ${QDRANT_API_KEY:-difyai123456} QDRANT_CLIENT_TIMEOUT: ${QDRANT_CLIENT_TIMEOUT:-20} @@ -310,15 +329,6 @@ x-shared-env: &shared-api-worker-env LINDORM_PASSWORD: ${LINDORM_PASSWORD:-admin} LINDORM_USING_UGC: ${LINDORM_USING_UGC:-True} LINDORM_QUERY_TIMEOUT: ${LINDORM_QUERY_TIMEOUT:-1} - OCEANBASE_VECTOR_HOST: ${OCEANBASE_VECTOR_HOST:-oceanbase} - OCEANBASE_VECTOR_PORT: ${OCEANBASE_VECTOR_PORT:-2881} - OCEANBASE_VECTOR_USER: ${OCEANBASE_VECTOR_USER:-root@test} - OCEANBASE_VECTOR_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} - OCEANBASE_VECTOR_DATABASE: ${OCEANBASE_VECTOR_DATABASE:-test} - OCEANBASE_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai} - OCEANBASE_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G} - OCEANBASE_ENABLE_HYBRID_SEARCH: ${OCEANBASE_ENABLE_HYBRID_SEARCH:-false} - OCEANBASE_FULLTEXT_PARSER: ${OCEANBASE_FULLTEXT_PARSER:-ik} OPENGAUSS_HOST: ${OPENGAUSS_HOST:-opengauss} OPENGAUSS_PORT: ${OPENGAUSS_PORT:-6600} OPENGAUSS_USER: ${OPENGAUSS_USER:-postgres} @@ -351,6 +361,7 @@ x-shared-env: &shared-api-worker-env CLICKZETTA_VECTOR_DISTANCE_FUNCTION: ${CLICKZETTA_VECTOR_DISTANCE_FUNCTION:-cosine_distance} UPLOAD_FILE_SIZE_LIMIT: ${UPLOAD_FILE_SIZE_LIMIT:-15} UPLOAD_FILE_BATCH_LIMIT: ${UPLOAD_FILE_BATCH_LIMIT:-5} + UPLOAD_FILE_EXTENSION_BLACKLIST: ${UPLOAD_FILE_EXTENSION_BLACKLIST:-} ETL_TYPE: ${ETL_TYPE:-dify} UNSTRUCTURED_API_URL: ${UNSTRUCTURED_API_URL:-} UNSTRUCTURED_API_KEY: ${UNSTRUCTURED_API_KEY:-} @@ -428,10 +439,10 @@ x-shared-env: &shared-api-worker-env HTTP_REQUEST_NODE_MAX_BINARY_SIZE: ${HTTP_REQUEST_NODE_MAX_BINARY_SIZE:-10485760} HTTP_REQUEST_NODE_MAX_TEXT_SIZE: ${HTTP_REQUEST_NODE_MAX_TEXT_SIZE:-1048576} HTTP_REQUEST_NODE_SSL_VERIFY: ${HTTP_REQUEST_NODE_SSL_VERIFY:-True} - WEBHOOK_REQUEST_BODY_MAX_SIZE: ${WEBHOOK_REQUEST_BODY_MAX_SIZE:-10485760} HTTP_REQUEST_MAX_CONNECT_TIMEOUT: ${HTTP_REQUEST_MAX_CONNECT_TIMEOUT:-10} HTTP_REQUEST_MAX_READ_TIMEOUT: ${HTTP_REQUEST_MAX_READ_TIMEOUT:-600} HTTP_REQUEST_MAX_WRITE_TIMEOUT: ${HTTP_REQUEST_MAX_WRITE_TIMEOUT:-600} + WEBHOOK_REQUEST_BODY_MAX_SIZE: ${WEBHOOK_REQUEST_BODY_MAX_SIZE:-10485760} RESPECT_XFORWARD_HEADERS_ENABLED: ${RESPECT_XFORWARD_HEADERS_ENABLED:-false} SSRF_PROXY_HTTP_URL: ${SSRF_PROXY_HTTP_URL:-http://ssrf_proxy:3128} SSRF_PROXY_HTTPS_URL: ${SSRF_PROXY_HTTPS_URL:-http://ssrf_proxy:3128} @@ -446,6 +457,10 @@ x-shared-env: &shared-api-worker-env POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-${DB_PASSWORD}} POSTGRES_DB: ${POSTGRES_DB:-${DB_DATABASE}} PGDATA: ${PGDATA:-/var/lib/postgresql/data/pgdata} + MYSQL_USERNAME: ${MYSQL_USERNAME:-${DB_USERNAME}} + MYSQL_PASSWORD: ${MYSQL_PASSWORD:-${DB_PASSWORD}} + MYSQL_DATABASE: ${MYSQL_DATABASE:-${DB_DATABASE}} + MYSQL_HOST_VOLUME: ${MYSQL_HOST_VOLUME:-./volumes/mysql/data} SANDBOX_API_KEY: ${SANDBOX_API_KEY:-dify-sandbox} SANDBOX_GIN_MODE: ${SANDBOX_GIN_MODE:-release} SANDBOX_WORKER_TIMEOUT: ${SANDBOX_WORKER_TIMEOUT:-15} @@ -546,6 +561,7 @@ x-shared-env: &shared-api-worker-env MARKETPLACE_ENABLED: ${MARKETPLACE_ENABLED:-true} MARKETPLACE_API_URL: ${MARKETPLACE_API_URL:-https://marketplace.dify.ai} FORCE_VERIFYING_SIGNATURE: ${FORCE_VERIFYING_SIGNATURE:-true} + ENFORCE_LANGGENIUS_PLUGIN_SIGNATURES: ${ENFORCE_LANGGENIUS_PLUGIN_SIGNATURES:-true} PLUGIN_STDIO_BUFFER_SIZE: ${PLUGIN_STDIO_BUFFER_SIZE:-1024} PLUGIN_STDIO_MAX_BUFFER_SIZE: ${PLUGIN_STDIO_MAX_BUFFER_SIZE:-5242880} PLUGIN_PYTHON_ENV_INIT_TIMEOUT: ${PLUGIN_PYTHON_ENV_INIT_TIMEOUT:-120} @@ -601,6 +617,7 @@ x-shared-env: &shared-api-worker-env SWAGGER_UI_ENABLED: ${SWAGGER_UI_ENABLED:-true} SWAGGER_UI_PATH: ${SWAGGER_UI_PATH:-/swagger-ui.html} DSL_EXPORT_ENCRYPT_DATASET_ID: ${DSL_EXPORT_ENCRYPT_DATASET_ID:-true} + DATASET_MAX_SEGMENTS_PER_REQUEST: ${DATASET_MAX_SEGMENTS_PER_REQUEST:-0} ENABLE_CLEAN_EMBEDDING_CACHE_TASK: ${ENABLE_CLEAN_EMBEDDING_CACHE_TASK:-false} ENABLE_CLEAN_UNUSED_DATASETS_TASK: ${ENABLE_CLEAN_UNUSED_DATASETS_TASK:-false} ENABLE_CREATE_TIDB_SERVERLESS_TASK: ${ENABLE_CREATE_TIDB_SERVERLESS_TASK:-false} @@ -613,11 +630,12 @@ x-shared-env: &shared-api-worker-env WORKFLOW_SCHEDULE_POLLER_INTERVAL: ${WORKFLOW_SCHEDULE_POLLER_INTERVAL:-1} WORKFLOW_SCHEDULE_POLLER_BATCH_SIZE: ${WORKFLOW_SCHEDULE_POLLER_BATCH_SIZE:-100} WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK: ${WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK:-0} + TENANT_ISOLATED_TASK_CONCURRENCY: ${TENANT_ISOLATED_TASK_CONCURRENCY:-1} services: # API service api: - image: langgenius/dify-api:1.9.2 + image: langgenius/dify-api:1.10.0 restart: always environment: # Use the shared environment variables. @@ -632,8 +650,18 @@ services: PLUGIN_MAX_PACKAGE_SIZE: ${PLUGIN_MAX_PACKAGE_SIZE:-52428800} INNER_API_KEY_FOR_PLUGIN: ${PLUGIN_DIFY_INNER_API_KEY:-QaHbTe77CtuXmsfyhR7+vRjI/+XbV1AaFy691iy+kGDv2Jvy0/eAh8Y1} depends_on: - db: + db_postgres: condition: service_healthy + required: false + db_mysql: + condition: service_healthy + required: false + oceanbase: + condition: service_healthy + required: false + seekdb: + condition: service_healthy + required: false redis: condition: service_started volumes: @@ -644,14 +672,14 @@ services: - default # worker service - # The Celery worker for processing the queue. + # The Celery worker for processing all queues (dataset, workflow, mail, etc.) worker: - image: langgenius/dify-api:1.9.2 + image: langgenius/dify-api:1.10.0 restart: always environment: # Use the shared environment variables. <<: *shared-api-worker-env - # Startup mode, 'worker' starts the Celery worker for processing the queue. + # Startup mode, 'worker' starts the Celery worker for processing all queues. MODE: worker SENTRY_DSN: ${API_SENTRY_DSN:-} SENTRY_TRACES_SAMPLE_RATE: ${API_SENTRY_TRACES_SAMPLE_RATE:-1.0} @@ -659,8 +687,18 @@ services: PLUGIN_MAX_PACKAGE_SIZE: ${PLUGIN_MAX_PACKAGE_SIZE:-52428800} INNER_API_KEY_FOR_PLUGIN: ${PLUGIN_DIFY_INNER_API_KEY:-QaHbTe77CtuXmsfyhR7+vRjI/+XbV1AaFy691iy+kGDv2Jvy0/eAh8Y1} depends_on: - db: + db_postgres: condition: service_healthy + required: false + db_mysql: + condition: service_healthy + required: false + oceanbase: + condition: service_healthy + required: false + seekdb: + condition: service_healthy + required: false redis: condition: service_started volumes: @@ -673,7 +711,7 @@ services: # worker_beat service # Celery beat for scheduling periodic tasks. worker_beat: - image: langgenius/dify-api:1.9.2 + image: langgenius/dify-api:1.10.0 restart: always environment: # Use the shared environment variables. @@ -681,8 +719,18 @@ services: # Startup mode, 'worker_beat' starts the Celery beat for scheduling periodic tasks. MODE: beat depends_on: - db: + db_postgres: condition: service_healthy + required: false + db_mysql: + condition: service_healthy + required: false + oceanbase: + condition: service_healthy + required: false + seekdb: + condition: service_healthy + required: false redis: condition: service_started networks: @@ -691,11 +739,12 @@ services: # Frontend web application. web: - image: langgenius/dify-web:1.9.2 + image: langgenius/dify-web:1.10.0 restart: always environment: CONSOLE_API_URL: ${CONSOLE_API_URL:-} APP_API_URL: ${APP_API_URL:-} + NEXT_PUBLIC_COOKIE_DOMAIN: ${NEXT_PUBLIC_COOKIE_DOMAIN:-} SENTRY_DSN: ${WEB_SENTRY_DSN:-} NEXT_TELEMETRY_DISABLED: ${NEXT_TELEMETRY_DISABLED:-0} TEXT_GENERATION_TIMEOUT_MS: ${TEXT_GENERATION_TIMEOUT_MS:-60000} @@ -715,9 +764,12 @@ services: ENABLE_WEBSITE_JINAREADER: ${ENABLE_WEBSITE_JINAREADER:-true} ENABLE_WEBSITE_FIRECRAWL: ${ENABLE_WEBSITE_FIRECRAWL:-true} ENABLE_WEBSITE_WATERCRAWL: ${ENABLE_WEBSITE_WATERCRAWL:-true} - # The postgres database. - db: + + # The PostgreSQL database. + db_postgres: image: postgres:15-alpine + profiles: + - postgresql restart: always environment: POSTGRES_USER: ${POSTGRES_USER:-postgres} @@ -740,16 +792,46 @@ services: "CMD", "pg_isready", "-h", - "db", + "db_postgres", "-U", "${PGUSER:-postgres}", "-d", - "${POSTGRES_DB:-dify}", + "${DB_DATABASE:-dify}", ] interval: 1s timeout: 3s retries: 60 + # The mysql database. + db_mysql: + image: mysql:8.0 + profiles: + - mysql + restart: always + environment: + MYSQL_ROOT_PASSWORD: ${MYSQL_PASSWORD:-difyai123456} + MYSQL_DATABASE: ${MYSQL_DATABASE:-dify} + command: > + --max_connections=1000 + --innodb_buffer_pool_size=${MYSQL_INNODB_BUFFER_POOL_SIZE:-512M} + --innodb_log_file_size=${MYSQL_INNODB_LOG_FILE_SIZE:-128M} + --innodb_flush_log_at_trx_commit=${MYSQL_INNODB_FLUSH_LOG_AT_TRX_COMMIT:-2} + volumes: + - ${MYSQL_HOST_VOLUME:-./volumes/mysql/data}:/var/lib/mysql + healthcheck: + test: + [ + "CMD", + "mysqladmin", + "ping", + "-u", + "root", + "-p${MYSQL_PASSWORD:-difyai123456}", + ] + interval: 1s + timeout: 3s + retries: 30 + # The redis cache. redis: image: redis:6-alpine @@ -794,7 +876,7 @@ services: # plugin daemon plugin_daemon: - image: langgenius/dify-plugin-daemon:0.3.3-local + image: langgenius/dify-plugin-daemon:0.4.1-local restart: always environment: # Use the shared environment variables. @@ -850,8 +932,18 @@ services: volumes: - ./volumes/plugin_daemon:/app/storage depends_on: - db: + db_postgres: condition: service_healthy + required: false + db_mysql: + condition: service_healthy + required: false + oceanbase: + condition: service_healthy + required: false + seekdb: + condition: service_healthy + required: false # ssrf_proxy server # for more information, please refer to @@ -967,6 +1059,63 @@ services: AUTHORIZATION_ADMINLIST_ENABLED: ${WEAVIATE_AUTHORIZATION_ADMINLIST_ENABLED:-true} AUTHORIZATION_ADMINLIST_USERS: ${WEAVIATE_AUTHORIZATION_ADMINLIST_USERS:-hello@dify.ai} + # OceanBase vector database + oceanbase: + image: oceanbase/oceanbase-ce:4.3.5-lts + container_name: oceanbase + profiles: + - oceanbase + restart: always + volumes: + - ./volumes/oceanbase/data:/root/ob + - ./volumes/oceanbase/conf:/root/.obd/cluster + - ./volumes/oceanbase/init.d:/root/boot/init.d + environment: + OB_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G} + OB_SYS_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} + OB_TENANT_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} + OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai} + OB_SERVER_IP: 127.0.0.1 + MODE: mini + LANG: en_US.UTF-8 + ports: + - "${OCEANBASE_VECTOR_PORT:-2881}:2881" + healthcheck: + test: + [ + "CMD-SHELL", + 'obclient -h127.0.0.1 -P2881 -uroot@test -p${OCEANBASE_VECTOR_PASSWORD:-difyai123456} -e "SELECT 1;"', + ] + interval: 10s + retries: 30 + start_period: 30s + timeout: 10s + + # seekdb vector database + seekdb: + image: oceanbase/seekdb:latest + container_name: seekdb + profiles: + - seekdb + restart: always + volumes: + - ./volumes/seekdb:/var/lib/oceanbase + environment: + ROOT_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} + MEMORY_LIMIT: ${SEEKDB_MEMORY_LIMIT:-2G} + REPORTER: dify-ai-seekdb + ports: + - "${OCEANBASE_VECTOR_PORT:-2881}:2881" + healthcheck: + test: + [ + "CMD-SHELL", + 'mysql -h127.0.0.1 -P2881 -uroot -p${OCEANBASE_VECTOR_PASSWORD:-difyai123456} -e "SELECT 1;"', + ] + interval: 5s + retries: 60 + timeout: 5s + # Qdrant vector store. # (if used, you need to set VECTOR_STORE to qdrant in the api & worker service.) qdrant: @@ -1102,38 +1251,6 @@ services: CHROMA_SERVER_AUTHN_PROVIDER: ${CHROMA_SERVER_AUTHN_PROVIDER:-chromadb.auth.token_authn.TokenAuthenticationServerProvider} IS_PERSISTENT: ${CHROMA_IS_PERSISTENT:-TRUE} - # OceanBase vector database - oceanbase: - image: oceanbase/oceanbase-ce:4.3.5-lts - container_name: oceanbase - profiles: - - oceanbase - restart: always - volumes: - - ./volumes/oceanbase/data:/root/ob - - ./volumes/oceanbase/conf:/root/.obd/cluster - - ./volumes/oceanbase/init.d:/root/boot/init.d - environment: - OB_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G} - OB_SYS_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} - OB_TENANT_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} - OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai} - OB_SERVER_IP: 127.0.0.1 - MODE: mini - LANG: en_US.UTF-8 - ports: - - "${OCEANBASE_VECTOR_PORT:-2881}:2881" - healthcheck: - test: - [ - "CMD-SHELL", - 'obclient -h127.0.0.1 -P2881 -uroot@test -p$${OB_TENANT_PASSWORD} -e "SELECT 1;"', - ] - interval: 10s - retries: 30 - start_period: 30s - timeout: 10s - # Oracle vector database oracle: image: container-registry.oracle.com/database/free:latest diff --git a/docker/middleware.env.example b/docker/middleware.env.example index 24629c2d89..3374ddd537 100644 --- a/docker/middleware.env.example +++ b/docker/middleware.env.example @@ -1,11 +1,21 @@ # ------------------------------ # Environment Variables for db Service # ------------------------------ -POSTGRES_USER=postgres +# Database Configuration +# Database type, supported values are `postgresql` and `mysql` +DB_TYPE=postgresql +DB_USERNAME=postgres +DB_PASSWORD=difyai123456 +DB_HOST=db_postgres +DB_PORT=5432 +DB_DATABASE=dify + +# PostgreSQL Configuration +POSTGRES_USER=${DB_USERNAME} # The password for the default postgres user. -POSTGRES_PASSWORD=difyai123456 +POSTGRES_PASSWORD=${DB_PASSWORD} # The name of the default postgres database. -POSTGRES_DB=dify +POSTGRES_DB=${DB_DATABASE} # postgres data directory PGDATA=/var/lib/postgresql/data/pgdata PGDATA_HOST_VOLUME=./volumes/db/data @@ -54,6 +64,37 @@ POSTGRES_STATEMENT_TIMEOUT=0 # A value of 0 prevents the server from terminating idle sessions. POSTGRES_IDLE_IN_TRANSACTION_SESSION_TIMEOUT=0 +# MySQL Configuration +MYSQL_USERNAME=${DB_USERNAME} +# MySQL password +MYSQL_PASSWORD=${DB_PASSWORD} +# MySQL database name +MYSQL_DATABASE=${DB_DATABASE} +# MySQL data directory host volume +MYSQL_HOST_VOLUME=./volumes/mysql/data + +# MySQL Performance Configuration +# Maximum number of connections to MySQL +# Default is 1000 +MYSQL_MAX_CONNECTIONS=1000 + +# InnoDB buffer pool size +# Default is 512M +# Recommended value: 70-80% of available memory for dedicated MySQL server +# Reference: https://dev.mysql.com/doc/refman/8.0/en/innodb-parameters.html#sysvar_innodb_buffer_pool_size +MYSQL_INNODB_BUFFER_POOL_SIZE=512M + +# InnoDB log file size +# Default is 128M +# Reference: https://dev.mysql.com/doc/refman/8.0/en/innodb-parameters.html#sysvar_innodb_log_file_size +MYSQL_INNODB_LOG_FILE_SIZE=128M + +# InnoDB flush log at transaction commit +# Default is 2 (flush to OS cache, sync every second) +# Options: 0 (no flush), 1 (flush and sync), 2 (flush to OS cache) +# Reference: https://dev.mysql.com/doc/refman/8.0/en/innodb-parameters.html#sysvar_innodb_flush_log_at_trx_commit +MYSQL_INNODB_FLUSH_LOG_AT_TRX_COMMIT=2 + # ----------------------------- # Environment Variables for redis Service # ----------------------------- @@ -97,6 +138,7 @@ WEAVIATE_HOST_VOLUME=./volumes/weaviate # Docker Compose Service Expose Host Port Configurations # ------------------------------ EXPOSE_POSTGRES_PORT=5432 +EXPOSE_MYSQL_PORT=3306 EXPOSE_REDIS_PORT=6379 EXPOSE_SANDBOX_PORT=8194 EXPOSE_SSRF_PROXY_PORT=3128 diff --git a/docker/tidb/docker-compose.yaml b/docker/tidb/docker-compose.yaml index fa15770175..9db6922108 100644 --- a/docker/tidb/docker-compose.yaml +++ b/docker/tidb/docker-compose.yaml @@ -55,7 +55,8 @@ services: - ./volumes/data:/data - ./volumes/logs:/logs command: - - --config=/tiflash.toml + - server + - --config-file=/tiflash.toml depends_on: - "tikv" - "tidb" diff --git a/docs/hi-IN/CONTRIBUTING.md b/docs/hi-IN/CONTRIBUTING.md new file mode 100644 index 0000000000..5c1ea4f8fd --- /dev/null +++ b/docs/hi-IN/CONTRIBUTING.md @@ -0,0 +1,101 @@ +# योगदान (CONTRIBUTING) + +तो आप Dify में योगदान देना चाहते हैं — यह शानदार है, हम उत्सुक हैं यह देखने के लिए कि आप क्या बनाते हैं। सीमित टीम और फंडिंग वाले एक स्टार्टअप के रूप में, हमारा बड़ा लक्ष्य LLM एप्लिकेशनों के निर्माण और प्रबंधन के लिए सबसे सहज वर्कफ़्लो डिज़ाइन करना है। समुदाय से मिलने वाली कोई भी मदद वास्तव में मायने रखती है। + +हमारे वर्तमान चरण को देखते हुए हमें तेज़ी से काम करना और डिलीवर करना होता है, लेकिन हम यह भी सुनिश्चित करना चाहते हैं कि आपके जैसे योगदानकर्ताओं के लिए योगदान देने का अनुभव यथासंभव सरल और सुगम हो।\ +इसी उद्देश्य से हमने यह योगदान गाइड तैयार किया है, ताकि आप कोडबेस से परिचित हो सकें और जान सकें कि हम योगदानकर्ताओं के साथ कैसे काम करते हैं — ताकि आप जल्दी से मज़ेदार हिस्से पर पहुँच सकें। + +यह गाइड, Dify की तरह ही, एक निरंतर विकसित होता दस्तावेज़ है। यदि यह कभी-कभी वास्तविक प्रोजेक्ट से पीछे रह जाए तो हम आपके समझ के लिए आभारी हैं, और सुधार के लिए किसी भी सुझाव का स्वागत करते हैं। + +लाइसेंसिंग के संदर्भ में, कृपया एक मिनट निकालकर हमारा छोटा [License and Contributor Agreement](../../LICENSE) पढ़ें।\ +समुदाय [code of conduct](https://github.com/langgenius/.github/blob/main/CODE_OF_CONDUCT.md) का भी पालन करता है। + +## शुरू करने से पहले + +कुछ योगदान करने की तलाश में हैं? हमारे [good first issues](https://github.com/langgenius/dify/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22good%20first%20issue%22) ब्राउज़ करें और किसी एक को चुनकर शुरुआत करें! + +कोई नया मॉडल रनटाइम या टूल जोड़ना चाहते हैं? हमारे [plugin repo](https://github.com/langgenius/dify-plugins) में एक PR खोलें और हमें दिखाएँ कि आपने क्या बनाया है। + +किसी मौजूदा मॉडल रनटाइम या टूल को अपडेट करना है, या कुछ बग्स को ठीक करना है? हमारे [official plugin repo](https://github.com/langgenius/dify-official-plugins) पर जाएँ और अपना जादू दिखाएँ! + +मज़े में शामिल हों, योगदान दें, और चलिए मिलकर कुछ शानदार बनाते हैं! 💡✨ + +PR के विवरण में मौजूदा issue को लिंक करना या नया issue खोलना न भूलें। + +### बग रिपोर्ट (Bug reports) + +> [!IMPORTANT]\ +> कृपया बग रिपोर्ट सबमिट करते समय निम्नलिखित जानकारी अवश्य शामिल करें: + +- एक स्पष्ट और वर्णनात्मक शीर्षक +- बग का विस्तृत विवरण, जिसमें कोई भी त्रुटि संदेश (error messages) शामिल हो +- बग को पुन: उत्पन्न करने के चरण +- अपेक्षित व्यवहार +- **लॉग्स**, यदि उपलब्ध हों — बैकएंड समस्याओं के लिए यह बहुत महत्वपूर्ण है, आप इन्हें docker-compose logs में पा सकते हैं +- स्क्रीनशॉट या वीडियो (यदि लागू हो) + +हम प्राथमिकता कैसे तय करते हैं: + +| समस्या प्रकार (Issue Type) | प्राथमिकता (Priority) | +| ------------------------------------------------------------ | --------------- | +| मुख्य कार्यों में बग (क्लाउड सेवा, लॉगिन न होना, एप्लिकेशन न चलना, सुरक्षा खामियाँ) | गंभीर (Critical) | +| गैर-गंभीर बग, प्रदर्शन सुधार | मध्यम प्राथमिकता (Medium Priority) | +| छोटे सुधार (टाइपो, भ्रमित करने वाला लेकिन काम करने वाला UI) | निम्न प्राथमिकता (Low Priority) | + +### फ़ीचर अनुरोध (Feature requests) + +> [!NOTE]\ +> कृपया फ़ीचर अनुरोध सबमिट करते समय निम्नलिखित जानकारी अवश्य शामिल करें: + +- एक स्पष्ट और वर्णनात्मक शीर्षक +- फ़ीचर का विस्तृत विवरण +- फ़ीचर के उपयोग का मामला (use case) +- फ़ीचर अनुरोध से संबंधित कोई अन्य संदर्भ या स्क्रीनशॉट + +हम प्राथमिकता कैसे तय करते हैं: + +| फ़ीचर प्रकार (Feature Type) | प्राथमिकता (Priority) | +| ------------------------------------------------------------ | --------------- | +| किसी टीम सदस्य द्वारा उच्च प्राथमिकता (High-Priority) के रूप में चिह्नित फ़ीचर | उच्च प्राथमिकता (High Priority) | +| हमारे [community feedback board](https://github.com/langgenius/dify/discussions/categories/feedbacks) से लोकप्रिय फ़ीचर अनुरोध | मध्यम प्राथमिकता (Medium Priority) | +| गैर-मुख्य फ़ीचर्स और छोटे सुधार | निम्न प्राथमिकता (Low Priority) | +| मूल्यवान लेकिन तात्कालिक नहीं | भविष्य का फ़ीचर (Future-Feature) | + +## अपना PR सबमिट करना (Submitting your PR) + +### पुल रिक्वेस्ट प्रक्रिया (Pull Request Process) + +1. रिपॉज़िटरी को Fork करें +1. PR ड्राफ्ट करने से पहले, कृपया अपने बदलावों पर चर्चा करने के लिए एक issue बनाएँ +1. अपने परिवर्तनों के लिए एक नई शाखा (branch) बनाएँ +1. अपने बदलावों के लिए उपयुक्त टेस्ट जोड़ें +1. सुनिश्चित करें कि आपका कोड मौजूदा टेस्ट पास करता है +1. PR विवरण में issue लिंक करें, जैसे: `fixes #` +1. मर्ज हो जाएँ! 🎉 + +### प्रोजेक्ट सेटअप करें (Setup the project) + +#### फ्रंटएंड (Frontend) + +फ्रंटएंड सेवा सेटअप करने के लिए, कृपया हमारी विस्तृत [guide](https://github.com/langgenius/dify/blob/main/web/README.md) देखें जो `web/README.md` फ़ाइल में उपलब्ध है।\ +यह दस्तावेज़ आपको फ्रंटएंड वातावरण को सही ढंग से सेटअप करने के लिए विस्तृत निर्देश प्रदान करता है। + +#### बैकएंड (Backend) + +बैकएंड सेवा सेटअप करने के लिए, कृपया हमारी विस्तृत [instructions](https://github.com/langgenius/dify/blob/main/api/README.md) देखें जो `api/README.md` फ़ाइल में दी गई हैं।\ +यह दस्तावेज़ चरण-दर-चरण मार्गदर्शन प्रदान करता है जिससे आप बैकएंड को सुचारू रूप से चला सकें। + +#### अन्य महत्वपूर्ण बातें (Other things to note) + +सेटअप शुरू करने से पहले इस दस्तावेज़ की सावधानीपूर्वक समीक्षा करने की अनुशंसा की जाती है, क्योंकि इसमें निम्नलिखित महत्वपूर्ण जानकारी शामिल है: + +- आवश्यक पूर्व-आवश्यकताएँ और निर्भरताएँ +- इंस्टॉलेशन चरण +- कॉन्फ़िगरेशन विवरण +- सामान्य समस्या निवारण सुझाव + +यदि सेटअप प्रक्रिया के दौरान आपको कोई समस्या आती है, तो बेझिझक हमसे संपर्क करें। + +## सहायता प्राप्त करना (Getting Help) + +यदि योगदान करते समय आप कहीं अटक जाएँ या कोई महत्वपूर्ण प्रश्न हो, तो संबंधित GitHub issue के माध्यम से हमें अपने प्रश्न भेजें, या त्वरित बातचीत के लिए हमारे [Discord](https://discord.gg/8Tpq4AcN9c) पर जुड़ें। diff --git a/docs/hi-IN/README.md b/docs/hi-IN/README.md new file mode 100644 index 0000000000..7c4fc70db0 --- /dev/null +++ b/docs/hi-IN/README.md @@ -0,0 +1,224 @@ +![cover-v5-optimized](../../images/GitHub_README_if.png) + +

+ 📌 Dify वर्कफ़्लो फ़ाइल अपलोड पेश है: Google NotebookLM पॉडकास्ट को पुनः बनाएँ +

+ +

+ Dify Cloud · + स्व-होस्टिंग · + दस्तावेज़ीकरण · + Dify संस्करण का अवलोकन +

+ +

+ + Static Badge + + Static Badge + + chat on Discord + + join Reddit + + follow on X(Twitter) + + follow on LinkedIn + + Docker Pulls + + Commits last month + + Issues closed + + Discussion posts +

+ +

+ README in English + 繁體中文文件 + 简体中文文件 + 日本語のREADME + README en Español + README en Français + README tlhIngan Hol + README in Korean + README بالعربية + Türkçe README + README Tiếng Việt + README in Deutsch + README in Italiano + README in বাংলা + README in हिन्दी +

+ +Dify एक मुक्त-स्रोत प्लेटफ़ॉर्म है जो LLM अनुप्रयोगों (एप्लिकेशनों) के विकास के लिए बनाया गया है। इसका सहज इंटरफ़ेस एजेंटिक एआई वर्कफ़्लो, RAG पाइपलाइनों, एजेंट क्षमताओं, मॉडल प्रबंधन, ऑब्ज़र्वेबिलिटी (निगरानी) सुविधाओं और अन्य को एक साथ जोड़ता है — जिससे आप प्रोटोटाइप से उत्पादन (प्रोडक्शन) तक जल्दी पहुँच सकते हैं। + +## त्वरित प्रारंभ + +> Dify स्थापित करने से पहले, सुनिश्चित करें कि आपकी मशीन निम्नलिखित न्यूनतम सिस्टम आवश्यकताओं को पूरा करती है: +> +> - CPU >= 2 Core +> - RAM >= 4 GiB + +
+ +Dify सर्वर शुरू करने का सबसे आसान तरीका [Docker Compose](../..docker/docker-compose.yaml) के माध्यम से है। नीचे दिए गए कमांड्स से Dify चलाने से पहले, सुनिश्चित करें कि आपकी मशीन पर [Docker] (https://docs.docker.com/get-docker/) और [Docker Compose] (https://docs.docker.com/compose/install/) इंस्टॉल हैं।: + +```bash +cd dify +cd docker +cp .env.example .env +docker compose up -d +``` + +रन करने के बाद, आप अपने ब्राउज़र में [http://localhost/install](http://localhost/install) पर Dify डैशबोर्ड एक्सेस कर सकते हैं और प्रारंभिक सेटअप प्रक्रिया शुरू कर सकते हैं। + +#### सहायता प्राप्त करना + +यदि आपको Dify सेटअप करते समय कोई समस्या आती है, तो कृपया हमारे [FAQ](https://docs.dify.ai/getting-started/install-self-hosted/faqs) को देखें। यदि फिर भी समस्या बनी रहती है, तो [the community and us](#community--contact) से संपर्क करें। + +> यदि आप Dify में योगदान देना चाहते हैं या अतिरिक्त विकास करना चाहते हैं, तो हमारे [guide to deploying from source code](https://docs.dify.ai/getting-started/install-self-hosted/local-source-code) को देखें। + +## मुख्य विशेषताएँ + +**1. वर्कफ़्लो**:\ +एक दृश्य कैनवास पर शक्तिशाली एआई वर्कफ़्लो बनाएं और परीक्षण करें, नीचे दी गई सभी सुविधाओं और उससे भी आगे का उपयोग करते हुए। + +**2. व्यापक मॉडल समर्थन**:\ +कई इन्फ़रेंस प्रदाताओं और स्व-होस्टेड समाधानों से सैकड़ों स्वामित्व / मुक्त-स्रोत LLMs के साथ सहज एकीकरण, जिसमें GPT, Mistral, Llama3, और कोई भी OpenAI API-संगत मॉडल शामिल हैं। समर्थित मॉडल प्रदाताओं की पूरी सूची [here](https://docs.dify.ai/getting-started/readme/model-providers) पर पाई जा सकती है। + +![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) + +**3. प्रॉम्प्ट IDE**:\ +प्रॉम्प्ट बनाने, मॉडल प्रदर्शन की तुलना करने, और चैट-आधारित ऐप में टेक्स्ट-टू-स्पीच जैसी अतिरिक्त सुविधाएँ जोड़ने के लिए सहज इंटरफ़ेस। + +**4. RAG पाइपलाइन**:\ +विस्तृत RAG क्षमताएँ जो दस्तावेज़ इनजेशन से लेकर रिट्रीवल तक सब कुछ कवर करती हैं, और PDFs, PPTs, तथा अन्य सामान्य दस्तावेज़ प्रारूपों से टेक्स्ट निकालने के लिए आउट-ऑफ़-द-बॉक्स समर्थन प्रदान करती हैं। + +**5. एजेंट क्षमताएँ**:\ +आप LLM फ़ंक्शन कॉलिंग या ReAct के आधार पर एजेंट परिभाषित कर सकते हैं, और एजेंट के लिए पूर्व-निर्मित या कस्टम टूल जोड़ सकते हैं। Dify एआई एजेंटों के लिए 50+ अंतर्निर्मित टूल प्रदान करता है, जैसे Google Search, DALL·E, Stable Diffusion और WolframAlpha। + +**6. LLMOps**:\ +समय के साथ एप्लिकेशन लॉग्स और प्रदर्शन की निगरानी और विश्लेषण करें। आप उत्पादन डेटा और एनोटेशनों के आधार पर प्रॉम्प्ट्स, डेटासेट्स और मॉडल्स को निरंतर सुधार सकते हैं। + +**7. Backend-as-a-Service**:\ +Dify की सभी सेवाएँ संबंधित APIs के साथ आती हैं, जिससे आप Dify को आसानी से अपने व्यावसायिक लॉजिक में एकीकृत कर सकते हैं। + +## Dify का उपयोग करना + +- **Cloud
**\ + हम [Dify Cloud](https://dify.ai) सेवा प्रदान करते हैं, जिसे कोई भी बिना किसी सेटअप के आज़मा सकता है। यह स्व-परिनियोजित संस्करण की सभी क्षमताएँ प्रदान करता है और सैंडबॉक्स प्लान में 200 निःशुल्क GPT-4 कॉल्स शामिल करता है। + +- **Dify कम्युनिटी संस्करण की स्व-होस्टिंग
**\ + अपने वातावरण में Dify को जल्दी चलाएँ इस [starter guide](#quick-start) की मदद से।\ + आगे के संदर्भों और विस्तृत निर्देशों के लिए हमारी [documentation](https://docs.dify.ai) देखें। + +- **उद्यमों / संगठनों के लिए Dify
**\ + हम अतिरिक्त एंटरप्राइज़-केंद्रित सुविधाएँ प्रदान करते हैं।\ + [इस चैटबॉट के माध्यम से हमें अपने प्रश्न भेजें](https://udify.app/chat/22L1zSxg6yW1cWQg) या [हमें ईमेल भेजें](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) ताकि हम एंटरप्राइज़ आवश्यकताओं पर चर्चा कर सकें।
+ + > AWS का उपयोग करने वाले स्टार्टअप्स और छोटे व्यवसायों के लिए, [AWS Marketplace पर Dify Premium](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) देखें और इसे एक क्लिक में अपने AWS VPC पर डिप्लॉय करें। यह एक किफायती AMI ऑफ़रिंग है, जो आपको कस्टम लोगो और ब्रांडिंग के साथ ऐप्स बनाने की अनुमति देती है। + +## आगे बने रहें + +GitHub पर Dify को स्टार करें और नए रिलीज़ की सूचना तुरंत प्राप्त करें। + +![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4) + +## उन्नत सेटअप + +### कस्टम कॉन्फ़िगरेशन + +यदि आपको कॉन्फ़िगरेशन को कस्टमाइज़ करने की आवश्यकता है, तो कृपया हमारी [.env.example](../../docker/.env.example) फ़ाइल में दिए गए टिप्पणियों (comments) को देखें और अपने `.env` फ़ाइल में संबंधित मानों को अपडेट करें।\ +इसके अतिरिक्त, आपको अपने विशेष डिप्लॉयमेंट वातावरण और आवश्यकताओं के आधार पर `docker-compose.yaml` फ़ाइल में भी बदलाव करने की आवश्यकता हो सकती है, जैसे इमेज संस्करण, पोर्ट मैपिंग या वॉल्यूम माउंट्स बदलना।\ +कोई भी बदलाव करने के बाद, कृपया `docker-compose up -d` कमांड को पुनः चलाएँ।\ +उपलब्ध सभी environment variables की पूरी सूची [here](https://docs.dify.ai/getting-started/install-self-hosted/environments) पर पाई जा सकती है। + +### Grafana के साथ मेट्रिक्स मॉनिटरिंग + +Grafana में Dify के PostgreSQL डेटाबेस को डेटा स्रोत के रूप में उपयोग करते हुए डैशबोर्ड आयात करें, ताकि आप ऐप्स, टेनेंट्स, संदेशों आदि के स्तर पर मेट्रिक्स की निगरानी कर सकें। + +- [Grafana Dashboard by @bowenliang123](https://github.com/bowenliang123/dify-grafana-dashboard) + +### Kubernetes के साथ डिप्लॉयमेंट + +यदि आप उच्च उपलब्धता (high-availability) सेटअप कॉन्फ़िगर करना चाहते हैं, तो समुदाय द्वारा योगदान किए गए [Helm Charts](https://helm.sh/) और YAML फ़ाइलें उपलब्ध हैं जो Dify को Kubernetes पर डिप्लॉय करने की अनुमति देती हैं। + +- [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify) +- [Helm Chart by @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm) +- [Helm Chart by @magicsong](https://github.com/magicsong/ai-charts) +- [YAML file by @Winson-030](https://github.com/Winson-030/dify-kubernetes) +- [YAML file by @wyy-holding](https://github.com/wyy-holding/dify-k8s) +- [🚀 NEW! YAML files (Supports Dify v1.6.0) by @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes) + +#### डिप्लॉयमेंट के लिए Terraform का उपयोग + +[terraform](https://www.terraform.io/) का उपयोग करके एक क्लिक में Dify को क्लाउड प्लेटफ़ॉर्म पर डिप्लॉय करें। + +##### Azure Global + +- [Azure Terraform by @nikawang](https://github.com/nikawang/dify-azure-terraform) + +##### Google Cloud + +- [Google Cloud Terraform by @sotazum](https://github.com/DeNA/dify-google-cloud-terraform) + +#### डिप्लॉयमेंट के लिए AWS CDK का उपयोग + +[CDK](https://aws.amazon.com/cdk/) का उपयोग करके Dify को AWS पर डिप्लॉय करें। + +##### AWS + +- [AWS CDK by @KevinZhao (EKS आधारित)](https://github.com/aws-samples/solution-for-deploying-dify-on-aws) +- [AWS CDK by @tmokmss (ECS आधारित)](https://github.com/aws-samples/dify-self-hosted-on-aws) + +#### Alibaba Cloud Computing Nest का उपयोग + +[Alibaba Cloud Computing Nest](https://computenest.console.aliyun.com/service/instance/create/default?type=user&ServiceName=Dify%E7%A4%BE%E5%8C%BA%E7%89%88) के साथ Dify को Alibaba Cloud पर तेज़ी से डिप्लॉय करें। + +#### Alibaba Cloud Data Management का उपयोग + +[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/) के साथ एक क्लिक में Dify को Alibaba Cloud पर डिप्लॉय करें। + +#### Azure Devops Pipeline के साथ AKS पर डिप्लॉय करें + +[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) के साथ एक क्लिक में Dify को AKS पर डिप्लॉय करें। + +## योगदान (Contributing) + +जो लोग कोड में योगदान देना चाहते हैं, वे हमारे [Contribution Guide](./CONTRIBUTING.md) को देखें।\ +साथ ही, कृपया Dify को सोशल मीडिया, कार्यक्रमों और सम्मेलनों में साझा करके इसका समर्थन करने पर विचार करें। + +> हम ऐसे योगदानकर्ताओं की तलाश कर रहे हैं जो Dify को मंदारिन या अंग्रेज़ी के अलावा अन्य भाषाओं में अनुवाद करने में मदद कर सकें।\ +> यदि आप सहायता करने में रुचि रखते हैं, तो अधिक जानकारी के लिए [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) देखें, और हमारे [Discord Community Server](https://discord.gg/8Tpq4AcN9c) के `global-users` चैनल में हमें संदेश दें। + +## समुदाय और संपर्क (Community & contact) + +- [GitHub Discussion](https://github.com/langgenius/dify/discussions) — सर्वोत्तम उपयोग के लिए: प्रतिक्रिया साझा करने और प्रश्न पूछने हेतु। +- [GitHub Issues](https://github.com/langgenius/dify/issues) — सर्वोत्तम उपयोग के लिए: Dify.AI का उपयोग करते समय आने वाली बग्स या फीचर सुझावों के लिए। देखें: [Contribution Guide](../../CONTRIBUTING.md)। +- [Discord](https://discord.gg/FngNHpbcY7) — सर्वोत्तम उपयोग के लिए: अपने एप्लिकेशन साझा करने और समुदाय के साथ जुड़ने के लिए। +- [X(Twitter)](https://twitter.com/dify_ai) — सर्वोत्तम उपयोग के लिए: अपने एप्लिकेशन साझा करने और समुदाय से जुड़े रहने के लिए। + +**योगदानकर्ता** + + + + + +## स्टार इतिहास (Star history) + +[![Star History Chart](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date) + +## सुरक्षा प्रकटीकरण (Security disclosure) + +आपकी गोपनीयता की सुरक्षा के लिए, कृपया GitHub पर सुरक्षा संबंधित समस्याएँ पोस्ट करने से बचें।\ +इसके बजाय, समस्याओं की रिपोर्ट security@dify.ai पर करें, और हमारी टीम आपको विस्तृत उत्तर के साथ प्रतिक्रिया देगी। + +## लाइसेंस (License) + +यह रिपॉज़िटरी [Dify Open Source License](../../LICENSE) के अंतर्गत लाइसेंस प्राप्त है, जो Apache 2.0 पर आधारित है और इसमें अतिरिक्त शर्तें शामिल हैं। diff --git a/docs/it-IT/README.md b/docs/it-IT/README.md new file mode 100644 index 0000000000..598e87ec25 --- /dev/null +++ b/docs/it-IT/README.md @@ -0,0 +1,213 @@ +![cover-v5-optimized](../../images/GitHub_README_if.png) + +

+ 📌 Introduzione a Dify Workflow File Upload: ricreando il podcast di Google NotebookLM +

+ +

+ Dify Cloud · + Self-Hosted · + Documentazione · + Panoramica dei prodotti Dify +

+ +

+ + Static Badge + + Static Badge + + chat on Discord + + join Reddit + + follow on X(Twitter) + + follow on LinkedIn + + Docker Pulls + + Commits last month + + Issues closed + + Discussion posts +

+ +

+ README in English + 繁體中文文件 + 简体中文文件 + 日本語のREADME + README en Español + README en Français + README tlhIngan Hol + README in Korean + README بالعربية + Türkçe README + README Tiếng Việt + README in Deutsch + README in Italiano + README in বাংলা +

+ +Dify è una piattaforma open-source per lo sviluppo di applicazioni LLM. La sua interfaccia intuitiva combina flussi di lavoro AI basati su agenti, pipeline RAG, funzionalità di agenti, gestione dei modelli, funzionalità di monitoraggio e altro ancora, permettendovi di passare rapidamente da un prototipo alla produzione. + +## Avvio Rapido + +> Prima di installare Dify, assicuratevi che il vostro sistema soddisfi i seguenti requisiti minimi: +> +> - CPU >= 2 Core +> - RAM >= 4 GiB + +
+ +Il modo più semplice per avviare il server Dify è tramite [docker compose](../../docker/docker-compose.yaml). Prima di eseguire Dify con i seguenti comandi, assicuratevi che [Docker](https://docs.docker.com/get-docker/) e [Docker Compose](https://docs.docker.com/compose/install/) siano installati sul vostro sistema: + +```bash +cd dify +cd docker +cp .env.example .env +docker compose up -d +``` + +Dopo aver avviato il server, potete accedere al dashboard di Dify tramite il vostro browser all'indirizzo [http://localhost/install](http://localhost/install) e avviare il processo di inizializzazione. + +#### Richiedere Aiuto + +Consultate le nostre [FAQ](https://docs.dify.ai/getting-started/install-self-hosted/faqs) se riscontrate problemi durante la configurazione di Dify. Contattateci [tramite la community](#community--contatti) se continuano a verificarsi difficoltà. + +> Se desiderate contribuire a Dify o effettuare ulteriori sviluppi, consultate la nostra [guida al deployment dal codice sorgente](https://docs.dify.ai/getting-started/install-self-hosted/local-source-code). + +## Caratteristiche Principali + +**1. Workflow**: +Create e testate potenti flussi di lavoro AI su un'interfaccia visuale, utilizzando tutte le funzionalità seguenti e oltre. + +**2. Supporto Completo dei Modelli**: +Integrazione perfetta con centinaia di LLM proprietari e open-source di decine di provider di inferenza e soluzioni self-hosted, che coprono GPT, Mistral, Llama3 e tutti i modelli compatibili con l'API OpenAI. L'elenco completo dei provider di modelli supportati è disponibile [qui](https://docs.dify.ai/getting-started/readme/model-providers). + +![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) + +**3. Prompt IDE**: +Interfaccia intuitiva per creare prompt, confrontare le prestazioni dei modelli e aggiungere funzionalità aggiuntive come text-to-speech in un'applicazione basata su chat. + +**4. Pipeline RAG**: +Funzionalità RAG complete che coprono tutto, dall'acquisizione dei documenti alla loro interrogazione, con supporto pronto all'uso per l'estrazione di testo da PDF, PPT e altri formati di documenti comuni. + +**5. Capacità degli Agenti**: +Potete definire agenti basati su LLM Function Calling o ReAct e aggiungere strumenti predefiniti o personalizzati per l'agente. Dify fornisce oltre 50 strumenti integrati per gli agenti AI, come Google Search, DALL·E, Stable Diffusion e WolframAlpha. + +**6. LLMOps**: +Monitorate e analizzate i log delle applicazioni e le prestazioni nel tempo. Potete migliorare continuamente prompt, dataset e modelli basandovi sui dati di produzione e sulle annotazioni. + +**7. Backend-as-a-Service**: +Tutte le offerte di Dify sono dotate di API corrispondenti, permettendovi di integrare facilmente Dify nella vostra logica di business. + +## Utilizzo di Dify + +- **Cloud
** + Ospitiamo un servizio [Dify Cloud](https://dify.ai) che chiunque può provare senza configurazione. Offre tutte le funzionalità della versione self-hosted e include 200 chiamate GPT-4 gratuite nel piano sandbox. + +- **Dify Community Edition Self-Hosted
** + Avviate rapidamente Dify nel vostro ambiente con questa [guida di avvio rapido](#avvio-rapido). Utilizzate la nostra [documentazione](https://docs.dify.ai) per ulteriori informazioni e istruzioni dettagliate. + +- **Dify per Aziende / Organizzazioni
** + Offriamo funzionalità aggiuntive specifiche per le aziende. Potete [scriverci via email](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) per discutere le vostre esigenze aziendali.
+ + > Per startup e piccole imprese che utilizzano AWS, date un'occhiata a [Dify Premium su AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) e distribuitelo con un solo clic nel vostro AWS VPC. Si tratta di un'offerta AMI conveniente con l'opzione di creare app con logo e branding personalizzati. + +## Resta Sempre Aggiornato + +Mettete una stella a Dify su GitHub e ricevete notifiche immediate sui nuovi rilasci. + +![star-us](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4) + +## Configurazioni Avanzate + +Se dovete personalizzare la configurazione, leggete i commenti nel nostro file [.env.example](../../docker/.env.example) e aggiornate i valori corrispondenti nel vostro file `.env`. Inoltre, potrebbe essere necessario apportare modifiche al file `docker-compose.yaml`, come cambiare le versioni delle immagini, le mappature delle porte o i mount dei volumi, a seconda del vostro ambiente di distribuzione specifico e dei vostri requisiti. Dopo aver apportato le modifiche, riavviate `docker-compose up -d`. L'elenco completo delle variabili d'ambiente disponibili è disponibile [qui](https://docs.dify.ai/getting-started/install-self-hosted/environments). + +### Monitoraggio delle Metriche con Grafana + +Importate la dashboard in Grafana, utilizzando il database PostgreSQL di Dify come origine dati, per monitorare le metriche a livello di app, tenant, messaggi e altro ancora. + +- [Dashboard Grafana di @bowenliang123](https://github.com/bowenliang123/dify-grafana-dashboard) + +### Distribuzione con Kubernetes + +Se desiderate configurare un'installazione ad alta disponibilità, ci sono [Helm Charts](https://helm.sh/) e file YAML forniti dalla community che consentono di distribuire Dify su Kubernetes. + +- [Helm Chart di @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify) +- [Helm Chart di @BorisPolonsky](https://github.com/BorisPolonsky/dify-helm) +- [Helm Chart di @magicsong](https://github.com/magicsong/ai-charts) +- [File YAML di @Winson-030](https://github.com/Winson-030/dify-kubernetes) +- [File YAML di @wyy-holding](https://github.com/wyy-holding/dify-k8s) +- [🚀 NUOVO! File YAML (Supporta Dify v1.6.0) di @Zhoneym](https://github.com/Zhoneym/DifyAI-Kubernetes) + +#### Utilizzo di Terraform per la Distribuzione + +Distribuite Dify con un solo clic su una piattaforma cloud utilizzando [terraform](https://www.terraform.io/). + +##### Azure Global + +- [Azure Terraform di @nikawang](https://github.com/nikawang/dify-azure-terraform) + +##### Google Cloud + +- [Google Cloud Terraform di @sotazum](https://github.com/DeNA/dify-google-cloud-terraform) + +#### Utilizzo di AWS CDK per la Distribuzione + +Distribuzione di Dify su AWS con [CDK](https://aws.amazon.com/cdk/) + +##### AWS + +- [AWS CDK di @KevinZhao (basato su EKS)](https://github.com/aws-samples/solution-for-deploying-dify-on-aws) +- [AWS CDK di @tmokmss (basato su ECS)](https://github.com/aws-samples/dify-self-hosted-on-aws) + +#### Alibaba Cloud + +[Alibaba Cloud Computing Nest](https://computenest.console.aliyun.com/service/instance/create/default?type=user&ServiceName=Dify%E7%A4%BE%E5%8C%BA%E7%89%88) + +#### Alibaba Cloud Data Management + +Distribuzione con un clic di Dify su Alibaba Cloud con [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/) + +#### Utilizzo di Azure DevOps Pipeline per la Distribuzione su AKS + +Distribuite Dify con un clic in AKS utilizzando [Azure DevOps Pipeline Helm Chart di @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) + +## Contribuire + +Se desiderate contribuire con codice, leggete la nostra [Guida ai Contributi](../../CONTRIBUTING.md). Allo stesso tempo, vi chiediamo di supportare Dify condividendolo sui social media e presentandolo a eventi e conferenze. + +> Cerchiamo collaboratori che aiutino a tradurre Dify in altre lingue oltre al mandarino o all'inglese. Se siete interessati a collaborare, leggete il [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) per ulteriori informazioni e lasciate un commento nel canale `global-users` del nostro [server della community Discord](https://discord.gg/8Tpq4AcN9c). + +## Community & Contatti + +- [GitHub Discussion](https://github.com/langgenius/dify/discussions). Ideale per: condividere feedback e porre domande. +- [GitHub Issues](https://github.com/langgenius/dify/issues). Ideale per: bug che riscontrate durante l'utilizzo di Dify.AI e proposte di funzionalità. Consultate la nostra [Guida ai Contributi](../../CONTRIBUTING.md). +- [Discord](https://discord.gg/FngNHpbcY7). Ideale per: condividere le vostre applicazioni e interagire con la community. +- [X(Twitter)](https://twitter.com/dify_ai). Ideale per: condividere le vostre applicazioni e interagire con la community. + +**Collaboratori** + + + + + +## Storia delle Stelle + +[![Star History Chart](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date) + +## Divulgazione sulla Sicurezza + +Per proteggere la vostra privacy, evitate di pubblicare problemi di sicurezza su GitHub. Inviate invece le vostre domande a security@dify.ai e vi forniremo una risposta più dettagliata. + +## Licenza + +Questo repository è disponibile sotto la [Dify Open Source License](../../LICENSE), che è essenzialmente Apache 2.0 con alcune restrizioni aggiuntive. diff --git a/docs/pt-BR/README.md b/docs/pt-BR/README.md index f96b18eabb..444faa0a67 100644 --- a/docs/pt-BR/README.md +++ b/docs/pt-BR/README.md @@ -91,7 +91,7 @@ Todas os recursos do Dify vêm com APIs correspondentes, permitindo que você in Use nossa [documentação](https://docs.dify.ai) para referências adicionais e instruções mais detalhadas. - **Dify para empresas/organizações
** - Oferecemos recursos adicionais voltados para empresas. [Envie suas perguntas através deste chatbot](https://udify.app/chat/22L1zSxg6yW1cWQg) ou [envie-nos um e-mail](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) para discutir necessidades empresariais.
+ Oferecemos recursos adicionais voltados para empresas. Você pode [falar conosco por e-mail](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) para discutir necessidades empresariais.
> Para startups e pequenas empresas que utilizam AWS, confira o [Dify Premium no AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) e implemente no seu próprio AWS VPC com um clique. É uma oferta AMI acessível com a opção de criar aplicativos com logotipo e marca personalizados. diff --git a/docs/vi-VN/README.md b/docs/vi-VN/README.md index 51f7c5d994..07329e84cd 100644 --- a/docs/vi-VN/README.md +++ b/docs/vi-VN/README.md @@ -86,7 +86,7 @@ Tất cả các dịch vụ của Dify đều đi kèm với các API tương Sử dụng [tài liệu](https://docs.dify.ai) của chúng tôi để tham khảo thêm và nhận hướng dẫn chi tiết hơn. - **Dify cho doanh nghiệp / tổ chức
** - Chúng tôi cung cấp các tính năng bổ sung tập trung vào doanh nghiệp. [Ghi lại câu hỏi của bạn cho chúng tôi thông qua chatbot này](https://udify.app/chat/22L1zSxg6yW1cWQg) hoặc [gửi email cho chúng tôi](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) để thảo luận về nhu cầu doanh nghiệp.
+ Chúng tôi cung cấp các tính năng bổ sung tập trung vào doanh nghiệp. [Gửi email cho chúng tôi](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) để thảo luận về nhu cầu doanh nghiệp.
> Đối với các công ty khởi nghiệp và doanh nghiệp nhỏ sử dụng AWS, hãy xem [Dify Premium trên AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) và triển khai nó vào AWS VPC của riêng bạn chỉ với một cú nhấp chuột. Đây là một AMI giá cả phải chăng với tùy chọn tạo ứng dụng với logo và thương hiệu tùy chỉnh. diff --git a/sdks/python-client/dify_client/async_client.py b/sdks/python-client/dify_client/async_client.py index 984f668d0c..23126cf326 100644 --- a/sdks/python-client/dify_client/async_client.py +++ b/sdks/python-client/dify_client/async_client.py @@ -21,7 +21,7 @@ Example: import json import os -from typing import Literal, Dict, List, Any, IO +from typing import Literal, Dict, List, Any, IO, Optional, Union import aiofiles import httpx @@ -75,8 +75,8 @@ class AsyncDifyClient: self, method: str, endpoint: str, - json: dict | None = None, - params: dict | None = None, + json: Dict | None = None, + params: Dict | None = None, stream: bool = False, **kwargs, ): @@ -170,6 +170,72 @@ class AsyncDifyClient: """Get file preview by file ID.""" return await self._send_request("GET", f"/files/{file_id}/preview") + # App Configuration APIs + async def get_app_site_config(self, app_id: str): + """Get app site configuration. + + Args: + app_id: ID of the app + + Returns: + App site configuration + """ + url = f"/apps/{app_id}/site/config" + return await self._send_request("GET", url) + + async def update_app_site_config(self, app_id: str, config_data: Dict[str, Any]): + """Update app site configuration. + + Args: + app_id: ID of the app + config_data: Configuration data to update + + Returns: + Updated app site configuration + """ + url = f"/apps/{app_id}/site/config" + return await self._send_request("PUT", url, json=config_data) + + async def get_app_api_tokens(self, app_id: str): + """Get API tokens for an app. + + Args: + app_id: ID of the app + + Returns: + List of API tokens + """ + url = f"/apps/{app_id}/api-tokens" + return await self._send_request("GET", url) + + async def create_app_api_token(self, app_id: str, name: str, description: str | None = None): + """Create a new API token for an app. + + Args: + app_id: ID of the app + name: Name for the API token + description: Description for the API token (optional) + + Returns: + Created API token information + """ + data = {"name": name, "description": description} + url = f"/apps/{app_id}/api-tokens" + return await self._send_request("POST", url, json=data) + + async def delete_app_api_token(self, app_id: str, token_id: str): + """Delete an API token. + + Args: + app_id: ID of the app + token_id: ID of the token to delete + + Returns: + Deletion result + """ + url = f"/apps/{app_id}/api-tokens/{token_id}" + return await self._send_request("DELETE", url) + class AsyncCompletionClient(AsyncDifyClient): """Async client for Completion API operations.""" @@ -179,7 +245,7 @@ class AsyncCompletionClient(AsyncDifyClient): inputs: dict, response_mode: Literal["blocking", "streaming"], user: str, - files: dict | None = None, + files: Dict | None = None, ): """Create a completion message. @@ -216,7 +282,7 @@ class AsyncChatClient(AsyncDifyClient): user: str, response_mode: Literal["blocking", "streaming"] = "blocking", conversation_id: str | None = None, - files: dict | None = None, + files: Dict | None = None, ): """Create a chat message. @@ -295,7 +361,7 @@ class AsyncChatClient(AsyncDifyClient): data = {"user": user} return await self._send_request("DELETE", f"/conversations/{conversation_id}", data) - async def audio_to_text(self, audio_file: IO[bytes] | tuple, user: str): + async def audio_to_text(self, audio_file: Union[IO[bytes], tuple], user: str): """Convert audio to text.""" data = {"user": user} files = {"file": audio_file} @@ -340,6 +406,35 @@ class AsyncChatClient(AsyncDifyClient): """Delete an annotation.""" return await self._send_request("DELETE", f"/apps/annotations/{annotation_id}") + # Enhanced Annotation APIs + async def get_annotation_reply_job_status(self, action: str, job_id: str): + """Get status of an annotation reply action job.""" + url = f"/apps/annotation-reply/{action}/status/{job_id}" + return await self._send_request("GET", url) + + async def list_annotations_with_pagination(self, page: int = 1, limit: int = 20, keyword: str | None = None): + """List annotations for application with pagination.""" + params = {"page": page, "limit": limit} + if keyword: + params["keyword"] = keyword + return await self._send_request("GET", "/apps/annotations", params=params) + + async def create_annotation_with_response(self, question: str, answer: str): + """Create a new annotation with full response handling.""" + data = {"question": question, "answer": answer} + return await self._send_request("POST", "/apps/annotations", json=data) + + async def update_annotation_with_response(self, annotation_id: str, question: str, answer: str): + """Update an existing annotation with full response handling.""" + data = {"question": question, "answer": answer} + url = f"/apps/annotations/{annotation_id}" + return await self._send_request("PUT", url, json=data) + + async def delete_annotation_with_response(self, annotation_id: str): + """Delete an annotation with full response handling.""" + url = f"/apps/annotations/{annotation_id}" + return await self._send_request("DELETE", url) + # Conversation Variables APIs async def get_conversation_variables(self, conversation_id: str, user: str): """Get all variables for a specific conversation. @@ -373,6 +468,52 @@ class AsyncChatClient(AsyncDifyClient): url = f"/conversations/{conversation_id}/variables/{variable_id}" return await self._send_request("PATCH", url, json=data) + # Enhanced Conversation Variable APIs + async def list_conversation_variables_with_pagination( + self, conversation_id: str, user: str, page: int = 1, limit: int = 20 + ): + """List conversation variables with pagination.""" + params = {"page": page, "limit": limit, "user": user} + url = f"/conversations/{conversation_id}/variables" + return await self._send_request("GET", url, params=params) + + async def update_conversation_variable_with_response( + self, conversation_id: str, variable_id: str, user: str, value: Any + ): + """Update a conversation variable with full response handling.""" + data = {"value": value, "user": user} + url = f"/conversations/{conversation_id}/variables/{variable_id}" + return await self._send_request("PUT", url, data=data) + + # Additional annotation methods for API parity + async def get_annotation_reply_job_status(self, action: str, job_id: str): + """Get status of an annotation reply action job.""" + url = f"/apps/annotation-reply/{action}/status/{job_id}" + return await self._send_request("GET", url) + + async def list_annotations_with_pagination(self, page: int = 1, limit: int = 20, keyword: str | None = None): + """List annotations for application with pagination.""" + params = {"page": page, "limit": limit} + if keyword: + params["keyword"] = keyword + return await self._send_request("GET", "/apps/annotations", params=params) + + async def create_annotation_with_response(self, question: str, answer: str): + """Create a new annotation with full response handling.""" + data = {"question": question, "answer": answer} + return await self._send_request("POST", "/apps/annotations", json=data) + + async def update_annotation_with_response(self, annotation_id: str, question: str, answer: str): + """Update an existing annotation with full response handling.""" + data = {"question": question, "answer": answer} + url = f"/apps/annotations/{annotation_id}" + return await self._send_request("PUT", url, json=data) + + async def delete_annotation_with_response(self, annotation_id: str): + """Delete an annotation with full response handling.""" + url = f"/apps/annotations/{annotation_id}" + return await self._send_request("DELETE", url) + class AsyncWorkflowClient(AsyncDifyClient): """Async client for Workflow API operations.""" @@ -436,6 +577,68 @@ class AsyncWorkflowClient(AsyncDifyClient): stream=(response_mode == "streaming"), ) + # Enhanced Workflow APIs + async def get_workflow_draft(self, app_id: str): + """Get workflow draft configuration. + + Args: + app_id: ID of the workflow app + + Returns: + Workflow draft configuration + """ + url = f"/apps/{app_id}/workflow/draft" + return await self._send_request("GET", url) + + async def update_workflow_draft(self, app_id: str, workflow_data: Dict[str, Any]): + """Update workflow draft configuration. + + Args: + app_id: ID of the workflow app + workflow_data: Workflow configuration data + + Returns: + Updated workflow draft + """ + url = f"/apps/{app_id}/workflow/draft" + return await self._send_request("PUT", url, json=workflow_data) + + async def publish_workflow(self, app_id: str): + """Publish workflow from draft. + + Args: + app_id: ID of the workflow app + + Returns: + Published workflow information + """ + url = f"/apps/{app_id}/workflow/publish" + return await self._send_request("POST", url) + + async def get_workflow_run_history( + self, + app_id: str, + page: int = 1, + limit: int = 20, + status: Literal["succeeded", "failed", "stopped"] | None = None, + ): + """Get workflow run history. + + Args: + app_id: ID of the workflow app + page: Page number (default: 1) + limit: Number of items per page (default: 20) + status: Filter by status (optional) + + Returns: + Paginated workflow run history + """ + params = {"page": page, "limit": limit} + if status: + params["status"] = status + url = f"/apps/{app_id}/workflow/runs" + return await self._send_request("GET", url, params=params) + class AsyncWorkspaceClient(AsyncDifyClient): """Async client for workspace-related operations.""" @@ -445,6 +648,41 @@ class AsyncWorkspaceClient(AsyncDifyClient): url = f"/workspaces/current/models/model-types/{model_type}" return await self._send_request("GET", url) + async def get_available_models_by_type(self, model_type: str): + """Get available models by model type (enhanced version).""" + url = f"/workspaces/current/models/model-types/{model_type}" + return await self._send_request("GET", url) + + async def get_model_providers(self): + """Get all model providers.""" + return await self._send_request("GET", "/workspaces/current/model-providers") + + async def get_model_provider_models(self, provider_name: str): + """Get models for a specific provider.""" + url = f"/workspaces/current/model-providers/{provider_name}/models" + return await self._send_request("GET", url) + + async def validate_model_provider_credentials(self, provider_name: str, credentials: Dict[str, Any]): + """Validate model provider credentials.""" + url = f"/workspaces/current/model-providers/{provider_name}/credentials/validate" + return await self._send_request("POST", url, json=credentials) + + # File Management APIs + async def get_file_info(self, file_id: str): + """Get information about a specific file.""" + url = f"/files/{file_id}/info" + return await self._send_request("GET", url) + + async def get_file_download_url(self, file_id: str): + """Get download URL for a file.""" + url = f"/files/{file_id}/download-url" + return await self._send_request("GET", url) + + async def delete_file(self, file_id: str): + """Delete a file.""" + url = f"/files/{file_id}" + return await self._send_request("DELETE", url) + class AsyncKnowledgeBaseClient(AsyncDifyClient): """Async client for Knowledge Base API operations.""" @@ -481,7 +719,7 @@ class AsyncKnowledgeBaseClient(AsyncDifyClient): """List all datasets.""" return await self._send_request("GET", "/datasets", params={"page": page, "limit": page_size}, **kwargs) - async def create_document_by_text(self, name: str, text: str, extra_params: dict | None = None, **kwargs): + async def create_document_by_text(self, name: str, text: str, extra_params: Dict | None = None, **kwargs): """Create a document by text. Args: @@ -508,7 +746,7 @@ class AsyncKnowledgeBaseClient(AsyncDifyClient): document_id: str, name: str, text: str, - extra_params: dict | None = None, + extra_params: Dict | None = None, **kwargs, ): """Update a document by text.""" @@ -522,7 +760,7 @@ class AsyncKnowledgeBaseClient(AsyncDifyClient): self, file_path: str, original_document_id: str | None = None, - extra_params: dict | None = None, + extra_params: Dict | None = None, ): """Create a document by file.""" async with aiofiles.open(file_path, "rb") as f: @@ -538,7 +776,7 @@ class AsyncKnowledgeBaseClient(AsyncDifyClient): url = f"/datasets/{self._get_dataset_id()}/document/create_by_file" return await self._send_request_with_files("POST", url, {"data": json.dumps(data)}, files) - async def update_document_by_file(self, document_id: str, file_path: str, extra_params: dict | None = None): + async def update_document_by_file(self, document_id: str, file_path: str, extra_params: Dict | None = None): """Update a document by file.""" async with aiofiles.open(file_path, "rb") as f: files = {"file": (os.path.basename(file_path), f)} @@ -806,3 +1044,1031 @@ class AsyncKnowledgeBaseClient(AsyncDifyClient): url = f"/datasets/{ds_id}/documents/status/{action}" data = {"document_ids": document_ids} return await self._send_request("PATCH", url, json=data) + + # Enhanced Dataset APIs + + async def create_dataset_from_template(self, template_name: str, name: str, description: str | None = None): + """Create a dataset from a predefined template. + + Args: + template_name: Name of the template to use + name: Name for the new dataset + description: Description for the dataset (optional) + + Returns: + Created dataset information + """ + data = { + "template_name": template_name, + "name": name, + "description": description, + } + return await self._send_request("POST", "/datasets/from-template", json=data) + + async def duplicate_dataset(self, dataset_id: str, name: str): + """Duplicate an existing dataset. + + Args: + dataset_id: ID of dataset to duplicate + name: Name for duplicated dataset + + Returns: + New dataset information + """ + data = {"name": name} + url = f"/datasets/{dataset_id}/duplicate" + return await self._send_request("POST", url, json=data) + + async def update_conversation_variable_with_response( + self, conversation_id: str, variable_id: str, user: str, value: Any + ): + """Update a conversation variable with full response handling.""" + data = {"value": value, "user": user} + url = f"/conversations/{conversation_id}/variables/{variable_id}" + return await self._send_request("PUT", url, json=data) + + async def list_conversation_variables_with_pagination( + self, conversation_id: str, user: str, page: int = 1, limit: int = 20 + ): + """List conversation variables with pagination.""" + params = {"page": page, "limit": limit, "user": user} + url = f"/conversations/{conversation_id}/variables" + return await self._send_request("GET", url, params=params) + + +class AsyncEnterpriseClient(AsyncDifyClient): + """Async Enterprise and Account Management APIs for Dify platform administration.""" + + async def get_account_info(self): + """Get current account information.""" + return await self._send_request("GET", "/account") + + async def update_account_info(self, account_data: Dict[str, Any]): + """Update account information.""" + return await self._send_request("PUT", "/account", json=account_data) + + # Member Management APIs + async def list_members(self, page: int = 1, limit: int = 20, keyword: str | None = None): + """List workspace members with pagination.""" + params = {"page": page, "limit": limit} + if keyword: + params["keyword"] = keyword + return await self._send_request("GET", "/members", params=params) + + async def invite_member(self, email: str, role: str, name: str | None = None): + """Invite a new member to the workspace.""" + data = {"email": email, "role": role} + if name: + data["name"] = name + return await self._send_request("POST", "/members/invite", json=data) + + async def get_member(self, member_id: str): + """Get detailed information about a specific member.""" + url = f"/members/{member_id}" + return await self._send_request("GET", url) + + async def update_member(self, member_id: str, member_data: Dict[str, Any]): + """Update member information.""" + url = f"/members/{member_id}" + return await self._send_request("PUT", url, json=member_data) + + async def remove_member(self, member_id: str): + """Remove a member from the workspace.""" + url = f"/members/{member_id}" + return await self._send_request("DELETE", url) + + async def deactivate_member(self, member_id: str): + """Deactivate a member account.""" + url = f"/members/{member_id}/deactivate" + return await self._send_request("POST", url) + + async def reactivate_member(self, member_id: str): + """Reactivate a deactivated member account.""" + url = f"/members/{member_id}/reactivate" + return await self._send_request("POST", url) + + # Role Management APIs + async def list_roles(self): + """List all available roles in the workspace.""" + return await self._send_request("GET", "/roles") + + async def create_role(self, name: str, description: str, permissions: List[str]): + """Create a new role with specified permissions.""" + data = {"name": name, "description": description, "permissions": permissions} + return await self._send_request("POST", "/roles", json=data) + + async def get_role(self, role_id: str): + """Get detailed information about a specific role.""" + url = f"/roles/{role_id}" + return await self._send_request("GET", url) + + async def update_role(self, role_id: str, role_data: Dict[str, Any]): + """Update role information.""" + url = f"/roles/{role_id}" + return await self._send_request("PUT", url, json=role_data) + + async def delete_role(self, role_id: str): + """Delete a role.""" + url = f"/roles/{role_id}" + return await self._send_request("DELETE", url) + + # Permission Management APIs + async def list_permissions(self): + """List all available permissions.""" + return await self._send_request("GET", "/permissions") + + async def get_role_permissions(self, role_id: str): + """Get permissions for a specific role.""" + url = f"/roles/{role_id}/permissions" + return await self._send_request("GET", url) + + async def update_role_permissions(self, role_id: str, permissions: List[str]): + """Update permissions for a role.""" + url = f"/roles/{role_id}/permissions" + data = {"permissions": permissions} + return await self._send_request("PUT", url, json=data) + + # Workspace Settings APIs + async def get_workspace_settings(self): + """Get workspace settings and configuration.""" + return await self._send_request("GET", "/workspace/settings") + + async def update_workspace_settings(self, settings_data: Dict[str, Any]): + """Update workspace settings.""" + return await self._send_request("PUT", "/workspace/settings", json=settings_data) + + async def get_workspace_statistics(self): + """Get workspace usage statistics.""" + return await self._send_request("GET", "/workspace/statistics") + + # Billing and Subscription APIs + async def get_billing_info(self): + """Get current billing information.""" + return await self._send_request("GET", "/billing") + + async def get_subscription_info(self): + """Get current subscription information.""" + return await self._send_request("GET", "/subscription") + + async def update_subscription(self, subscription_data: Dict[str, Any]): + """Update subscription settings.""" + return await self._send_request("PUT", "/subscription", json=subscription_data) + + async def get_billing_history(self, page: int = 1, limit: int = 20): + """Get billing history with pagination.""" + params = {"page": page, "limit": limit} + return await self._send_request("GET", "/billing/history", params=params) + + async def get_usage_metrics(self, start_date: str, end_date: str, metric_type: str | None = None): + """Get usage metrics for a date range.""" + params = {"start_date": start_date, "end_date": end_date} + if metric_type: + params["metric_type"] = metric_type + return await self._send_request("GET", "/usage/metrics", params=params) + + # Audit Logs APIs + async def get_audit_logs( + self, + page: int = 1, + limit: int = 20, + action: str | None = None, + user_id: str | None = None, + start_date: str | None = None, + end_date: str | None = None, + ): + """Get audit logs with filtering options.""" + params = {"page": page, "limit": limit} + if action: + params["action"] = action + if user_id: + params["user_id"] = user_id + if start_date: + params["start_date"] = start_date + if end_date: + params["end_date"] = end_date + return await self._send_request("GET", "/audit/logs", params=params) + + async def export_audit_logs(self, format: str = "csv", filters: Dict[str, Any] | None = None): + """Export audit logs in specified format.""" + params = {"format": format} + if filters: + params.update(filters) + return await self._send_request("GET", "/audit/logs/export", params=params) + + +class AsyncSecurityClient(AsyncDifyClient): + """Async Security and Access Control APIs for Dify platform security management.""" + + # API Key Management APIs + async def list_api_keys(self, page: int = 1, limit: int = 20, status: str | None = None): + """List all API keys with pagination and filtering.""" + params = {"page": page, "limit": limit} + if status: + params["status"] = status + return await self._send_request("GET", "/security/api-keys", params=params) + + async def create_api_key( + self, + name: str, + permissions: List[str], + expires_at: str | None = None, + description: str | None = None, + ): + """Create a new API key with specified permissions.""" + data = {"name": name, "permissions": permissions} + if expires_at: + data["expires_at"] = expires_at + if description: + data["description"] = description + return await self._send_request("POST", "/security/api-keys", json=data) + + async def get_api_key(self, key_id: str): + """Get detailed information about an API key.""" + url = f"/security/api-keys/{key_id}" + return await self._send_request("GET", url) + + async def update_api_key(self, key_id: str, key_data: Dict[str, Any]): + """Update API key information.""" + url = f"/security/api-keys/{key_id}" + return await self._send_request("PUT", url, json=key_data) + + async def revoke_api_key(self, key_id: str): + """Revoke an API key.""" + url = f"/security/api-keys/{key_id}/revoke" + return await self._send_request("POST", url) + + async def rotate_api_key(self, key_id: str): + """Rotate an API key (generate new key).""" + url = f"/security/api-keys/{key_id}/rotate" + return await self._send_request("POST", url) + + # Rate Limiting APIs + async def get_rate_limits(self): + """Get current rate limiting configuration.""" + return await self._send_request("GET", "/security/rate-limits") + + async def update_rate_limits(self, limits_config: Dict[str, Any]): + """Update rate limiting configuration.""" + return await self._send_request("PUT", "/security/rate-limits", json=limits_config) + + async def get_rate_limit_usage(self, timeframe: str = "1h"): + """Get rate limit usage statistics.""" + params = {"timeframe": timeframe} + return await self._send_request("GET", "/security/rate-limits/usage", params=params) + + # Access Control Lists APIs + async def list_access_policies(self, page: int = 1, limit: int = 20): + """List access control policies.""" + params = {"page": page, "limit": limit} + return await self._send_request("GET", "/security/access-policies", params=params) + + async def create_access_policy(self, policy_data: Dict[str, Any]): + """Create a new access control policy.""" + return await self._send_request("POST", "/security/access-policies", json=policy_data) + + async def get_access_policy(self, policy_id: str): + """Get detailed information about an access policy.""" + url = f"/security/access-policies/{policy_id}" + return await self._send_request("GET", url) + + async def update_access_policy(self, policy_id: str, policy_data: Dict[str, Any]): + """Update an access control policy.""" + url = f"/security/access-policies/{policy_id}" + return await self._send_request("PUT", url, json=policy_data) + + async def delete_access_policy(self, policy_id: str): + """Delete an access control policy.""" + url = f"/security/access-policies/{policy_id}" + return await self._send_request("DELETE", url) + + # Security Settings APIs + async def get_security_settings(self): + """Get security configuration settings.""" + return await self._send_request("GET", "/security/settings") + + async def update_security_settings(self, settings_data: Dict[str, Any]): + """Update security configuration settings.""" + return await self._send_request("PUT", "/security/settings", json=settings_data) + + async def get_security_audit_logs( + self, + page: int = 1, + limit: int = 20, + event_type: str | None = None, + start_date: str | None = None, + end_date: str | None = None, + ): + """Get security-specific audit logs.""" + params = {"page": page, "limit": limit} + if event_type: + params["event_type"] = event_type + if start_date: + params["start_date"] = start_date + if end_date: + params["end_date"] = end_date + return await self._send_request("GET", "/security/audit-logs", params=params) + + # IP Whitelist/Blacklist APIs + async def get_ip_whitelist(self): + """Get IP whitelist configuration.""" + return await self._send_request("GET", "/security/ip-whitelist") + + async def update_ip_whitelist(self, ip_list: List[str], description: str | None = None): + """Update IP whitelist configuration.""" + data = {"ip_list": ip_list} + if description: + data["description"] = description + return await self._send_request("PUT", "/security/ip-whitelist", json=data) + + async def get_ip_blacklist(self): + """Get IP blacklist configuration.""" + return await self._send_request("GET", "/security/ip-blacklist") + + async def update_ip_blacklist(self, ip_list: List[str], description: str | None = None): + """Update IP blacklist configuration.""" + data = {"ip_list": ip_list} + if description: + data["description"] = description + return await self._send_request("PUT", "/security/ip-blacklist", json=data) + + # Authentication Settings APIs + async def get_auth_settings(self): + """Get authentication configuration settings.""" + return await self._send_request("GET", "/security/auth-settings") + + async def update_auth_settings(self, auth_data: Dict[str, Any]): + """Update authentication configuration settings.""" + return await self._send_request("PUT", "/security/auth-settings", json=auth_data) + + async def test_auth_configuration(self, auth_config: Dict[str, Any]): + """Test authentication configuration.""" + return await self._send_request("POST", "/security/auth-settings/test", json=auth_config) + + +class AsyncAnalyticsClient(AsyncDifyClient): + """Async Analytics and Monitoring APIs for Dify platform insights and metrics.""" + + # Usage Analytics APIs + async def get_usage_analytics( + self, + start_date: str, + end_date: str, + granularity: str = "day", + metrics: List[str] | None = None, + ): + """Get usage analytics for specified date range.""" + params = { + "start_date": start_date, + "end_date": end_date, + "granularity": granularity, + } + if metrics: + params["metrics"] = ",".join(metrics) + return await self._send_request("GET", "/analytics/usage", params=params) + + async def get_app_usage_analytics(self, app_id: str, start_date: str, end_date: str, granularity: str = "day"): + """Get usage analytics for a specific app.""" + params = { + "start_date": start_date, + "end_date": end_date, + "granularity": granularity, + } + url = f"/analytics/apps/{app_id}/usage" + return await self._send_request("GET", url, params=params) + + async def get_user_analytics(self, start_date: str, end_date: str, user_segment: str | None = None): + """Get user analytics and behavior insights.""" + params = {"start_date": start_date, "end_date": end_date} + if user_segment: + params["user_segment"] = user_segment + return await self._send_request("GET", "/analytics/users", params=params) + + # Performance Metrics APIs + async def get_performance_metrics(self, start_date: str, end_date: str, metric_type: str | None = None): + """Get performance metrics for the platform.""" + params = {"start_date": start_date, "end_date": end_date} + if metric_type: + params["metric_type"] = metric_type + return await self._send_request("GET", "/analytics/performance", params=params) + + async def get_app_performance_metrics(self, app_id: str, start_date: str, end_date: str): + """Get performance metrics for a specific app.""" + params = {"start_date": start_date, "end_date": end_date} + url = f"/analytics/apps/{app_id}/performance" + return await self._send_request("GET", url, params=params) + + async def get_model_performance_metrics(self, model_provider: str, model_name: str, start_date: str, end_date: str): + """Get performance metrics for a specific model.""" + params = {"start_date": start_date, "end_date": end_date} + url = f"/analytics/models/{model_provider}/{model_name}/performance" + return await self._send_request("GET", url, params=params) + + # Cost Tracking APIs + async def get_cost_analytics(self, start_date: str, end_date: str, cost_type: str | None = None): + """Get cost analytics and breakdown.""" + params = {"start_date": start_date, "end_date": end_date} + if cost_type: + params["cost_type"] = cost_type + return await self._send_request("GET", "/analytics/costs", params=params) + + async def get_app_cost_analytics(self, app_id: str, start_date: str, end_date: str): + """Get cost analytics for a specific app.""" + params = {"start_date": start_date, "end_date": end_date} + url = f"/analytics/apps/{app_id}/costs" + return await self._send_request("GET", url, params=params) + + async def get_cost_forecast(self, forecast_period: str = "30d"): + """Get cost forecast for specified period.""" + params = {"forecast_period": forecast_period} + return await self._send_request("GET", "/analytics/costs/forecast", params=params) + + # Real-time Monitoring APIs + async def get_real_time_metrics(self): + """Get real-time platform metrics.""" + return await self._send_request("GET", "/analytics/realtime") + + async def get_app_real_time_metrics(self, app_id: str): + """Get real-time metrics for a specific app.""" + url = f"/analytics/apps/{app_id}/realtime" + return await self._send_request("GET", url) + + async def get_system_health(self): + """Get overall system health status.""" + return await self._send_request("GET", "/analytics/health") + + # Custom Reports APIs + async def create_custom_report(self, report_config: Dict[str, Any]): + """Create a custom analytics report.""" + return await self._send_request("POST", "/analytics/reports", json=report_config) + + async def list_custom_reports(self, page: int = 1, limit: int = 20): + """List custom analytics reports.""" + params = {"page": page, "limit": limit} + return await self._send_request("GET", "/analytics/reports", params=params) + + async def get_custom_report(self, report_id: str): + """Get a specific custom report.""" + url = f"/analytics/reports/{report_id}" + return await self._send_request("GET", url) + + async def update_custom_report(self, report_id: str, report_config: Dict[str, Any]): + """Update a custom analytics report.""" + url = f"/analytics/reports/{report_id}" + return await self._send_request("PUT", url, json=report_config) + + async def delete_custom_report(self, report_id: str): + """Delete a custom analytics report.""" + url = f"/analytics/reports/{report_id}" + return await self._send_request("DELETE", url) + + async def generate_report(self, report_id: str, format: str = "pdf"): + """Generate and download a custom report.""" + params = {"format": format} + url = f"/analytics/reports/{report_id}/generate" + return await self._send_request("GET", url, params=params) + + # Export APIs + async def export_analytics_data(self, data_type: str, start_date: str, end_date: str, format: str = "csv"): + """Export analytics data in specified format.""" + params = { + "data_type": data_type, + "start_date": start_date, + "end_date": end_date, + "format": format, + } + return await self._send_request("GET", "/analytics/export", params=params) + + +class AsyncIntegrationClient(AsyncDifyClient): + """Async Integration and Plugin APIs for Dify platform extensibility.""" + + # Webhook Management APIs + async def list_webhooks(self, page: int = 1, limit: int = 20, status: str | None = None): + """List webhooks with pagination and filtering.""" + params = {"page": page, "limit": limit} + if status: + params["status"] = status + return await self._send_request("GET", "/integrations/webhooks", params=params) + + async def create_webhook(self, webhook_data: Dict[str, Any]): + """Create a new webhook.""" + return await self._send_request("POST", "/integrations/webhooks", json=webhook_data) + + async def get_webhook(self, webhook_id: str): + """Get detailed information about a webhook.""" + url = f"/integrations/webhooks/{webhook_id}" + return await self._send_request("GET", url) + + async def update_webhook(self, webhook_id: str, webhook_data: Dict[str, Any]): + """Update webhook configuration.""" + url = f"/integrations/webhooks/{webhook_id}" + return await self._send_request("PUT", url, json=webhook_data) + + async def delete_webhook(self, webhook_id: str): + """Delete a webhook.""" + url = f"/integrations/webhooks/{webhook_id}" + return await self._send_request("DELETE", url) + + async def test_webhook(self, webhook_id: str): + """Test webhook delivery.""" + url = f"/integrations/webhooks/{webhook_id}/test" + return await self._send_request("POST", url) + + async def get_webhook_logs(self, webhook_id: str, page: int = 1, limit: int = 20): + """Get webhook delivery logs.""" + params = {"page": page, "limit": limit} + url = f"/integrations/webhooks/{webhook_id}/logs" + return await self._send_request("GET", url, params=params) + + # Plugin Management APIs + async def list_plugins(self, page: int = 1, limit: int = 20, category: str | None = None): + """List available plugins.""" + params = {"page": page, "limit": limit} + if category: + params["category"] = category + return await self._send_request("GET", "/integrations/plugins", params=params) + + async def install_plugin(self, plugin_id: str, config: Dict[str, Any] | None = None): + """Install a plugin.""" + data = {"plugin_id": plugin_id} + if config: + data["config"] = config + return await self._send_request("POST", "/integrations/plugins/install", json=data) + + async def get_installed_plugin(self, installation_id: str): + """Get information about an installed plugin.""" + url = f"/integrations/plugins/{installation_id}" + return await self._send_request("GET", url) + + async def update_plugin_config(self, installation_id: str, config: Dict[str, Any]): + """Update plugin configuration.""" + url = f"/integrations/plugins/{installation_id}/config" + return await self._send_request("PUT", url, json=config) + + async def uninstall_plugin(self, installation_id: str): + """Uninstall a plugin.""" + url = f"/integrations/plugins/{installation_id}" + return await self._send_request("DELETE", url) + + async def enable_plugin(self, installation_id: str): + """Enable a plugin.""" + url = f"/integrations/plugins/{installation_id}/enable" + return await self._send_request("POST", url) + + async def disable_plugin(self, installation_id: str): + """Disable a plugin.""" + url = f"/integrations/plugins/{installation_id}/disable" + return await self._send_request("POST", url) + + # Import/Export APIs + async def export_app_data(self, app_id: str, format: str = "json", include_data: bool = True): + """Export application data.""" + params = {"format": format, "include_data": include_data} + url = f"/integrations/export/apps/{app_id}" + return await self._send_request("GET", url, params=params) + + async def import_app_data(self, import_data: Dict[str, Any]): + """Import application data.""" + return await self._send_request("POST", "/integrations/import/apps", json=import_data) + + async def get_import_status(self, import_id: str): + """Get import operation status.""" + url = f"/integrations/import/{import_id}/status" + return await self._send_request("GET", url) + + async def export_workspace_data(self, format: str = "json", include_data: bool = True): + """Export workspace data.""" + params = {"format": format, "include_data": include_data} + return await self._send_request("GET", "/integrations/export/workspace", params=params) + + async def import_workspace_data(self, import_data: Dict[str, Any]): + """Import workspace data.""" + return await self._send_request("POST", "/integrations/import/workspace", json=import_data) + + # Backup and Restore APIs + async def create_backup(self, backup_config: Dict[str, Any] | None = None): + """Create a system backup.""" + data = backup_config or {} + return await self._send_request("POST", "/integrations/backup/create", json=data) + + async def list_backups(self, page: int = 1, limit: int = 20): + """List available backups.""" + params = {"page": page, "limit": limit} + return await self._send_request("GET", "/integrations/backup", params=params) + + async def get_backup(self, backup_id: str): + """Get backup information.""" + url = f"/integrations/backup/{backup_id}" + return await self._send_request("GET", url) + + async def restore_backup(self, backup_id: str, restore_config: Dict[str, Any] | None = None): + """Restore from backup.""" + data = restore_config or {} + url = f"/integrations/backup/{backup_id}/restore" + return await self._send_request("POST", url, json=data) + + async def delete_backup(self, backup_id: str): + """Delete a backup.""" + url = f"/integrations/backup/{backup_id}" + return await self._send_request("DELETE", url) + + +class AsyncAdvancedModelClient(AsyncDifyClient): + """Async Advanced Model Management APIs for fine-tuning and custom deployments.""" + + # Fine-tuning Job Management APIs + async def list_fine_tuning_jobs( + self, + page: int = 1, + limit: int = 20, + status: str | None = None, + model_provider: str | None = None, + ): + """List fine-tuning jobs with filtering.""" + params = {"page": page, "limit": limit} + if status: + params["status"] = status + if model_provider: + params["model_provider"] = model_provider + return await self._send_request("GET", "/models/fine-tuning/jobs", params=params) + + async def create_fine_tuning_job(self, job_config: Dict[str, Any]): + """Create a new fine-tuning job.""" + return await self._send_request("POST", "/models/fine-tuning/jobs", json=job_config) + + async def get_fine_tuning_job(self, job_id: str): + """Get fine-tuning job details.""" + url = f"/models/fine-tuning/jobs/{job_id}" + return await self._send_request("GET", url) + + async def update_fine_tuning_job(self, job_id: str, job_config: Dict[str, Any]): + """Update fine-tuning job configuration.""" + url = f"/models/fine-tuning/jobs/{job_id}" + return await self._send_request("PUT", url, json=job_config) + + async def cancel_fine_tuning_job(self, job_id: str): + """Cancel a fine-tuning job.""" + url = f"/models/fine-tuning/jobs/{job_id}/cancel" + return await self._send_request("POST", url) + + async def resume_fine_tuning_job(self, job_id: str): + """Resume a paused fine-tuning job.""" + url = f"/models/fine-tuning/jobs/{job_id}/resume" + return await self._send_request("POST", url) + + async def get_fine_tuning_job_metrics(self, job_id: str): + """Get fine-tuning job training metrics.""" + url = f"/models/fine-tuning/jobs/{job_id}/metrics" + return await self._send_request("GET", url) + + async def get_fine_tuning_job_logs(self, job_id: str, page: int = 1, limit: int = 50): + """Get fine-tuning job logs.""" + params = {"page": page, "limit": limit} + url = f"/models/fine-tuning/jobs/{job_id}/logs" + return await self._send_request("GET", url, params=params) + + # Custom Model Deployment APIs + async def list_custom_deployments(self, page: int = 1, limit: int = 20, status: str | None = None): + """List custom model deployments.""" + params = {"page": page, "limit": limit} + if status: + params["status"] = status + return await self._send_request("GET", "/models/custom/deployments", params=params) + + async def create_custom_deployment(self, deployment_config: Dict[str, Any]): + """Create a custom model deployment.""" + return await self._send_request("POST", "/models/custom/deployments", json=deployment_config) + + async def get_custom_deployment(self, deployment_id: str): + """Get custom deployment details.""" + url = f"/models/custom/deployments/{deployment_id}" + return await self._send_request("GET", url) + + async def update_custom_deployment(self, deployment_id: str, deployment_config: Dict[str, Any]): + """Update custom deployment configuration.""" + url = f"/models/custom/deployments/{deployment_id}" + return await self._send_request("PUT", url, json=deployment_config) + + async def delete_custom_deployment(self, deployment_id: str): + """Delete a custom deployment.""" + url = f"/models/custom/deployments/{deployment_id}" + return await self._send_request("DELETE", url) + + async def scale_custom_deployment(self, deployment_id: str, scale_config: Dict[str, Any]): + """Scale custom deployment resources.""" + url = f"/models/custom/deployments/{deployment_id}/scale" + return await self._send_request("POST", url, json=scale_config) + + async def restart_custom_deployment(self, deployment_id: str): + """Restart a custom deployment.""" + url = f"/models/custom/deployments/{deployment_id}/restart" + return await self._send_request("POST", url) + + # Model Performance Monitoring APIs + async def get_model_performance_history( + self, + model_provider: str, + model_name: str, + start_date: str, + end_date: str, + metrics: List[str] | None = None, + ): + """Get model performance history.""" + params = {"start_date": start_date, "end_date": end_date} + if metrics: + params["metrics"] = ",".join(metrics) + url = f"/models/{model_provider}/{model_name}/performance/history" + return await self._send_request("GET", url, params=params) + + async def get_model_health_metrics(self, model_provider: str, model_name: str): + """Get real-time model health metrics.""" + url = f"/models/{model_provider}/{model_name}/health" + return await self._send_request("GET", url) + + async def get_model_usage_stats( + self, + model_provider: str, + model_name: str, + start_date: str, + end_date: str, + granularity: str = "day", + ): + """Get model usage statistics.""" + params = { + "start_date": start_date, + "end_date": end_date, + "granularity": granularity, + } + url = f"/models/{model_provider}/{model_name}/usage" + return await self._send_request("GET", url, params=params) + + async def get_model_cost_analysis(self, model_provider: str, model_name: str, start_date: str, end_date: str): + """Get model cost analysis.""" + params = {"start_date": start_date, "end_date": end_date} + url = f"/models/{model_provider}/{model_name}/costs" + return await self._send_request("GET", url, params=params) + + # Model Versioning APIs + async def list_model_versions(self, model_provider: str, model_name: str, page: int = 1, limit: int = 20): + """List model versions.""" + params = {"page": page, "limit": limit} + url = f"/models/{model_provider}/{model_name}/versions" + return await self._send_request("GET", url, params=params) + + async def create_model_version(self, model_provider: str, model_name: str, version_config: Dict[str, Any]): + """Create a new model version.""" + url = f"/models/{model_provider}/{model_name}/versions" + return await self._send_request("POST", url, json=version_config) + + async def get_model_version(self, model_provider: str, model_name: str, version_id: str): + """Get model version details.""" + url = f"/models/{model_provider}/{model_name}/versions/{version_id}" + return await self._send_request("GET", url) + + async def promote_model_version(self, model_provider: str, model_name: str, version_id: str): + """Promote model version to production.""" + url = f"/models/{model_provider}/{model_name}/versions/{version_id}/promote" + return await self._send_request("POST", url) + + async def rollback_model_version(self, model_provider: str, model_name: str, version_id: str): + """Rollback to a specific model version.""" + url = f"/models/{model_provider}/{model_name}/versions/{version_id}/rollback" + return await self._send_request("POST", url) + + # Model Registry APIs + async def list_registry_models(self, page: int = 1, limit: int = 20, filter: str | None = None): + """List models in registry.""" + params = {"page": page, "limit": limit} + if filter: + params["filter"] = filter + return await self._send_request("GET", "/models/registry", params=params) + + async def register_model(self, model_config: Dict[str, Any]): + """Register a new model in the registry.""" + return await self._send_request("POST", "/models/registry", json=model_config) + + async def get_registry_model(self, model_id: str): + """Get registered model details.""" + url = f"/models/registry/{model_id}" + return await self._send_request("GET", url) + + async def update_registry_model(self, model_id: str, model_config: Dict[str, Any]): + """Update registered model information.""" + url = f"/models/registry/{model_id}" + return await self._send_request("PUT", url, json=model_config) + + async def unregister_model(self, model_id: str): + """Unregister a model from the registry.""" + url = f"/models/registry/{model_id}" + return await self._send_request("DELETE", url) + + +class AsyncAdvancedAppClient(AsyncDifyClient): + """Async Advanced App Configuration APIs for comprehensive app management.""" + + # App Creation and Management APIs + async def create_app(self, app_config: Dict[str, Any]): + """Create a new application.""" + return await self._send_request("POST", "/apps", json=app_config) + + async def list_apps( + self, + page: int = 1, + limit: int = 20, + app_type: str | None = None, + status: str | None = None, + ): + """List applications with filtering.""" + params = {"page": page, "limit": limit} + if app_type: + params["app_type"] = app_type + if status: + params["status"] = status + return await self._send_request("GET", "/apps", params=params) + + async def get_app(self, app_id: str): + """Get detailed application information.""" + url = f"/apps/{app_id}" + return await self._send_request("GET", url) + + async def update_app(self, app_id: str, app_config: Dict[str, Any]): + """Update application configuration.""" + url = f"/apps/{app_id}" + return await self._send_request("PUT", url, json=app_config) + + async def delete_app(self, app_id: str): + """Delete an application.""" + url = f"/apps/{app_id}" + return await self._send_request("DELETE", url) + + async def duplicate_app(self, app_id: str, duplicate_config: Dict[str, Any]): + """Duplicate an application.""" + url = f"/apps/{app_id}/duplicate" + return await self._send_request("POST", url, json=duplicate_config) + + async def archive_app(self, app_id: str): + """Archive an application.""" + url = f"/apps/{app_id}/archive" + return await self._send_request("POST", url) + + async def restore_app(self, app_id: str): + """Restore an archived application.""" + url = f"/apps/{app_id}/restore" + return await self._send_request("POST", url) + + # App Publishing and Versioning APIs + async def publish_app(self, app_id: str, publish_config: Dict[str, Any] | None = None): + """Publish an application.""" + data = publish_config or {} + url = f"/apps/{app_id}/publish" + return await self._send_request("POST", url, json=data) + + async def unpublish_app(self, app_id: str): + """Unpublish an application.""" + url = f"/apps/{app_id}/unpublish" + return await self._send_request("POST", url) + + async def list_app_versions(self, app_id: str, page: int = 1, limit: int = 20): + """List application versions.""" + params = {"page": page, "limit": limit} + url = f"/apps/{app_id}/versions" + return await self._send_request("GET", url, params=params) + + async def create_app_version(self, app_id: str, version_config: Dict[str, Any]): + """Create a new application version.""" + url = f"/apps/{app_id}/versions" + return await self._send_request("POST", url, json=version_config) + + async def get_app_version(self, app_id: str, version_id: str): + """Get application version details.""" + url = f"/apps/{app_id}/versions/{version_id}" + return await self._send_request("GET", url) + + async def rollback_app_version(self, app_id: str, version_id: str): + """Rollback application to a specific version.""" + url = f"/apps/{app_id}/versions/{version_id}/rollback" + return await self._send_request("POST", url) + + # App Template APIs + async def list_app_templates(self, page: int = 1, limit: int = 20, category: str | None = None): + """List available app templates.""" + params = {"page": page, "limit": limit} + if category: + params["category"] = category + return await self._send_request("GET", "/apps/templates", params=params) + + async def get_app_template(self, template_id: str): + """Get app template details.""" + url = f"/apps/templates/{template_id}" + return await self._send_request("GET", url) + + async def create_app_from_template(self, template_id: str, app_config: Dict[str, Any]): + """Create an app from a template.""" + url = f"/apps/templates/{template_id}/create" + return await self._send_request("POST", url, json=app_config) + + async def create_custom_template(self, app_id: str, template_config: Dict[str, Any]): + """Create a custom template from an existing app.""" + url = f"/apps/{app_id}/create-template" + return await self._send_request("POST", url, json=template_config) + + # App Analytics and Metrics APIs + async def get_app_analytics( + self, + app_id: str, + start_date: str, + end_date: str, + metrics: List[str] | None = None, + ): + """Get application analytics.""" + params = {"start_date": start_date, "end_date": end_date} + if metrics: + params["metrics"] = ",".join(metrics) + url = f"/apps/{app_id}/analytics" + return await self._send_request("GET", url, params=params) + + async def get_app_user_feedback(self, app_id: str, page: int = 1, limit: int = 20, rating: int | None = None): + """Get user feedback for an application.""" + params = {"page": page, "limit": limit} + if rating: + params["rating"] = rating + url = f"/apps/{app_id}/feedback" + return await self._send_request("GET", url, params=params) + + async def get_app_error_logs( + self, + app_id: str, + start_date: str, + end_date: str, + error_type: str | None = None, + page: int = 1, + limit: int = 20, + ): + """Get application error logs.""" + params = { + "start_date": start_date, + "end_date": end_date, + "page": page, + "limit": limit, + } + if error_type: + params["error_type"] = error_type + url = f"/apps/{app_id}/errors" + return await self._send_request("GET", url, params=params) + + # Advanced Configuration APIs + async def get_app_advanced_config(self, app_id: str): + """Get advanced application configuration.""" + url = f"/apps/{app_id}/advanced-config" + return await self._send_request("GET", url) + + async def update_app_advanced_config(self, app_id: str, config: Dict[str, Any]): + """Update advanced application configuration.""" + url = f"/apps/{app_id}/advanced-config" + return await self._send_request("PUT", url, json=config) + + async def get_app_environment_variables(self, app_id: str): + """Get application environment variables.""" + url = f"/apps/{app_id}/environment" + return await self._send_request("GET", url) + + async def update_app_environment_variables(self, app_id: str, variables: Dict[str, str]): + """Update application environment variables.""" + url = f"/apps/{app_id}/environment" + return await self._send_request("PUT", url, json=variables) + + async def get_app_resource_limits(self, app_id: str): + """Get application resource limits.""" + url = f"/apps/{app_id}/resource-limits" + return await self._send_request("GET", url) + + async def update_app_resource_limits(self, app_id: str, limits: Dict[str, Any]): + """Update application resource limits.""" + url = f"/apps/{app_id}/resource-limits" + return await self._send_request("PUT", url, json=limits) + + # App Integration APIs + async def get_app_integrations(self, app_id: str): + """Get application integrations.""" + url = f"/apps/{app_id}/integrations" + return await self._send_request("GET", url) + + async def add_app_integration(self, app_id: str, integration_config: Dict[str, Any]): + """Add integration to application.""" + url = f"/apps/{app_id}/integrations" + return await self._send_request("POST", url, json=integration_config) + + async def update_app_integration(self, app_id: str, integration_id: str, config: Dict[str, Any]): + """Update application integration.""" + url = f"/apps/{app_id}/integrations/{integration_id}" + return await self._send_request("PUT", url, json=config) + + async def remove_app_integration(self, app_id: str, integration_id: str): + """Remove integration from application.""" + url = f"/apps/{app_id}/integrations/{integration_id}" + return await self._send_request("DELETE", url) + + async def test_app_integration(self, app_id: str, integration_id: str): + """Test application integration.""" + url = f"/apps/{app_id}/integrations/{integration_id}/test" + return await self._send_request("POST", url) diff --git a/sdks/python-client/dify_client/base_client.py b/sdks/python-client/dify_client/base_client.py new file mode 100644 index 0000000000..0ad6e07b23 --- /dev/null +++ b/sdks/python-client/dify_client/base_client.py @@ -0,0 +1,228 @@ +"""Base client with common functionality for both sync and async clients.""" + +import json +import time +import logging +from typing import Dict, Callable, Optional + +try: + # Python 3.10+ + from typing import ParamSpec +except ImportError: + # Python < 3.10 + from typing_extensions import ParamSpec + +from urllib.parse import urljoin + +import httpx + +P = ParamSpec("P") + +from .exceptions import ( + DifyClientError, + APIError, + AuthenticationError, + RateLimitError, + ValidationError, + NetworkError, + TimeoutError, +) + + +class BaseClientMixin: + """Mixin class providing common functionality for Dify clients.""" + + def __init__( + self, + api_key: str, + base_url: str = "https://api.dify.ai/v1", + timeout: float = 60.0, + max_retries: int = 3, + retry_delay: float = 1.0, + enable_logging: bool = False, + ): + """Initialize the base client. + + Args: + api_key: Your Dify API key + base_url: Base URL for the Dify API + timeout: Request timeout in seconds + max_retries: Maximum number of retry attempts + retry_delay: Delay between retries in seconds + enable_logging: Enable detailed logging + """ + if not api_key: + raise ValidationError("API key is required") + + self.api_key = api_key + self.base_url = base_url.rstrip("/") + self.timeout = timeout + self.max_retries = max_retries + self.retry_delay = retry_delay + self.enable_logging = enable_logging + + # Setup logging + self.logger = logging.getLogger(f"dify_client.{self.__class__.__name__.lower()}") + if enable_logging and not self.logger.handlers: + # Create console handler with formatter + handler = logging.StreamHandler() + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + handler.setFormatter(formatter) + self.logger.addHandler(handler) + self.logger.setLevel(logging.INFO) + self.enable_logging = True + else: + self.enable_logging = enable_logging + + def _get_headers(self, content_type: str = "application/json") -> Dict[str, str]: + """Get common request headers.""" + return { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": content_type, + "User-Agent": "dify-client-python/0.1.12", + } + + def _build_url(self, endpoint: str) -> str: + """Build full URL from endpoint.""" + return urljoin(self.base_url + "/", endpoint.lstrip("/")) + + def _handle_response(self, response: httpx.Response) -> httpx.Response: + """Handle HTTP response and raise appropriate exceptions.""" + try: + if response.status_code == 401: + raise AuthenticationError( + "Authentication failed. Check your API key.", + status_code=response.status_code, + response=response.json() if response.content else None, + ) + elif response.status_code == 429: + retry_after = response.headers.get("Retry-After") + raise RateLimitError( + "Rate limit exceeded. Please try again later.", + retry_after=int(retry_after) if retry_after else None, + ) + elif response.status_code >= 400: + try: + error_data = response.json() + message = error_data.get("message", f"HTTP {response.status_code}") + except: + message = f"HTTP {response.status_code}: {response.text}" + + raise APIError( + message, + status_code=response.status_code, + response=response.json() if response.content else None, + ) + + return response + + except json.JSONDecodeError: + raise APIError( + f"Invalid JSON response: {response.text}", + status_code=response.status_code, + ) + + def _retry_request( + self, + request_func: Callable[P, httpx.Response], + request_context: str | None = None, + *args: P.args, + **kwargs: P.kwargs, + ) -> httpx.Response: + """Retry a request with exponential backoff. + + Args: + request_func: Function that performs the HTTP request + request_context: Context description for logging (e.g., "GET /v1/messages") + *args: Positional arguments to pass to request_func + **kwargs: Keyword arguments to pass to request_func + + Returns: + httpx.Response: Successful response + + Raises: + NetworkError: On network failures after retries + TimeoutError: On timeout failures after retries + APIError: On API errors (4xx/5xx responses) + DifyClientError: On unexpected failures + """ + last_exception = None + + for attempt in range(self.max_retries + 1): + try: + response = request_func(*args, **kwargs) + return response # Let caller handle response processing + + except (httpx.NetworkError, httpx.TimeoutException) as e: + last_exception = e + context_msg = f" {request_context}" if request_context else "" + + if attempt < self.max_retries: + delay = self.retry_delay * (2**attempt) # Exponential backoff + self.logger.warning( + f"Request failed{context_msg} (attempt {attempt + 1}/{self.max_retries + 1}): {e}. " + f"Retrying in {delay:.2f} seconds..." + ) + time.sleep(delay) + else: + self.logger.error(f"Request failed{context_msg} after {self.max_retries + 1} attempts: {e}") + # Convert to custom exceptions + if isinstance(e, httpx.TimeoutException): + from .exceptions import TimeoutError + + raise TimeoutError(f"Request timed out after {self.max_retries} retries{context_msg}") from e + else: + from .exceptions import NetworkError + + raise NetworkError( + f"Network error after {self.max_retries} retries{context_msg}: {str(e)}" + ) from e + + if last_exception: + raise last_exception + raise DifyClientError("Request failed after retries") + + def _validate_params(self, **params) -> None: + """Validate request parameters.""" + for key, value in params.items(): + if value is None: + continue + + # String validations + if isinstance(value, str): + if not value.strip(): + raise ValidationError(f"Parameter '{key}' cannot be empty or whitespace only") + if len(value) > 10000: + raise ValidationError(f"Parameter '{key}' exceeds maximum length of 10000 characters") + + # List validations + elif isinstance(value, list): + if len(value) > 1000: + raise ValidationError(f"Parameter '{key}' exceeds maximum size of 1000 items") + + # Dictionary validations + elif isinstance(value, dict): + if len(value) > 100: + raise ValidationError(f"Parameter '{key}' exceeds maximum size of 100 items") + + # Type-specific validations + if key == "user" and not isinstance(value, str): + raise ValidationError(f"Parameter '{key}' must be a string") + elif key in ["page", "limit", "page_size"] and not isinstance(value, int): + raise ValidationError(f"Parameter '{key}' must be an integer") + elif key == "files" and not isinstance(value, (list, dict)): + raise ValidationError(f"Parameter '{key}' must be a list or dict") + elif key == "rating" and value not in ["like", "dislike"]: + raise ValidationError(f"Parameter '{key}' must be 'like' or 'dislike'") + + def _log_request(self, method: str, url: str, **kwargs) -> None: + """Log request details.""" + self.logger.info(f"Making {method} request to {url}") + if kwargs.get("json"): + self.logger.debug(f"Request body: {kwargs['json']}") + if kwargs.get("params"): + self.logger.debug(f"Query params: {kwargs['params']}") + + def _log_response(self, response: httpx.Response) -> None: + """Log response details.""" + self.logger.info(f"Received response: {response.status_code} ({len(response.content)} bytes)") diff --git a/sdks/python-client/dify_client/client.py b/sdks/python-client/dify_client/client.py index 41c5abe16d..cebdf6845c 100644 --- a/sdks/python-client/dify_client/client.py +++ b/sdks/python-client/dify_client/client.py @@ -1,11 +1,20 @@ import json +import logging import os -from typing import Literal, Dict, List, Any, IO +from typing import Literal, Dict, List, Any, IO, Optional, Union import httpx +from .base_client import BaseClientMixin +from .exceptions import ( + APIError, + AuthenticationError, + RateLimitError, + ValidationError, + FileUploadError, +) -class DifyClient: +class DifyClient(BaseClientMixin): """Synchronous Dify API client. This client uses httpx.Client for efficient connection pooling and resource management. @@ -21,6 +30,9 @@ class DifyClient: api_key: str, base_url: str = "https://api.dify.ai/v1", timeout: float = 60.0, + max_retries: int = 3, + retry_delay: float = 1.0, + enable_logging: bool = False, ): """Initialize the Dify client. @@ -28,9 +40,13 @@ class DifyClient: api_key: Your Dify API key base_url: Base URL for the Dify API timeout: Request timeout in seconds (default: 60.0) + max_retries: Maximum number of retry attempts (default: 3) + retry_delay: Delay between retries in seconds (default: 1.0) + enable_logging: Whether to enable request logging (default: True) """ - self.api_key = api_key - self.base_url = base_url + # Initialize base client functionality + BaseClientMixin.__init__(self, api_key, base_url, timeout, max_retries, retry_delay, enable_logging) + self._client = httpx.Client( base_url=base_url, timeout=httpx.Timeout(timeout, connect=5.0), @@ -53,12 +69,12 @@ class DifyClient: self, method: str, endpoint: str, - json: dict | None = None, - params: dict | None = None, + json: Dict[str, Any] | None = None, + params: Dict[str, Any] | None = None, stream: bool = False, **kwargs, ): - """Send an HTTP request to the Dify API. + """Send an HTTP request to the Dify API with retry logic. Args: method: HTTP method (GET, POST, PUT, PATCH, DELETE) @@ -71,23 +87,91 @@ class DifyClient: Returns: httpx.Response object """ + # Validate parameters + if json: + self._validate_params(**json) + if params: + self._validate_params(**params) + headers = { "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json", } - # httpx.Client automatically prepends base_url - response = self._client.request( - method, - endpoint, - json=json, - params=params, - headers=headers, - **kwargs, - ) + def make_request(): + """Inner function to perform the actual HTTP request.""" + # Log request if logging is enabled + if self.enable_logging: + self.logger.info(f"Sending {method} request to {endpoint}") + # Debug logging for detailed information + if self.logger.isEnabledFor(logging.DEBUG): + if json: + self.logger.debug(f"Request body: {json}") + if params: + self.logger.debug(f"Request params: {params}") + + # httpx.Client automatically prepends base_url + response = self._client.request( + method, + endpoint, + json=json, + params=params, + headers=headers, + **kwargs, + ) + + # Log response if logging is enabled + if self.enable_logging: + self.logger.info(f"Received response: {response.status_code}") + + return response + + # Use the retry mechanism from base client + request_context = f"{method} {endpoint}" + response = self._retry_request(make_request, request_context) + + # Handle error responses (API errors don't retry) + self._handle_error_response(response) return response + def _handle_error_response(self, response, is_upload_request: bool = False) -> None: + """Handle HTTP error responses and raise appropriate exceptions.""" + + if response.status_code < 400: + return # Success response + + try: + error_data = response.json() + message = error_data.get("message", f"HTTP {response.status_code}") + except (ValueError, KeyError): + message = f"HTTP {response.status_code}" + error_data = None + + # Log error response if logging is enabled + if self.enable_logging: + self.logger.error(f"API error: {response.status_code} - {message}") + + if response.status_code == 401: + raise AuthenticationError(message, response.status_code, error_data) + elif response.status_code == 429: + retry_after = response.headers.get("Retry-After") + raise RateLimitError(message, retry_after) + elif response.status_code == 422: + raise ValidationError(message, response.status_code, error_data) + elif response.status_code == 400: + # Check if this is a file upload error based on the URL or context + current_url = getattr(response, "url", "") or "" + if is_upload_request or "upload" in str(current_url).lower() or "files" in str(current_url).lower(): + raise FileUploadError(message, response.status_code, error_data) + else: + raise APIError(message, response.status_code, error_data) + elif response.status_code >= 500: + # Server errors should raise APIError + raise APIError(message, response.status_code, error_data) + elif response.status_code >= 400: + raise APIError(message, response.status_code, error_data) + def _send_request_with_files(self, method: str, endpoint: str, data: dict, files: dict): """Send an HTTP request with file uploads. @@ -102,6 +186,12 @@ class DifyClient: """ headers = {"Authorization": f"Bearer {self.api_key}"} + # Log file upload request if logging is enabled + if self.enable_logging: + self.logger.info(f"Sending {method} file upload request to {endpoint}") + self.logger.debug(f"Form data: {data}") + self.logger.debug(f"Files: {files}") + response = self._client.request( method, endpoint, @@ -110,9 +200,17 @@ class DifyClient: files=files, ) + # Log response if logging is enabled + if self.enable_logging: + self.logger.info(f"Received file upload response: {response.status_code}") + + # Handle error responses + self._handle_error_response(response, is_upload_request=True) + return response def message_feedback(self, message_id: str, rating: Literal["like", "dislike"], user: str): + self._validate_params(message_id=message_id, rating=rating, user=user) data = {"rating": rating, "user": user} return self._send_request("POST", f"/messages/{message_id}/feedbacks", data) @@ -144,6 +242,72 @@ class DifyClient: """Get file preview by file ID.""" return self._send_request("GET", f"/files/{file_id}/preview") + # App Configuration APIs + def get_app_site_config(self, app_id: str): + """Get app site configuration. + + Args: + app_id: ID of the app + + Returns: + App site configuration + """ + url = f"/apps/{app_id}/site/config" + return self._send_request("GET", url) + + def update_app_site_config(self, app_id: str, config_data: Dict[str, Any]): + """Update app site configuration. + + Args: + app_id: ID of the app + config_data: Configuration data to update + + Returns: + Updated app site configuration + """ + url = f"/apps/{app_id}/site/config" + return self._send_request("PUT", url, json=config_data) + + def get_app_api_tokens(self, app_id: str): + """Get API tokens for an app. + + Args: + app_id: ID of the app + + Returns: + List of API tokens + """ + url = f"/apps/{app_id}/api-tokens" + return self._send_request("GET", url) + + def create_app_api_token(self, app_id: str, name: str, description: str | None = None): + """Create a new API token for an app. + + Args: + app_id: ID of the app + name: Name for the API token + description: Description for the API token (optional) + + Returns: + Created API token information + """ + data = {"name": name, "description": description} + url = f"/apps/{app_id}/api-tokens" + return self._send_request("POST", url, json=data) + + def delete_app_api_token(self, app_id: str, token_id: str): + """Delete an API token. + + Args: + app_id: ID of the app + token_id: ID of the token to delete + + Returns: + Deletion result + """ + url = f"/apps/{app_id}/api-tokens/{token_id}" + return self._send_request("DELETE", url) + class CompletionClient(DifyClient): def create_completion_message( @@ -151,8 +315,16 @@ class CompletionClient(DifyClient): inputs: dict, response_mode: Literal["blocking", "streaming"], user: str, - files: dict | None = None, + files: Dict[str, Any] | None = None, ): + # Validate parameters + if not isinstance(inputs, dict): + raise ValidationError("inputs must be a dictionary") + if response_mode not in ["blocking", "streaming"]: + raise ValidationError("response_mode must be 'blocking' or 'streaming'") + + self._validate_params(inputs=inputs, response_mode=response_mode, user=user) + data = { "inputs": inputs, "response_mode": response_mode, @@ -175,8 +347,18 @@ class ChatClient(DifyClient): user: str, response_mode: Literal["blocking", "streaming"] = "blocking", conversation_id: str | None = None, - files: dict | None = None, + files: Dict[str, Any] | None = None, ): + # Validate parameters + if not isinstance(inputs, dict): + raise ValidationError("inputs must be a dictionary") + if not isinstance(query, str) or not query.strip(): + raise ValidationError("query must be a non-empty string") + if response_mode not in ["blocking", "streaming"]: + raise ValidationError("response_mode must be 'blocking' or 'streaming'") + + self._validate_params(inputs=inputs, query=query, user=user, response_mode=response_mode) + data = { "inputs": inputs, "query": query, @@ -238,7 +420,7 @@ class ChatClient(DifyClient): data = {"user": user} return self._send_request("DELETE", f"/conversations/{conversation_id}", data) - def audio_to_text(self, audio_file: IO[bytes] | tuple, user: str): + def audio_to_text(self, audio_file: Union[IO[bytes], tuple], user: str): data = {"user": user} files = {"file": audio_file} return self._send_request_with_files("POST", "/audio-to-text", data, files) @@ -313,7 +495,48 @@ class ChatClient(DifyClient): """ data = {"value": value, "user": user} url = f"/conversations/{conversation_id}/variables/{variable_id}" - return self._send_request("PATCH", url, json=data) + return self._send_request("PUT", url, json=data) + + def delete_annotation_with_response(self, annotation_id: str): + """Delete an annotation with full response handling.""" + url = f"/apps/annotations/{annotation_id}" + return self._send_request("DELETE", url) + + def list_conversation_variables_with_pagination( + self, conversation_id: str, user: str, page: int = 1, limit: int = 20 + ): + """List conversation variables with pagination.""" + params = {"page": page, "limit": limit, "user": user} + url = f"/conversations/{conversation_id}/variables" + return self._send_request("GET", url, params=params) + + def update_conversation_variable_with_response(self, conversation_id: str, variable_id: str, user: str, value: Any): + """Update a conversation variable with full response handling.""" + data = {"value": value, "user": user} + url = f"/conversations/{conversation_id}/variables/{variable_id}" + return self._send_request("PUT", url, json=data) + + # Enhanced Annotation APIs + def get_annotation_reply_job_status(self, action: str, job_id: str): + """Get status of an annotation reply action job.""" + url = f"/apps/annotation-reply/{action}/status/{job_id}" + return self._send_request("GET", url) + + def list_annotations_with_pagination(self, page: int = 1, limit: int = 20, keyword: str | None = None): + """List annotations with pagination.""" + params = {"page": page, "limit": limit, "keyword": keyword} + return self._send_request("GET", "/apps/annotations", params=params) + + def create_annotation_with_response(self, question: str, answer: str): + """Create an annotation with full response handling.""" + data = {"question": question, "answer": answer} + return self._send_request("POST", "/apps/annotations", json=data) + + def update_annotation_with_response(self, annotation_id: str, question: str, answer: str): + """Update an annotation with full response handling.""" + data = {"question": question, "answer": answer} + url = f"/apps/annotations/{annotation_id}" + return self._send_request("PUT", url, json=data) class WorkflowClient(DifyClient): @@ -376,6 +599,68 @@ class WorkflowClient(DifyClient): stream=(response_mode == "streaming"), ) + # Enhanced Workflow APIs + def get_workflow_draft(self, app_id: str): + """Get workflow draft configuration. + + Args: + app_id: ID of the workflow app + + Returns: + Workflow draft configuration + """ + url = f"/apps/{app_id}/workflow/draft" + return self._send_request("GET", url) + + def update_workflow_draft(self, app_id: str, workflow_data: Dict[str, Any]): + """Update workflow draft configuration. + + Args: + app_id: ID of the workflow app + workflow_data: Workflow configuration data + + Returns: + Updated workflow draft + """ + url = f"/apps/{app_id}/workflow/draft" + return self._send_request("PUT", url, json=workflow_data) + + def publish_workflow(self, app_id: str): + """Publish workflow from draft. + + Args: + app_id: ID of the workflow app + + Returns: + Published workflow information + """ + url = f"/apps/{app_id}/workflow/publish" + return self._send_request("POST", url) + + def get_workflow_run_history( + self, + app_id: str, + page: int = 1, + limit: int = 20, + status: Literal["succeeded", "failed", "stopped"] | None = None, + ): + """Get workflow run history. + + Args: + app_id: ID of the workflow app + page: Page number (default: 1) + limit: Number of items per page (default: 20) + status: Filter by status (optional) + + Returns: + Paginated workflow run history + """ + params = {"page": page, "limit": limit} + if status: + params["status"] = status + url = f"/apps/{app_id}/workflow/runs" + return self._send_request("GET", url, params=params) + class WorkspaceClient(DifyClient): """Client for workspace-related operations.""" @@ -385,6 +670,41 @@ class WorkspaceClient(DifyClient): url = f"/workspaces/current/models/model-types/{model_type}" return self._send_request("GET", url) + def get_available_models_by_type(self, model_type: str): + """Get available models by model type (enhanced version).""" + url = f"/workspaces/current/models/model-types/{model_type}" + return self._send_request("GET", url) + + def get_model_providers(self): + """Get all model providers.""" + return self._send_request("GET", "/workspaces/current/model-providers") + + def get_model_provider_models(self, provider_name: str): + """Get models for a specific provider.""" + url = f"/workspaces/current/model-providers/{provider_name}/models" + return self._send_request("GET", url) + + def validate_model_provider_credentials(self, provider_name: str, credentials: Dict[str, Any]): + """Validate model provider credentials.""" + url = f"/workspaces/current/model-providers/{provider_name}/credentials/validate" + return self._send_request("POST", url, json=credentials) + + # File Management APIs + def get_file_info(self, file_id: str): + """Get information about a specific file.""" + url = f"/files/{file_id}/info" + return self._send_request("GET", url) + + def get_file_download_url(self, file_id: str): + """Get download URL for a file.""" + url = f"/files/{file_id}/download-url" + return self._send_request("GET", url) + + def delete_file(self, file_id: str): + """Delete a file.""" + url = f"/files/{file_id}" + return self._send_request("DELETE", url) + class KnowledgeBaseClient(DifyClient): def __init__( @@ -416,7 +736,7 @@ class KnowledgeBaseClient(DifyClient): def list_datasets(self, page: int = 1, page_size: int = 20, **kwargs): return self._send_request("GET", "/datasets", params={"page": page, "limit": page_size}, **kwargs) - def create_document_by_text(self, name, text, extra_params: dict | None = None, **kwargs): + def create_document_by_text(self, name, text, extra_params: Dict[str, Any] | None = None, **kwargs): """ Create a document by text. @@ -458,7 +778,7 @@ class KnowledgeBaseClient(DifyClient): document_id: str, name: str, text: str, - extra_params: dict | None = None, + extra_params: Dict[str, Any] | None = None, **kwargs, ): """ @@ -497,7 +817,7 @@ class KnowledgeBaseClient(DifyClient): self, file_path: str, original_document_id: str | None = None, - extra_params: dict | None = None, + extra_params: Dict[str, Any] | None = None, ): """ Create a document by file. @@ -537,7 +857,12 @@ class KnowledgeBaseClient(DifyClient): url = f"/datasets/{self._get_dataset_id()}/document/create_by_file" return self._send_request_with_files("POST", url, {"data": json.dumps(data)}, files) - def update_document_by_file(self, document_id: str, file_path: str, extra_params: dict | None = None): + def update_document_by_file( + self, + document_id: str, + file_path: str, + extra_params: Dict[str, Any] | None = None, + ): """ Update a document by file. @@ -893,3 +1218,50 @@ class KnowledgeBaseClient(DifyClient): url = f"/datasets/{ds_id}/documents/status/{action}" data = {"document_ids": document_ids} return self._send_request("PATCH", url, json=data) + + # Enhanced Dataset APIs + def create_dataset_from_template(self, template_name: str, name: str, description: str | None = None): + """Create a dataset from a predefined template. + + Args: + template_name: Name of the template to use + name: Name for the new dataset + description: Description for the dataset (optional) + + Returns: + Created dataset information + """ + data = { + "template_name": template_name, + "name": name, + "description": description, + } + return self._send_request("POST", "/datasets/from-template", json=data) + + def duplicate_dataset(self, dataset_id: str, name: str): + """Duplicate an existing dataset. + + Args: + dataset_id: ID of dataset to duplicate + name: Name for duplicated dataset + + Returns: + New dataset information + """ + data = {"name": name} + url = f"/datasets/{dataset_id}/duplicate" + return self._send_request("POST", url, json=data) + + def list_conversation_variables_with_pagination( + self, conversation_id: str, user: str, page: int = 1, limit: int = 20 + ): + """List conversation variables with pagination.""" + params = {"page": page, "limit": limit, "user": user} + url = f"/conversations/{conversation_id}/variables" + return self._send_request("GET", url, params=params) + + def update_conversation_variable_with_response(self, conversation_id: str, variable_id: str, user: str, value: Any): + """Update a conversation variable with full response handling.""" + data = {"value": value, "user": user} + url = f"/conversations/{conversation_id}/variables/{variable_id}" + return self._send_request("PUT", url, json=data) diff --git a/sdks/python-client/dify_client/exceptions.py b/sdks/python-client/dify_client/exceptions.py new file mode 100644 index 0000000000..e7ba2ff4b2 --- /dev/null +++ b/sdks/python-client/dify_client/exceptions.py @@ -0,0 +1,71 @@ +"""Custom exceptions for the Dify client.""" + +from typing import Optional, Dict, Any + + +class DifyClientError(Exception): + """Base exception for all Dify client errors.""" + + def __init__(self, message: str, status_code: int | None = None, response: Dict[str, Any] | None = None): + super().__init__(message) + self.message = message + self.status_code = status_code + self.response = response + + +class APIError(DifyClientError): + """Raised when the API returns an error response.""" + + def __init__(self, message: str, status_code: int, response: Dict[str, Any] | None = None): + super().__init__(message, status_code, response) + self.status_code = status_code + + +class AuthenticationError(DifyClientError): + """Raised when authentication fails.""" + + pass + + +class RateLimitError(DifyClientError): + """Raised when rate limit is exceeded.""" + + def __init__(self, message: str = "Rate limit exceeded", retry_after: int | None = None): + super().__init__(message) + self.retry_after = retry_after + + +class ValidationError(DifyClientError): + """Raised when request validation fails.""" + + pass + + +class NetworkError(DifyClientError): + """Raised when network-related errors occur.""" + + pass + + +class TimeoutError(DifyClientError): + """Raised when request times out.""" + + pass + + +class FileUploadError(DifyClientError): + """Raised when file upload fails.""" + + pass + + +class DatasetError(DifyClientError): + """Raised when dataset operations fail.""" + + pass + + +class WorkflowError(DifyClientError): + """Raised when workflow operations fail.""" + + pass diff --git a/sdks/python-client/dify_client/models.py b/sdks/python-client/dify_client/models.py new file mode 100644 index 0000000000..0321e9c3f4 --- /dev/null +++ b/sdks/python-client/dify_client/models.py @@ -0,0 +1,396 @@ +"""Response models for the Dify client with proper type hints.""" + +from typing import Optional, List, Dict, Any, Literal, Union +from dataclasses import dataclass, field +from datetime import datetime + + +@dataclass +class BaseResponse: + """Base response model.""" + + success: bool = True + message: str | None = None + + +@dataclass +class ErrorResponse(BaseResponse): + """Error response model.""" + + error_code: str | None = None + details: Dict[str, Any] | None = None + success: bool = False + + +@dataclass +class FileInfo: + """File information model.""" + + id: str + name: str + size: int + mime_type: str + url: str | None = None + created_at: datetime | None = None + + +@dataclass +class MessageResponse(BaseResponse): + """Message response model.""" + + id: str = "" + answer: str = "" + conversation_id: str | None = None + created_at: int | None = None + metadata: Dict[str, Any] | None = None + files: List[Dict[str, Any]] | None = None + + +@dataclass +class ConversationResponse(BaseResponse): + """Conversation response model.""" + + id: str = "" + name: str = "" + inputs: Dict[str, Any] | None = None + status: str | None = None + created_at: int | None = None + updated_at: int | None = None + + +@dataclass +class DatasetResponse(BaseResponse): + """Dataset response model.""" + + id: str = "" + name: str = "" + description: str | None = None + permission: str | None = None + indexing_technique: str | None = None + embedding_model: str | None = None + embedding_model_provider: str | None = None + retrieval_model: Dict[str, Any] | None = None + document_count: int | None = None + word_count: int | None = None + app_count: int | None = None + created_at: int | None = None + updated_at: int | None = None + + +@dataclass +class DocumentResponse(BaseResponse): + """Document response model.""" + + id: str = "" + name: str = "" + data_source_type: str | None = None + data_source_info: Dict[str, Any] | None = None + dataset_process_rule_id: str | None = None + batch: str | None = None + position: int | None = None + enabled: bool | None = None + disabled_at: float | None = None + disabled_by: str | None = None + archived: bool | None = None + archived_reason: str | None = None + archived_at: float | None = None + archived_by: str | None = None + word_count: int | None = None + hit_count: int | None = None + doc_form: str | None = None + doc_metadata: Dict[str, Any] | None = None + created_at: float | None = None + updated_at: float | None = None + indexing_status: str | None = None + completed_at: float | None = None + paused_at: float | None = None + error: str | None = None + stopped_at: float | None = None + + +@dataclass +class DocumentSegmentResponse(BaseResponse): + """Document segment response model.""" + + id: str = "" + position: int | None = None + document_id: str | None = None + content: str | None = None + answer: str | None = None + word_count: int | None = None + tokens: int | None = None + keywords: List[str] | None = None + index_node_id: str | None = None + index_node_hash: str | None = None + hit_count: int | None = None + enabled: bool | None = None + disabled_at: float | None = None + disabled_by: str | None = None + status: str | None = None + created_by: str | None = None + created_at: float | None = None + indexing_at: float | None = None + completed_at: float | None = None + error: str | None = None + stopped_at: float | None = None + + +@dataclass +class WorkflowRunResponse(BaseResponse): + """Workflow run response model.""" + + id: str = "" + workflow_id: str | None = None + status: Literal["running", "succeeded", "failed", "stopped"] | None = None + inputs: Dict[str, Any] | None = None + outputs: Dict[str, Any] | None = None + error: str | None = None + elapsed_time: float | None = None + total_tokens: int | None = None + total_steps: int | None = None + created_at: float | None = None + finished_at: float | None = None + + +@dataclass +class ApplicationParametersResponse(BaseResponse): + """Application parameters response model.""" + + opening_statement: str | None = None + suggested_questions: List[str] | None = None + speech_to_text: Dict[str, Any] | None = None + text_to_speech: Dict[str, Any] | None = None + retriever_resource: Dict[str, Any] | None = None + sensitive_word_avoidance: Dict[str, Any] | None = None + file_upload: Dict[str, Any] | None = None + system_parameters: Dict[str, Any] | None = None + user_input_form: List[Dict[str, Any]] | None = None + + +@dataclass +class AnnotationResponse(BaseResponse): + """Annotation response model.""" + + id: str = "" + question: str = "" + answer: str = "" + content: str | None = None + created_at: float | None = None + updated_at: float | None = None + created_by: str | None = None + updated_by: str | None = None + hit_count: int | None = None + + +@dataclass +class PaginatedResponse(BaseResponse): + """Paginated response model.""" + + data: List[Any] = field(default_factory=list) + has_more: bool = False + limit: int = 0 + total: int = 0 + page: int | None = None + + +@dataclass +class ConversationVariableResponse(BaseResponse): + """Conversation variable response model.""" + + conversation_id: str = "" + variables: List[Dict[str, Any]] = field(default_factory=list) + + +@dataclass +class FileUploadResponse(BaseResponse): + """File upload response model.""" + + id: str = "" + name: str = "" + size: int = 0 + mime_type: str = "" + url: str | None = None + created_at: float | None = None + + +@dataclass +class AudioResponse(BaseResponse): + """Audio generation/response model.""" + + audio: str | None = None # Base64 encoded audio data or URL + audio_url: str | None = None + duration: float | None = None + sample_rate: int | None = None + + +@dataclass +class SuggestedQuestionsResponse(BaseResponse): + """Suggested questions response model.""" + + message_id: str = "" + questions: List[str] = field(default_factory=list) + + +@dataclass +class AppInfoResponse(BaseResponse): + """App info response model.""" + + id: str = "" + name: str = "" + description: str | None = None + icon: str | None = None + icon_background: str | None = None + mode: str | None = None + tags: List[str] | None = None + enable_site: bool | None = None + enable_api: bool | None = None + api_token: str | None = None + + +@dataclass +class WorkspaceModelsResponse(BaseResponse): + """Workspace models response model.""" + + models: List[Dict[str, Any]] = field(default_factory=list) + + +@dataclass +class HitTestingResponse(BaseResponse): + """Hit testing response model.""" + + query: str = "" + records: List[Dict[str, Any]] = field(default_factory=list) + + +@dataclass +class DatasetTagsResponse(BaseResponse): + """Dataset tags response model.""" + + tags: List[Dict[str, Any]] = field(default_factory=list) + + +@dataclass +class WorkflowLogsResponse(BaseResponse): + """Workflow logs response model.""" + + logs: List[Dict[str, Any]] = field(default_factory=list) + total: int = 0 + page: int = 0 + limit: int = 0 + has_more: bool = False + + +@dataclass +class ModelProviderResponse(BaseResponse): + """Model provider response model.""" + + provider_name: str = "" + provider_type: str = "" + models: List[Dict[str, Any]] = field(default_factory=list) + is_enabled: bool = False + credentials: Dict[str, Any] | None = None + + +@dataclass +class FileInfoResponse(BaseResponse): + """File info response model.""" + + id: str = "" + name: str = "" + size: int = 0 + mime_type: str = "" + url: str | None = None + created_at: int | None = None + metadata: Dict[str, Any] | None = None + + +@dataclass +class WorkflowDraftResponse(BaseResponse): + """Workflow draft response model.""" + + id: str = "" + app_id: str = "" + draft_data: Dict[str, Any] = field(default_factory=dict) + version: int = 0 + created_at: int | None = None + updated_at: int | None = None + + +@dataclass +class ApiTokenResponse(BaseResponse): + """API token response model.""" + + id: str = "" + name: str = "" + token: str = "" + description: str | None = None + created_at: int | None = None + last_used_at: int | None = None + is_active: bool = True + + +@dataclass +class JobStatusResponse(BaseResponse): + """Job status response model.""" + + job_id: str = "" + job_status: str = "" + error_msg: str | None = None + progress: float | None = None + created_at: int | None = None + updated_at: int | None = None + + +@dataclass +class DatasetQueryResponse(BaseResponse): + """Dataset query response model.""" + + query: str = "" + records: List[Dict[str, Any]] = field(default_factory=list) + total: int = 0 + search_time: float | None = None + retrieval_model: Dict[str, Any] | None = None + + +@dataclass +class DatasetTemplateResponse(BaseResponse): + """Dataset template response model.""" + + template_name: str = "" + display_name: str = "" + description: str = "" + category: str = "" + icon: str | None = None + config_schema: Dict[str, Any] = field(default_factory=dict) + + +# Type aliases for common response types +ResponseType = Union[ + BaseResponse, + ErrorResponse, + MessageResponse, + ConversationResponse, + DatasetResponse, + DocumentResponse, + DocumentSegmentResponse, + WorkflowRunResponse, + ApplicationParametersResponse, + AnnotationResponse, + PaginatedResponse, + ConversationVariableResponse, + FileUploadResponse, + AudioResponse, + SuggestedQuestionsResponse, + AppInfoResponse, + WorkspaceModelsResponse, + HitTestingResponse, + DatasetTagsResponse, + WorkflowLogsResponse, + ModelProviderResponse, + FileInfoResponse, + WorkflowDraftResponse, + ApiTokenResponse, + JobStatusResponse, + DatasetQueryResponse, + DatasetTemplateResponse, +] diff --git a/sdks/python-client/examples/advanced_usage.py b/sdks/python-client/examples/advanced_usage.py new file mode 100644 index 0000000000..bc8720bef2 --- /dev/null +++ b/sdks/python-client/examples/advanced_usage.py @@ -0,0 +1,264 @@ +""" +Advanced usage examples for the Dify Python SDK. + +This example demonstrates: +- Error handling and retries +- Logging configuration +- Context managers +- Async usage +- File uploads +- Dataset management +""" + +import asyncio +import logging +from pathlib import Path + +from dify_client import ( + ChatClient, + CompletionClient, + AsyncChatClient, + KnowledgeBaseClient, + DifyClient, +) +from dify_client.exceptions import ( + APIError, + RateLimitError, + AuthenticationError, + DifyClientError, +) + + +def setup_logging(): + """Setup logging for the SDK.""" + logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") + + +def example_chat_with_error_handling(): + """Example of chat with comprehensive error handling.""" + api_key = "your-api-key-here" + + try: + with ChatClient(api_key, enable_logging=True) as client: + # Simple chat message + response = client.create_chat_message( + inputs={}, query="Hello, how are you?", user="user-123", response_mode="blocking" + ) + + result = response.json() + print(f"Response: {result.get('answer')}") + + except AuthenticationError as e: + print(f"Authentication failed: {e}") + print("Please check your API key") + + except RateLimitError as e: + print(f"Rate limit exceeded: {e}") + if e.retry_after: + print(f"Retry after {e.retry_after} seconds") + + except APIError as e: + print(f"API error: {e.message}") + print(f"Status code: {e.status_code}") + + except DifyClientError as e: + print(f"Dify client error: {e}") + + except Exception as e: + print(f"Unexpected error: {e}") + + +def example_completion_with_files(): + """Example of completion with file upload.""" + api_key = "your-api-key-here" + + with CompletionClient(api_key) as client: + # Upload an image file first + file_path = "path/to/your/image.jpg" + + try: + with open(file_path, "rb") as f: + files = {"file": (Path(file_path).name, f, "image/jpeg")} + upload_response = client.file_upload("user-123", files) + upload_response.raise_for_status() + + file_id = upload_response.json().get("id") + print(f"File uploaded with ID: {file_id}") + + # Use the uploaded file in completion + files_list = [{"type": "image", "transfer_method": "local_file", "upload_file_id": file_id}] + + completion_response = client.create_completion_message( + inputs={"query": "Describe this image"}, response_mode="blocking", user="user-123", files=files_list + ) + + result = completion_response.json() + print(f"Completion result: {result.get('answer')}") + + except FileNotFoundError: + print(f"File not found: {file_path}") + except Exception as e: + print(f"Error during file upload/completion: {e}") + + +def example_dataset_management(): + """Example of dataset management operations.""" + api_key = "your-api-key-here" + + with KnowledgeBaseClient(api_key) as kb_client: + try: + # Create a new dataset + create_response = kb_client.create_dataset(name="My Test Dataset") + create_response.raise_for_status() + + dataset_id = create_response.json().get("id") + print(f"Created dataset with ID: {dataset_id}") + + # Create a client with the dataset ID + dataset_client = KnowledgeBaseClient(api_key, dataset_id=dataset_id) + + # Add a document by text + doc_response = dataset_client.create_document_by_text( + name="Test Document", text="This is a test document for the knowledge base." + ) + doc_response.raise_for_status() + + document_id = doc_response.json().get("document", {}).get("id") + print(f"Created document with ID: {document_id}") + + # List documents + list_response = dataset_client.list_documents() + list_response.raise_for_status() + + documents = list_response.json().get("data", []) + print(f"Dataset contains {len(documents)} documents") + + # Update dataset configuration + update_response = dataset_client.update_dataset( + name="Updated Dataset Name", description="Updated description", indexing_technique="high_quality" + ) + update_response.raise_for_status() + + print("Dataset updated successfully") + + except Exception as e: + print(f"Dataset management error: {e}") + + +async def example_async_chat(): + """Example of async chat usage.""" + api_key = "your-api-key-here" + + try: + async with AsyncChatClient(api_key) as client: + # Create chat message + response = await client.create_chat_message( + inputs={}, query="What's the weather like?", user="user-456", response_mode="blocking" + ) + + result = response.json() + print(f"Async response: {result.get('answer')}") + + # Get conversations + conversations = await client.get_conversations("user-456") + conversations.raise_for_status() + + conv_data = conversations.json() + print(f"Found {len(conv_data.get('data', []))} conversations") + + except Exception as e: + print(f"Async chat error: {e}") + + +def example_streaming_response(): + """Example of handling streaming responses.""" + api_key = "your-api-key-here" + + with ChatClient(api_key) as client: + try: + response = client.create_chat_message( + inputs={}, query="Tell me a story", user="user-789", response_mode="streaming" + ) + + print("Streaming response:") + for line in response.iter_lines(decode_unicode=True): + if line.startswith("data:"): + data = line[5:].strip() + if data: + import json + + try: + chunk = json.loads(data) + answer = chunk.get("answer", "") + if answer: + print(answer, end="", flush=True) + except json.JSONDecodeError: + continue + print() # New line after streaming + + except Exception as e: + print(f"Streaming error: {e}") + + +def example_application_info(): + """Example of getting application information.""" + api_key = "your-api-key-here" + + with DifyClient(api_key) as client: + try: + # Get app info + info_response = client.get_app_info() + info_response.raise_for_status() + + app_info = info_response.json() + print(f"App name: {app_info.get('name')}") + print(f"App mode: {app_info.get('mode')}") + print(f"App tags: {app_info.get('tags', [])}") + + # Get app parameters + params_response = client.get_application_parameters("user-123") + params_response.raise_for_status() + + params = params_response.json() + print(f"Opening statement: {params.get('opening_statement')}") + print(f"Suggested questions: {params.get('suggested_questions', [])}") + + except Exception as e: + print(f"App info error: {e}") + + +def main(): + """Run all examples.""" + setup_logging() + + print("=== Dify Python SDK Advanced Usage Examples ===\n") + + print("1. Chat with Error Handling:") + example_chat_with_error_handling() + print() + + print("2. Completion with Files:") + example_completion_with_files() + print() + + print("3. Dataset Management:") + example_dataset_management() + print() + + print("4. Async Chat:") + asyncio.run(example_async_chat()) + print() + + print("5. Streaming Response:") + example_streaming_response() + print() + + print("6. Application Info:") + example_application_info() + print() + + print("All examples completed!") + + +if __name__ == "__main__": + main() diff --git a/sdks/python-client/pyproject.toml b/sdks/python-client/pyproject.toml index db02cbd6e3..a25cb9150c 100644 --- a/sdks/python-client/pyproject.toml +++ b/sdks/python-client/pyproject.toml @@ -5,7 +5,7 @@ description = "A package for interacting with the Dify Service-API" readme = "README.md" requires-python = ">=3.10" dependencies = [ - "httpx>=0.27.0", + "httpx[http2]>=0.27.0", "aiofiles>=23.0.0", ] authors = [ diff --git a/sdks/python-client/tests/test_client.py b/sdks/python-client/tests/test_client.py index fce1b11eba..b0d2f8ba23 100644 --- a/sdks/python-client/tests/test_client.py +++ b/sdks/python-client/tests/test_client.py @@ -1,6 +1,7 @@ import os import time import unittest +from unittest.mock import Mock, patch, mock_open from dify_client.client import ( ChatClient, @@ -17,38 +18,46 @@ FILE_PATH_BASE = os.path.dirname(__file__) class TestKnowledgeBaseClient(unittest.TestCase): def setUp(self): - self.knowledge_base_client = KnowledgeBaseClient(API_KEY, base_url=API_BASE_URL) + self.api_key = "test-api-key" + self.base_url = "https://api.dify.ai/v1" + self.knowledge_base_client = KnowledgeBaseClient(self.api_key, base_url=self.base_url) self.README_FILE_PATH = os.path.abspath(os.path.join(FILE_PATH_BASE, "../README.md")) - self.dataset_id = None - self.document_id = None - self.segment_id = None - self.batch_id = None + self.dataset_id = "test-dataset-id" + self.document_id = "test-document-id" + self.segment_id = "test-segment-id" + self.batch_id = "test-batch-id" def _get_dataset_kb_client(self): - self.assertIsNotNone(self.dataset_id) - return KnowledgeBaseClient(API_KEY, base_url=API_BASE_URL, dataset_id=self.dataset_id) + return KnowledgeBaseClient(self.api_key, base_url=self.base_url, dataset_id=self.dataset_id) + + @patch("dify_client.client.httpx.Client") + def test_001_create_dataset(self, mock_httpx_client): + # Mock the HTTP response + mock_response = Mock() + mock_response.json.return_value = {"id": self.dataset_id, "name": "test_dataset"} + mock_response.status_code = 200 + + mock_client_instance = Mock() + mock_client_instance.request.return_value = mock_response + mock_httpx_client.return_value = mock_client_instance + + # Re-create client with mocked httpx + self.knowledge_base_client = KnowledgeBaseClient(self.api_key, base_url=self.base_url) - def test_001_create_dataset(self): response = self.knowledge_base_client.create_dataset(name="test_dataset") data = response.json() self.assertIn("id", data) - self.dataset_id = data["id"] self.assertEqual("test_dataset", data["name"]) # the following tests require to be executed in order because they use # the dataset/document/segment ids from the previous test self._test_002_list_datasets() self._test_003_create_document_by_text() - time.sleep(1) self._test_004_update_document_by_text() - # self._test_005_batch_indexing_status() - time.sleep(1) self._test_006_update_document_by_file() - time.sleep(1) self._test_007_list_documents() self._test_008_delete_document() self._test_009_create_document_by_file() - time.sleep(1) self._test_010_add_segments() self._test_011_query_segments() self._test_012_update_document_segment() @@ -56,6 +65,12 @@ class TestKnowledgeBaseClient(unittest.TestCase): self._test_014_delete_dataset() def _test_002_list_datasets(self): + # Mock the response - using the already mocked client from test_001_create_dataset + mock_response = Mock() + mock_response.json.return_value = {"data": [], "total": 0} + mock_response.status_code = 200 + self.knowledge_base_client._client.request.return_value = mock_response + response = self.knowledge_base_client.list_datasets() data = response.json() self.assertIn("data", data) @@ -63,45 +78,62 @@ class TestKnowledgeBaseClient(unittest.TestCase): def _test_003_create_document_by_text(self): client = self._get_dataset_kb_client() + # Mock the response + mock_response = Mock() + mock_response.json.return_value = {"document": {"id": self.document_id}, "batch": self.batch_id} + mock_response.status_code = 200 + client._client.request.return_value = mock_response + response = client.create_document_by_text("test_document", "test_text") data = response.json() self.assertIn("document", data) - self.document_id = data["document"]["id"] - self.batch_id = data["batch"] def _test_004_update_document_by_text(self): client = self._get_dataset_kb_client() - self.assertIsNotNone(self.document_id) + # Mock the response + mock_response = Mock() + mock_response.json.return_value = {"document": {"id": self.document_id}, "batch": self.batch_id} + mock_response.status_code = 200 + client._client.request.return_value = mock_response + response = client.update_document_by_text(self.document_id, "test_document_updated", "test_text_updated") data = response.json() self.assertIn("document", data) self.assertIn("batch", data) - self.batch_id = data["batch"] - - def _test_005_batch_indexing_status(self): - client = self._get_dataset_kb_client() - response = client.batch_indexing_status(self.batch_id) - response.json() - self.assertEqual(response.status_code, 200) def _test_006_update_document_by_file(self): client = self._get_dataset_kb_client() - self.assertIsNotNone(self.document_id) + # Mock the response + mock_response = Mock() + mock_response.json.return_value = {"document": {"id": self.document_id}, "batch": self.batch_id} + mock_response.status_code = 200 + client._client.request.return_value = mock_response + response = client.update_document_by_file(self.document_id, self.README_FILE_PATH) data = response.json() self.assertIn("document", data) self.assertIn("batch", data) - self.batch_id = data["batch"] def _test_007_list_documents(self): client = self._get_dataset_kb_client() + # Mock the response + mock_response = Mock() + mock_response.json.return_value = {"data": []} + mock_response.status_code = 200 + client._client.request.return_value = mock_response + response = client.list_documents() data = response.json() self.assertIn("data", data) def _test_008_delete_document(self): client = self._get_dataset_kb_client() - self.assertIsNotNone(self.document_id) + # Mock the response + mock_response = Mock() + mock_response.json.return_value = {"result": "success"} + mock_response.status_code = 200 + client._client.request.return_value = mock_response + response = client.delete_document(self.document_id) data = response.json() self.assertIn("result", data) @@ -109,23 +141,37 @@ class TestKnowledgeBaseClient(unittest.TestCase): def _test_009_create_document_by_file(self): client = self._get_dataset_kb_client() + # Mock the response + mock_response = Mock() + mock_response.json.return_value = {"document": {"id": self.document_id}, "batch": self.batch_id} + mock_response.status_code = 200 + client._client.request.return_value = mock_response + response = client.create_document_by_file(self.README_FILE_PATH) data = response.json() self.assertIn("document", data) - self.document_id = data["document"]["id"] - self.batch_id = data["batch"] def _test_010_add_segments(self): client = self._get_dataset_kb_client() + # Mock the response + mock_response = Mock() + mock_response.json.return_value = {"data": [{"id": self.segment_id, "content": "test text segment 1"}]} + mock_response.status_code = 200 + client._client.request.return_value = mock_response + response = client.add_segments(self.document_id, [{"content": "test text segment 1"}]) data = response.json() self.assertIn("data", data) self.assertGreater(len(data["data"]), 0) - segment = data["data"][0] - self.segment_id = segment["id"] def _test_011_query_segments(self): client = self._get_dataset_kb_client() + # Mock the response + mock_response = Mock() + mock_response.json.return_value = {"data": [{"id": self.segment_id, "content": "test text segment 1"}]} + mock_response.status_code = 200 + client._client.request.return_value = mock_response + response = client.query_segments(self.document_id) data = response.json() self.assertIn("data", data) @@ -133,7 +179,12 @@ class TestKnowledgeBaseClient(unittest.TestCase): def _test_012_update_document_segment(self): client = self._get_dataset_kb_client() - self.assertIsNotNone(self.segment_id) + # Mock the response + mock_response = Mock() + mock_response.json.return_value = {"data": {"id": self.segment_id, "content": "test text segment 1 updated"}} + mock_response.status_code = 200 + client._client.request.return_value = mock_response + response = client.update_document_segment( self.document_id, self.segment_id, @@ -141,13 +192,16 @@ class TestKnowledgeBaseClient(unittest.TestCase): ) data = response.json() self.assertIn("data", data) - self.assertGreater(len(data["data"]), 0) - segment = data["data"] - self.assertEqual("test text segment 1 updated", segment["content"]) + self.assertEqual("test text segment 1 updated", data["data"]["content"]) def _test_013_delete_document_segment(self): client = self._get_dataset_kb_client() - self.assertIsNotNone(self.segment_id) + # Mock the response + mock_response = Mock() + mock_response.json.return_value = {"result": "success"} + mock_response.status_code = 200 + client._client.request.return_value = mock_response + response = client.delete_document_segment(self.document_id, self.segment_id) data = response.json() self.assertIn("result", data) @@ -155,94 +209,279 @@ class TestKnowledgeBaseClient(unittest.TestCase): def _test_014_delete_dataset(self): client = self._get_dataset_kb_client() + # Mock the response + mock_response = Mock() + mock_response.status_code = 204 + client._client.request.return_value = mock_response + response = client.delete_dataset() self.assertEqual(204, response.status_code) class TestChatClient(unittest.TestCase): - def setUp(self): - self.chat_client = ChatClient(API_KEY) + @patch("dify_client.client.httpx.Client") + def setUp(self, mock_httpx_client): + self.api_key = "test-api-key" + self.chat_client = ChatClient(self.api_key) - def test_create_chat_message(self): - response = self.chat_client.create_chat_message({}, "Hello, World!", "test_user") + # Set up default mock response for the client + mock_response = Mock() + mock_response.text = '{"answer": "Hello! This is a test response."}' + mock_response.json.return_value = {"answer": "Hello! This is a test response."} + mock_response.status_code = 200 + + mock_client_instance = Mock() + mock_client_instance.request.return_value = mock_response + mock_httpx_client.return_value = mock_client_instance + + @patch("dify_client.client.httpx.Client") + def test_create_chat_message(self, mock_httpx_client): + # Mock the HTTP response + mock_response = Mock() + mock_response.text = '{"answer": "Hello! This is a test response."}' + mock_response.json.return_value = {"answer": "Hello! This is a test response."} + mock_response.status_code = 200 + + mock_client_instance = Mock() + mock_client_instance.request.return_value = mock_response + mock_httpx_client.return_value = mock_client_instance + + # Create client with mocked httpx + chat_client = ChatClient(self.api_key) + response = chat_client.create_chat_message({}, "Hello, World!", "test_user") self.assertIn("answer", response.text) - def test_create_chat_message_with_vision_model_by_remote_url(self): - files = [{"type": "image", "transfer_method": "remote_url", "url": "your_image_url"}] - response = self.chat_client.create_chat_message({}, "Describe the picture.", "test_user", files=files) + @patch("dify_client.client.httpx.Client") + def test_create_chat_message_with_vision_model_by_remote_url(self, mock_httpx_client): + # Mock the HTTP response + mock_response = Mock() + mock_response.text = '{"answer": "I can see this is a test image description."}' + mock_response.json.return_value = {"answer": "I can see this is a test image description."} + mock_response.status_code = 200 + + mock_client_instance = Mock() + mock_client_instance.request.return_value = mock_response + mock_httpx_client.return_value = mock_client_instance + + # Create client with mocked httpx + chat_client = ChatClient(self.api_key) + files = [{"type": "image", "transfer_method": "remote_url", "url": "https://example.com/test-image.jpg"}] + response = chat_client.create_chat_message({}, "Describe the picture.", "test_user", files=files) self.assertIn("answer", response.text) - def test_create_chat_message_with_vision_model_by_local_file(self): + @patch("dify_client.client.httpx.Client") + def test_create_chat_message_with_vision_model_by_local_file(self, mock_httpx_client): + # Mock the HTTP response + mock_response = Mock() + mock_response.text = '{"answer": "I can see this is a test uploaded image."}' + mock_response.json.return_value = {"answer": "I can see this is a test uploaded image."} + mock_response.status_code = 200 + + mock_client_instance = Mock() + mock_client_instance.request.return_value = mock_response + mock_httpx_client.return_value = mock_client_instance + + # Create client with mocked httpx + chat_client = ChatClient(self.api_key) files = [ { "type": "image", "transfer_method": "local_file", - "upload_file_id": "your_file_id", + "upload_file_id": "test-file-id", } ] - response = self.chat_client.create_chat_message({}, "Describe the picture.", "test_user", files=files) + response = chat_client.create_chat_message({}, "Describe the picture.", "test_user", files=files) self.assertIn("answer", response.text) - def test_get_conversation_messages(self): - response = self.chat_client.get_conversation_messages("test_user", "your_conversation_id") + @patch("dify_client.client.httpx.Client") + def test_get_conversation_messages(self, mock_httpx_client): + # Mock the HTTP response + mock_response = Mock() + mock_response.text = '{"answer": "Here are the conversation messages."}' + mock_response.json.return_value = {"answer": "Here are the conversation messages."} + mock_response.status_code = 200 + + mock_client_instance = Mock() + mock_client_instance.request.return_value = mock_response + mock_httpx_client.return_value = mock_client_instance + + # Create client with mocked httpx + chat_client = ChatClient(self.api_key) + response = chat_client.get_conversation_messages("test_user", "test-conversation-id") self.assertIn("answer", response.text) - def test_get_conversations(self): - response = self.chat_client.get_conversations("test_user") + @patch("dify_client.client.httpx.Client") + def test_get_conversations(self, mock_httpx_client): + # Mock the HTTP response + mock_response = Mock() + mock_response.text = '{"data": [{"id": "conv1", "name": "Test Conversation"}]}' + mock_response.json.return_value = {"data": [{"id": "conv1", "name": "Test Conversation"}]} + mock_response.status_code = 200 + + mock_client_instance = Mock() + mock_client_instance.request.return_value = mock_response + mock_httpx_client.return_value = mock_client_instance + + # Create client with mocked httpx + chat_client = ChatClient(self.api_key) + response = chat_client.get_conversations("test_user") self.assertIn("data", response.text) class TestCompletionClient(unittest.TestCase): - def setUp(self): - self.completion_client = CompletionClient(API_KEY) + @patch("dify_client.client.httpx.Client") + def setUp(self, mock_httpx_client): + self.api_key = "test-api-key" + self.completion_client = CompletionClient(self.api_key) - def test_create_completion_message(self): - response = self.completion_client.create_completion_message( + # Set up default mock response for the client + mock_response = Mock() + mock_response.text = '{"answer": "This is a test completion response."}' + mock_response.json.return_value = {"answer": "This is a test completion response."} + mock_response.status_code = 200 + + mock_client_instance = Mock() + mock_client_instance.request.return_value = mock_response + mock_httpx_client.return_value = mock_client_instance + + @patch("dify_client.client.httpx.Client") + def test_create_completion_message(self, mock_httpx_client): + # Mock the HTTP response + mock_response = Mock() + mock_response.text = '{"answer": "The weather today is sunny with a temperature of 75°F."}' + mock_response.json.return_value = {"answer": "The weather today is sunny with a temperature of 75°F."} + mock_response.status_code = 200 + + mock_client_instance = Mock() + mock_client_instance.request.return_value = mock_response + mock_httpx_client.return_value = mock_client_instance + + # Create client with mocked httpx + completion_client = CompletionClient(self.api_key) + response = completion_client.create_completion_message( {"query": "What's the weather like today?"}, "blocking", "test_user" ) self.assertIn("answer", response.text) - def test_create_completion_message_with_vision_model_by_remote_url(self): - files = [{"type": "image", "transfer_method": "remote_url", "url": "your_image_url"}] - response = self.completion_client.create_completion_message( + @patch("dify_client.client.httpx.Client") + def test_create_completion_message_with_vision_model_by_remote_url(self, mock_httpx_client): + # Mock the HTTP response + mock_response = Mock() + mock_response.text = '{"answer": "This is a test image description from completion API."}' + mock_response.json.return_value = {"answer": "This is a test image description from completion API."} + mock_response.status_code = 200 + + mock_client_instance = Mock() + mock_client_instance.request.return_value = mock_response + mock_httpx_client.return_value = mock_client_instance + + # Create client with mocked httpx + completion_client = CompletionClient(self.api_key) + files = [{"type": "image", "transfer_method": "remote_url", "url": "https://example.com/test-image.jpg"}] + response = completion_client.create_completion_message( {"query": "Describe the picture."}, "blocking", "test_user", files ) self.assertIn("answer", response.text) - def test_create_completion_message_with_vision_model_by_local_file(self): + @patch("dify_client.client.httpx.Client") + def test_create_completion_message_with_vision_model_by_local_file(self, mock_httpx_client): + # Mock the HTTP response + mock_response = Mock() + mock_response.text = '{"answer": "This is a test uploaded image description from completion API."}' + mock_response.json.return_value = {"answer": "This is a test uploaded image description from completion API."} + mock_response.status_code = 200 + + mock_client_instance = Mock() + mock_client_instance.request.return_value = mock_response + mock_httpx_client.return_value = mock_client_instance + + # Create client with mocked httpx + completion_client = CompletionClient(self.api_key) files = [ { "type": "image", "transfer_method": "local_file", - "upload_file_id": "your_file_id", + "upload_file_id": "test-file-id", } ] - response = self.completion_client.create_completion_message( + response = completion_client.create_completion_message( {"query": "Describe the picture."}, "blocking", "test_user", files ) self.assertIn("answer", response.text) class TestDifyClient(unittest.TestCase): - def setUp(self): - self.dify_client = DifyClient(API_KEY) + @patch("dify_client.client.httpx.Client") + def setUp(self, mock_httpx_client): + self.api_key = "test-api-key" + self.dify_client = DifyClient(self.api_key) - def test_message_feedback(self): - response = self.dify_client.message_feedback("your_message_id", "like", "test_user") + # Set up default mock response for the client + mock_response = Mock() + mock_response.text = '{"result": "success"}' + mock_response.json.return_value = {"result": "success"} + mock_response.status_code = 200 + + mock_client_instance = Mock() + mock_client_instance.request.return_value = mock_response + mock_httpx_client.return_value = mock_client_instance + + @patch("dify_client.client.httpx.Client") + def test_message_feedback(self, mock_httpx_client): + # Mock the HTTP response + mock_response = Mock() + mock_response.text = '{"success": true}' + mock_response.json.return_value = {"success": True} + mock_response.status_code = 200 + + mock_client_instance = Mock() + mock_client_instance.request.return_value = mock_response + mock_httpx_client.return_value = mock_client_instance + + # Create client with mocked httpx + dify_client = DifyClient(self.api_key) + response = dify_client.message_feedback("test-message-id", "like", "test_user") self.assertIn("success", response.text) - def test_get_application_parameters(self): - response = self.dify_client.get_application_parameters("test_user") + @patch("dify_client.client.httpx.Client") + def test_get_application_parameters(self, mock_httpx_client): + # Mock the HTTP response + mock_response = Mock() + mock_response.text = '{"user_input_form": [{"field": "text", "label": "Input"}]}' + mock_response.json.return_value = {"user_input_form": [{"field": "text", "label": "Input"}]} + mock_response.status_code = 200 + + mock_client_instance = Mock() + mock_client_instance.request.return_value = mock_response + mock_httpx_client.return_value = mock_client_instance + + # Create client with mocked httpx + dify_client = DifyClient(self.api_key) + response = dify_client.get_application_parameters("test_user") self.assertIn("user_input_form", response.text) - def test_file_upload(self): - file_path = "your_image_file_path" + @patch("dify_client.client.httpx.Client") + @patch("builtins.open", new_callable=mock_open, read_data=b"fake image data") + def test_file_upload(self, mock_file_open, mock_httpx_client): + # Mock the HTTP response + mock_response = Mock() + mock_response.text = '{"name": "panda.jpeg", "id": "test-file-id"}' + mock_response.json.return_value = {"name": "panda.jpeg", "id": "test-file-id"} + mock_response.status_code = 200 + + mock_client_instance = Mock() + mock_client_instance.request.return_value = mock_response + mock_httpx_client.return_value = mock_client_instance + + # Create client with mocked httpx + dify_client = DifyClient(self.api_key) + file_path = "/path/to/test/panda.jpeg" file_name = "panda.jpeg" mime_type = "image/jpeg" with open(file_path, "rb") as file: files = {"file": (file_name, file, mime_type)} - response = self.dify_client.file_upload("test_user", files) + response = dify_client.file_upload("test_user", files) self.assertIn("name", response.text) diff --git a/sdks/python-client/tests/test_exceptions.py b/sdks/python-client/tests/test_exceptions.py new file mode 100644 index 0000000000..eb44895749 --- /dev/null +++ b/sdks/python-client/tests/test_exceptions.py @@ -0,0 +1,79 @@ +"""Tests for custom exceptions.""" + +import unittest +from dify_client.exceptions import ( + DifyClientError, + APIError, + AuthenticationError, + RateLimitError, + ValidationError, + NetworkError, + TimeoutError, + FileUploadError, + DatasetError, + WorkflowError, +) + + +class TestExceptions(unittest.TestCase): + """Test custom exception classes.""" + + def test_base_exception(self): + """Test base DifyClientError.""" + error = DifyClientError("Test message", 500, {"error": "details"}) + self.assertEqual(str(error), "Test message") + self.assertEqual(error.status_code, 500) + self.assertEqual(error.response, {"error": "details"}) + + def test_api_error(self): + """Test APIError.""" + error = APIError("API failed", 400) + self.assertEqual(error.status_code, 400) + self.assertEqual(error.message, "API failed") + + def test_authentication_error(self): + """Test AuthenticationError.""" + error = AuthenticationError("Invalid API key") + self.assertEqual(str(error), "Invalid API key") + + def test_rate_limit_error(self): + """Test RateLimitError.""" + error = RateLimitError("Rate limited", retry_after=60) + self.assertEqual(error.retry_after, 60) + + error_default = RateLimitError() + self.assertEqual(error_default.retry_after, None) + + def test_validation_error(self): + """Test ValidationError.""" + error = ValidationError("Invalid parameter") + self.assertEqual(str(error), "Invalid parameter") + + def test_network_error(self): + """Test NetworkError.""" + error = NetworkError("Connection failed") + self.assertEqual(str(error), "Connection failed") + + def test_timeout_error(self): + """Test TimeoutError.""" + error = TimeoutError("Request timed out") + self.assertEqual(str(error), "Request timed out") + + def test_file_upload_error(self): + """Test FileUploadError.""" + error = FileUploadError("Upload failed") + self.assertEqual(str(error), "Upload failed") + + def test_dataset_error(self): + """Test DatasetError.""" + error = DatasetError("Dataset operation failed") + self.assertEqual(str(error), "Dataset operation failed") + + def test_workflow_error(self): + """Test WorkflowError.""" + error = WorkflowError("Workflow failed") + self.assertEqual(str(error), "Workflow failed") + + +if __name__ == "__main__": + unittest.main() diff --git a/sdks/python-client/tests/test_httpx_migration.py b/sdks/python-client/tests/test_httpx_migration.py index b8e434d7ec..cf26de6eba 100644 --- a/sdks/python-client/tests/test_httpx_migration.py +++ b/sdks/python-client/tests/test_httpx_migration.py @@ -152,6 +152,7 @@ class TestHttpxMigrationMocked(unittest.TestCase): """Test that json parameter is passed correctly.""" mock_response = Mock() mock_response.json.return_value = {"result": "success"} + mock_response.status_code = 200 # Add status_code attribute mock_client_instance = Mock() mock_client_instance.request.return_value = mock_response @@ -173,6 +174,7 @@ class TestHttpxMigrationMocked(unittest.TestCase): """Test that params parameter is passed correctly.""" mock_response = Mock() mock_response.json.return_value = {"result": "success"} + mock_response.status_code = 200 # Add status_code attribute mock_client_instance = Mock() mock_client_instance.request.return_value = mock_response diff --git a/sdks/python-client/tests/test_integration.py b/sdks/python-client/tests/test_integration.py new file mode 100644 index 0000000000..6f38c5de56 --- /dev/null +++ b/sdks/python-client/tests/test_integration.py @@ -0,0 +1,539 @@ +"""Integration tests with proper mocking.""" + +import unittest +from unittest.mock import Mock, patch, MagicMock +import json +import httpx +from dify_client import ( + DifyClient, + ChatClient, + CompletionClient, + WorkflowClient, + KnowledgeBaseClient, + WorkspaceClient, +) +from dify_client.exceptions import ( + APIError, + AuthenticationError, + RateLimitError, + ValidationError, +) + + +class TestDifyClientIntegration(unittest.TestCase): + """Integration tests for DifyClient with mocked HTTP responses.""" + + def setUp(self): + self.api_key = "test_api_key" + self.base_url = "https://api.dify.ai/v1" + self.client = DifyClient(api_key=self.api_key, base_url=self.base_url, enable_logging=False) + + @patch("httpx.Client.request") + def test_get_app_info_integration(self, mock_request): + """Test get_app_info integration.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "app_123", + "name": "Test App", + "description": "A test application", + "mode": "chat", + } + mock_request.return_value = mock_response + + response = self.client.get_app_info() + data = response.json() + + self.assertEqual(response.status_code, 200) + self.assertEqual(data["id"], "app_123") + self.assertEqual(data["name"], "Test App") + mock_request.assert_called_once_with( + "GET", + "/info", + json=None, + params=None, + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + }, + ) + + @patch("httpx.Client.request") + def test_get_application_parameters_integration(self, mock_request): + """Test get_application_parameters integration.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "opening_statement": "Hello! How can I help you?", + "suggested_questions": ["What is AI?", "How does this work?"], + "speech_to_text": {"enabled": True}, + "text_to_speech": {"enabled": False}, + } + mock_request.return_value = mock_response + + response = self.client.get_application_parameters("user_123") + data = response.json() + + self.assertEqual(response.status_code, 200) + self.assertEqual(data["opening_statement"], "Hello! How can I help you?") + self.assertEqual(len(data["suggested_questions"]), 2) + mock_request.assert_called_once_with( + "GET", + "/parameters", + json=None, + params={"user": "user_123"}, + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + }, + ) + + @patch("httpx.Client.request") + def test_file_upload_integration(self, mock_request): + """Test file_upload integration.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "file_123", + "name": "test.txt", + "size": 1024, + "mime_type": "text/plain", + } + mock_request.return_value = mock_response + + files = {"file": ("test.txt", "test content", "text/plain")} + response = self.client.file_upload("user_123", files) + data = response.json() + + self.assertEqual(response.status_code, 200) + self.assertEqual(data["id"], "file_123") + self.assertEqual(data["name"], "test.txt") + + @patch("httpx.Client.request") + def test_message_feedback_integration(self, mock_request): + """Test message_feedback integration.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"success": True} + mock_request.return_value = mock_response + + response = self.client.message_feedback("msg_123", "like", "user_123") + data = response.json() + + self.assertEqual(response.status_code, 200) + self.assertTrue(data["success"]) + mock_request.assert_called_once_with( + "POST", + "/messages/msg_123/feedbacks", + json={"rating": "like", "user": "user_123"}, + params=None, + headers={ + "Authorization": "Bearer test_api_key", + "Content-Type": "application/json", + }, + ) + + +class TestChatClientIntegration(unittest.TestCase): + """Integration tests for ChatClient.""" + + def setUp(self): + self.client = ChatClient("test_api_key", enable_logging=False) + + @patch("httpx.Client.request") + def test_create_chat_message_blocking(self, mock_request): + """Test create_chat_message with blocking response.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "msg_123", + "answer": "Hello! How can I help you today?", + "conversation_id": "conv_123", + "created_at": 1234567890, + } + mock_request.return_value = mock_response + + response = self.client.create_chat_message( + inputs={"query": "Hello"}, + query="Hello, AI!", + user="user_123", + response_mode="blocking", + ) + data = response.json() + + self.assertEqual(response.status_code, 200) + self.assertEqual(data["answer"], "Hello! How can I help you today?") + self.assertEqual(data["conversation_id"], "conv_123") + + @patch("httpx.Client.request") + def test_create_chat_message_streaming(self, mock_request): + """Test create_chat_message with streaming response.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.iter_lines.return_value = [ + b'data: {"answer": "Hello"}', + b'data: {"answer": " world"}', + b'data: {"answer": "!"}', + ] + mock_request.return_value = mock_response + + response = self.client.create_chat_message(inputs={}, query="Hello", user="user_123", response_mode="streaming") + + self.assertEqual(response.status_code, 200) + lines = list(response.iter_lines()) + self.assertEqual(len(lines), 3) + + @patch("httpx.Client.request") + def test_get_conversations_integration(self, mock_request): + """Test get_conversations integration.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "data": [ + {"id": "conv_1", "name": "Conversation 1"}, + {"id": "conv_2", "name": "Conversation 2"}, + ], + "has_more": False, + "limit": 20, + } + mock_request.return_value = mock_response + + response = self.client.get_conversations("user_123", limit=20) + data = response.json() + + self.assertEqual(response.status_code, 200) + self.assertEqual(len(data["data"]), 2) + self.assertEqual(data["data"][0]["name"], "Conversation 1") + + @patch("httpx.Client.request") + def test_get_conversation_messages_integration(self, mock_request): + """Test get_conversation_messages integration.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "data": [ + {"id": "msg_1", "role": "user", "content": "Hello"}, + {"id": "msg_2", "role": "assistant", "content": "Hi there!"}, + ] + } + mock_request.return_value = mock_response + + response = self.client.get_conversation_messages("user_123", conversation_id="conv_123") + data = response.json() + + self.assertEqual(response.status_code, 200) + self.assertEqual(len(data["data"]), 2) + self.assertEqual(data["data"][0]["role"], "user") + + +class TestCompletionClientIntegration(unittest.TestCase): + """Integration tests for CompletionClient.""" + + def setUp(self): + self.client = CompletionClient("test_api_key", enable_logging=False) + + @patch("httpx.Client.request") + def test_create_completion_message_blocking(self, mock_request): + """Test create_completion_message with blocking response.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "comp_123", + "answer": "This is a completion response.", + "created_at": 1234567890, + } + mock_request.return_value = mock_response + + response = self.client.create_completion_message( + inputs={"prompt": "Complete this sentence"}, + response_mode="blocking", + user="user_123", + ) + data = response.json() + + self.assertEqual(response.status_code, 200) + self.assertEqual(data["answer"], "This is a completion response.") + + @patch("httpx.Client.request") + def test_create_completion_message_with_files(self, mock_request): + """Test create_completion_message with files.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "comp_124", + "answer": "I can see the image shows...", + "files": [{"id": "file_1", "type": "image"}], + } + mock_request.return_value = mock_response + + files = { + "file": { + "type": "image", + "transfer_method": "remote_url", + "url": "https://example.com/image.jpg", + } + } + response = self.client.create_completion_message( + inputs={"prompt": "Describe this image"}, + response_mode="blocking", + user="user_123", + files=files, + ) + data = response.json() + + self.assertEqual(response.status_code, 200) + self.assertIn("image", data["answer"]) + self.assertEqual(len(data["files"]), 1) + + +class TestWorkflowClientIntegration(unittest.TestCase): + """Integration tests for WorkflowClient.""" + + def setUp(self): + self.client = WorkflowClient("test_api_key", enable_logging=False) + + @patch("httpx.Client.request") + def test_run_workflow_blocking(self, mock_request): + """Test run workflow with blocking response.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "run_123", + "workflow_id": "workflow_123", + "status": "succeeded", + "inputs": {"query": "Test input"}, + "outputs": {"result": "Test output"}, + "elapsed_time": 2.5, + } + mock_request.return_value = mock_response + + response = self.client.run(inputs={"query": "Test input"}, response_mode="blocking", user="user_123") + data = response.json() + + self.assertEqual(response.status_code, 200) + self.assertEqual(data["status"], "succeeded") + self.assertEqual(data["outputs"]["result"], "Test output") + + @patch("httpx.Client.request") + def test_get_workflow_logs(self, mock_request): + """Test get_workflow_logs integration.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "logs": [ + {"id": "log_1", "status": "succeeded", "created_at": 1234567890}, + {"id": "log_2", "status": "failed", "created_at": 1234567891}, + ], + "total": 2, + "page": 1, + "limit": 20, + } + mock_request.return_value = mock_response + + response = self.client.get_workflow_logs(page=1, limit=20) + data = response.json() + + self.assertEqual(response.status_code, 200) + self.assertEqual(len(data["logs"]), 2) + self.assertEqual(data["logs"][0]["status"], "succeeded") + + +class TestKnowledgeBaseClientIntegration(unittest.TestCase): + """Integration tests for KnowledgeBaseClient.""" + + def setUp(self): + self.client = KnowledgeBaseClient("test_api_key") + + @patch("httpx.Client.request") + def test_create_dataset(self, mock_request): + """Test create_dataset integration.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "dataset_123", + "name": "Test Dataset", + "description": "A test dataset", + "created_at": 1234567890, + } + mock_request.return_value = mock_response + + response = self.client.create_dataset(name="Test Dataset") + data = response.json() + + self.assertEqual(response.status_code, 200) + self.assertEqual(data["name"], "Test Dataset") + self.assertEqual(data["id"], "dataset_123") + + @patch("httpx.Client.request") + def test_list_datasets(self, mock_request): + """Test list_datasets integration.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "data": [ + {"id": "dataset_1", "name": "Dataset 1"}, + {"id": "dataset_2", "name": "Dataset 2"}, + ], + "has_more": False, + "limit": 20, + } + mock_request.return_value = mock_response + + response = self.client.list_datasets(page=1, page_size=20) + data = response.json() + + self.assertEqual(response.status_code, 200) + self.assertEqual(len(data["data"]), 2) + + @patch("httpx.Client.request") + def test_create_document_by_text(self, mock_request): + """Test create_document_by_text integration.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "document": { + "id": "doc_123", + "name": "Test Document", + "word_count": 100, + "status": "indexing", + } + } + mock_request.return_value = mock_response + + # Mock dataset_id + self.client.dataset_id = "dataset_123" + + response = self.client.create_document_by_text(name="Test Document", text="This is test document content.") + data = response.json() + + self.assertEqual(response.status_code, 200) + self.assertEqual(data["document"]["name"], "Test Document") + self.assertEqual(data["document"]["word_count"], 100) + + +class TestWorkspaceClientIntegration(unittest.TestCase): + """Integration tests for WorkspaceClient.""" + + def setUp(self): + self.client = WorkspaceClient("test_api_key", enable_logging=False) + + @patch("httpx.Client.request") + def test_get_available_models(self, mock_request): + """Test get_available_models integration.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "models": [ + {"id": "gpt-4", "name": "GPT-4", "provider": "openai"}, + {"id": "claude-3", "name": "Claude 3", "provider": "anthropic"}, + ] + } + mock_request.return_value = mock_response + + response = self.client.get_available_models("llm") + data = response.json() + + self.assertEqual(response.status_code, 200) + self.assertEqual(len(data["models"]), 2) + self.assertEqual(data["models"][0]["id"], "gpt-4") + + +class TestErrorScenariosIntegration(unittest.TestCase): + """Integration tests for error scenarios.""" + + def setUp(self): + self.client = DifyClient("test_api_key", enable_logging=False) + + @patch("httpx.Client.request") + def test_authentication_error_integration(self, mock_request): + """Test authentication error in integration.""" + mock_response = Mock() + mock_response.status_code = 401 + mock_response.json.return_value = {"message": "Invalid API key"} + mock_request.return_value = mock_response + + with self.assertRaises(AuthenticationError) as context: + self.client.get_app_info() + + self.assertEqual(str(context.exception), "Invalid API key") + self.assertEqual(context.exception.status_code, 401) + + @patch("httpx.Client.request") + def test_rate_limit_error_integration(self, mock_request): + """Test rate limit error in integration.""" + mock_response = Mock() + mock_response.status_code = 429 + mock_response.json.return_value = {"message": "Rate limit exceeded"} + mock_response.headers = {"Retry-After": "60"} + mock_request.return_value = mock_response + + with self.assertRaises(RateLimitError) as context: + self.client.get_app_info() + + self.assertEqual(str(context.exception), "Rate limit exceeded") + self.assertEqual(context.exception.retry_after, "60") + + @patch("httpx.Client.request") + def test_server_error_with_retry_integration(self, mock_request): + """Test server error with retry in integration.""" + # API errors don't retry by design - only network/timeout errors retry + mock_response_500 = Mock() + mock_response_500.status_code = 500 + mock_response_500.json.return_value = {"message": "Internal server error"} + + mock_request.return_value = mock_response_500 + + with patch("time.sleep"): # Skip actual sleep + with self.assertRaises(APIError) as context: + self.client.get_app_info() + + self.assertEqual(str(context.exception), "Internal server error") + self.assertEqual(mock_request.call_count, 1) + + @patch("httpx.Client.request") + def test_validation_error_integration(self, mock_request): + """Test validation error in integration.""" + mock_response = Mock() + mock_response.status_code = 422 + mock_response.json.return_value = { + "message": "Validation failed", + "details": {"field": "query", "error": "required"}, + } + mock_request.return_value = mock_response + + with self.assertRaises(ValidationError) as context: + self.client.get_app_info() + + self.assertEqual(str(context.exception), "Validation failed") + self.assertEqual(context.exception.status_code, 422) + + +class TestContextManagerIntegration(unittest.TestCase): + """Integration tests for context manager usage.""" + + @patch("httpx.Client.close") + @patch("httpx.Client.request") + def test_context_manager_usage(self, mock_request, mock_close): + """Test context manager properly closes connections.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"id": "app_123", "name": "Test App"} + mock_request.return_value = mock_response + + with DifyClient("test_api_key") as client: + response = client.get_app_info() + self.assertEqual(response.status_code, 200) + + # Verify close was called + mock_close.assert_called_once() + + @patch("httpx.Client.close") + def test_manual_close(self, mock_close): + """Test manual close method.""" + client = DifyClient("test_api_key") + client.close() + mock_close.assert_called_once() + + +if __name__ == "__main__": + unittest.main() diff --git a/sdks/python-client/tests/test_models.py b/sdks/python-client/tests/test_models.py new file mode 100644 index 0000000000..db9d92ad5b --- /dev/null +++ b/sdks/python-client/tests/test_models.py @@ -0,0 +1,640 @@ +"""Unit tests for response models.""" + +import unittest +import json +from datetime import datetime +from dify_client.models import ( + BaseResponse, + ErrorResponse, + FileInfo, + MessageResponse, + ConversationResponse, + DatasetResponse, + DocumentResponse, + DocumentSegmentResponse, + WorkflowRunResponse, + ApplicationParametersResponse, + AnnotationResponse, + PaginatedResponse, + ConversationVariableResponse, + FileUploadResponse, + AudioResponse, + SuggestedQuestionsResponse, + AppInfoResponse, + WorkspaceModelsResponse, + HitTestingResponse, + DatasetTagsResponse, + WorkflowLogsResponse, + ModelProviderResponse, + FileInfoResponse, + WorkflowDraftResponse, + ApiTokenResponse, + JobStatusResponse, + DatasetQueryResponse, + DatasetTemplateResponse, +) + + +class TestResponseModels(unittest.TestCase): + """Test cases for response model classes.""" + + def test_base_response(self): + """Test BaseResponse model.""" + response = BaseResponse(success=True, message="Operation successful") + self.assertTrue(response.success) + self.assertEqual(response.message, "Operation successful") + + def test_base_response_defaults(self): + """Test BaseResponse with default values.""" + response = BaseResponse(success=True) + self.assertTrue(response.success) + self.assertIsNone(response.message) + + def test_error_response(self): + """Test ErrorResponse model.""" + response = ErrorResponse( + success=False, + message="Error occurred", + error_code="VALIDATION_ERROR", + details={"field": "invalid_value"}, + ) + self.assertFalse(response.success) + self.assertEqual(response.message, "Error occurred") + self.assertEqual(response.error_code, "VALIDATION_ERROR") + self.assertEqual(response.details["field"], "invalid_value") + + def test_file_info(self): + """Test FileInfo model.""" + now = datetime.now() + file_info = FileInfo( + id="file_123", + name="test.txt", + size=1024, + mime_type="text/plain", + url="https://example.com/file.txt", + created_at=now, + ) + self.assertEqual(file_info.id, "file_123") + self.assertEqual(file_info.name, "test.txt") + self.assertEqual(file_info.size, 1024) + self.assertEqual(file_info.mime_type, "text/plain") + self.assertEqual(file_info.url, "https://example.com/file.txt") + self.assertEqual(file_info.created_at, now) + + def test_message_response(self): + """Test MessageResponse model.""" + response = MessageResponse( + success=True, + id="msg_123", + answer="Hello, world!", + conversation_id="conv_123", + created_at=1234567890, + metadata={"model": "gpt-4"}, + files=[{"id": "file_1", "type": "image"}], + ) + self.assertTrue(response.success) + self.assertEqual(response.id, "msg_123") + self.assertEqual(response.answer, "Hello, world!") + self.assertEqual(response.conversation_id, "conv_123") + self.assertEqual(response.created_at, 1234567890) + self.assertEqual(response.metadata["model"], "gpt-4") + self.assertEqual(response.files[0]["id"], "file_1") + + def test_conversation_response(self): + """Test ConversationResponse model.""" + response = ConversationResponse( + success=True, + id="conv_123", + name="Test Conversation", + inputs={"query": "Hello"}, + status="active", + created_at=1234567890, + updated_at=1234567891, + ) + self.assertTrue(response.success) + self.assertEqual(response.id, "conv_123") + self.assertEqual(response.name, "Test Conversation") + self.assertEqual(response.inputs["query"], "Hello") + self.assertEqual(response.status, "active") + self.assertEqual(response.created_at, 1234567890) + self.assertEqual(response.updated_at, 1234567891) + + def test_dataset_response(self): + """Test DatasetResponse model.""" + response = DatasetResponse( + success=True, + id="dataset_123", + name="Test Dataset", + description="A test dataset", + permission="read", + indexing_technique="high_quality", + embedding_model="text-embedding-ada-002", + embedding_model_provider="openai", + retrieval_model={"search_type": "semantic"}, + document_count=10, + word_count=5000, + app_count=2, + created_at=1234567890, + updated_at=1234567891, + ) + self.assertTrue(response.success) + self.assertEqual(response.id, "dataset_123") + self.assertEqual(response.name, "Test Dataset") + self.assertEqual(response.description, "A test dataset") + self.assertEqual(response.permission, "read") + self.assertEqual(response.indexing_technique, "high_quality") + self.assertEqual(response.embedding_model, "text-embedding-ada-002") + self.assertEqual(response.embedding_model_provider, "openai") + self.assertEqual(response.retrieval_model["search_type"], "semantic") + self.assertEqual(response.document_count, 10) + self.assertEqual(response.word_count, 5000) + self.assertEqual(response.app_count, 2) + + def test_document_response(self): + """Test DocumentResponse model.""" + response = DocumentResponse( + success=True, + id="doc_123", + name="test_document.txt", + data_source_type="upload_file", + position=1, + enabled=True, + word_count=1000, + hit_count=5, + doc_form="text_model", + created_at=1234567890.0, + indexing_status="completed", + completed_at=1234567891.0, + ) + self.assertTrue(response.success) + self.assertEqual(response.id, "doc_123") + self.assertEqual(response.name, "test_document.txt") + self.assertEqual(response.data_source_type, "upload_file") + self.assertEqual(response.position, 1) + self.assertTrue(response.enabled) + self.assertEqual(response.word_count, 1000) + self.assertEqual(response.hit_count, 5) + self.assertEqual(response.doc_form, "text_model") + self.assertEqual(response.created_at, 1234567890.0) + self.assertEqual(response.indexing_status, "completed") + self.assertEqual(response.completed_at, 1234567891.0) + + def test_document_segment_response(self): + """Test DocumentSegmentResponse model.""" + response = DocumentSegmentResponse( + success=True, + id="seg_123", + position=1, + document_id="doc_123", + content="This is a test segment.", + answer="Test answer", + word_count=5, + tokens=10, + keywords=["test", "segment"], + hit_count=2, + enabled=True, + status="completed", + created_at=1234567890.0, + completed_at=1234567891.0, + ) + self.assertTrue(response.success) + self.assertEqual(response.id, "seg_123") + self.assertEqual(response.position, 1) + self.assertEqual(response.document_id, "doc_123") + self.assertEqual(response.content, "This is a test segment.") + self.assertEqual(response.answer, "Test answer") + self.assertEqual(response.word_count, 5) + self.assertEqual(response.tokens, 10) + self.assertEqual(response.keywords, ["test", "segment"]) + self.assertEqual(response.hit_count, 2) + self.assertTrue(response.enabled) + self.assertEqual(response.status, "completed") + self.assertEqual(response.created_at, 1234567890.0) + self.assertEqual(response.completed_at, 1234567891.0) + + def test_workflow_run_response(self): + """Test WorkflowRunResponse model.""" + response = WorkflowRunResponse( + success=True, + id="run_123", + workflow_id="workflow_123", + status="succeeded", + inputs={"query": "test"}, + outputs={"answer": "result"}, + elapsed_time=5.5, + total_tokens=100, + total_steps=3, + created_at=1234567890.0, + finished_at=1234567895.5, + ) + self.assertTrue(response.success) + self.assertEqual(response.id, "run_123") + self.assertEqual(response.workflow_id, "workflow_123") + self.assertEqual(response.status, "succeeded") + self.assertEqual(response.inputs["query"], "test") + self.assertEqual(response.outputs["answer"], "result") + self.assertEqual(response.elapsed_time, 5.5) + self.assertEqual(response.total_tokens, 100) + self.assertEqual(response.total_steps, 3) + self.assertEqual(response.created_at, 1234567890.0) + self.assertEqual(response.finished_at, 1234567895.5) + + def test_application_parameters_response(self): + """Test ApplicationParametersResponse model.""" + response = ApplicationParametersResponse( + success=True, + opening_statement="Hello! How can I help you?", + suggested_questions=["What is AI?", "How does this work?"], + speech_to_text={"enabled": True}, + text_to_speech={"enabled": False, "voice": "alloy"}, + retriever_resource={"enabled": True}, + sensitive_word_avoidance={"enabled": False}, + file_upload={"enabled": True, "file_size_limit": 10485760}, + system_parameters={"max_tokens": 1000}, + user_input_form=[{"type": "text", "label": "Query"}], + ) + self.assertTrue(response.success) + self.assertEqual(response.opening_statement, "Hello! How can I help you?") + self.assertEqual(response.suggested_questions, ["What is AI?", "How does this work?"]) + self.assertTrue(response.speech_to_text["enabled"]) + self.assertFalse(response.text_to_speech["enabled"]) + self.assertEqual(response.text_to_speech["voice"], "alloy") + self.assertTrue(response.retriever_resource["enabled"]) + self.assertFalse(response.sensitive_word_avoidance["enabled"]) + self.assertTrue(response.file_upload["enabled"]) + self.assertEqual(response.file_upload["file_size_limit"], 10485760) + self.assertEqual(response.system_parameters["max_tokens"], 1000) + self.assertEqual(response.user_input_form[0]["type"], "text") + + def test_annotation_response(self): + """Test AnnotationResponse model.""" + response = AnnotationResponse( + success=True, + id="annotation_123", + question="What is the capital of France?", + answer="Paris", + content="Additional context", + created_at=1234567890.0, + updated_at=1234567891.0, + created_by="user_123", + updated_by="user_123", + hit_count=5, + ) + self.assertTrue(response.success) + self.assertEqual(response.id, "annotation_123") + self.assertEqual(response.question, "What is the capital of France?") + self.assertEqual(response.answer, "Paris") + self.assertEqual(response.content, "Additional context") + self.assertEqual(response.created_at, 1234567890.0) + self.assertEqual(response.updated_at, 1234567891.0) + self.assertEqual(response.created_by, "user_123") + self.assertEqual(response.updated_by, "user_123") + self.assertEqual(response.hit_count, 5) + + def test_paginated_response(self): + """Test PaginatedResponse model.""" + response = PaginatedResponse( + success=True, + data=[{"id": 1}, {"id": 2}, {"id": 3}], + has_more=True, + limit=10, + total=100, + page=1, + ) + self.assertTrue(response.success) + self.assertEqual(len(response.data), 3) + self.assertEqual(response.data[0]["id"], 1) + self.assertTrue(response.has_more) + self.assertEqual(response.limit, 10) + self.assertEqual(response.total, 100) + self.assertEqual(response.page, 1) + + def test_conversation_variable_response(self): + """Test ConversationVariableResponse model.""" + response = ConversationVariableResponse( + success=True, + conversation_id="conv_123", + variables=[ + {"id": "var_1", "name": "user_name", "value": "John"}, + {"id": "var_2", "name": "preferences", "value": {"theme": "dark"}}, + ], + ) + self.assertTrue(response.success) + self.assertEqual(response.conversation_id, "conv_123") + self.assertEqual(len(response.variables), 2) + self.assertEqual(response.variables[0]["name"], "user_name") + self.assertEqual(response.variables[0]["value"], "John") + self.assertEqual(response.variables[1]["name"], "preferences") + self.assertEqual(response.variables[1]["value"]["theme"], "dark") + + def test_file_upload_response(self): + """Test FileUploadResponse model.""" + response = FileUploadResponse( + success=True, + id="file_123", + name="test.txt", + size=1024, + mime_type="text/plain", + url="https://example.com/files/test.txt", + created_at=1234567890.0, + ) + self.assertTrue(response.success) + self.assertEqual(response.id, "file_123") + self.assertEqual(response.name, "test.txt") + self.assertEqual(response.size, 1024) + self.assertEqual(response.mime_type, "text/plain") + self.assertEqual(response.url, "https://example.com/files/test.txt") + self.assertEqual(response.created_at, 1234567890.0) + + def test_audio_response(self): + """Test AudioResponse model.""" + response = AudioResponse( + success=True, + audio="base64_encoded_audio_data", + audio_url="https://example.com/audio.mp3", + duration=10.5, + sample_rate=44100, + ) + self.assertTrue(response.success) + self.assertEqual(response.audio, "base64_encoded_audio_data") + self.assertEqual(response.audio_url, "https://example.com/audio.mp3") + self.assertEqual(response.duration, 10.5) + self.assertEqual(response.sample_rate, 44100) + + def test_suggested_questions_response(self): + """Test SuggestedQuestionsResponse model.""" + response = SuggestedQuestionsResponse( + success=True, + message_id="msg_123", + questions=[ + "What is machine learning?", + "How does AI work?", + "Can you explain neural networks?", + ], + ) + self.assertTrue(response.success) + self.assertEqual(response.message_id, "msg_123") + self.assertEqual(len(response.questions), 3) + self.assertEqual(response.questions[0], "What is machine learning?") + + def test_app_info_response(self): + """Test AppInfoResponse model.""" + response = AppInfoResponse( + success=True, + id="app_123", + name="Test App", + description="A test application", + icon="🤖", + icon_background="#FF6B6B", + mode="chat", + tags=["AI", "Chat", "Test"], + enable_site=True, + enable_api=True, + api_token="app_token_123", + ) + self.assertTrue(response.success) + self.assertEqual(response.id, "app_123") + self.assertEqual(response.name, "Test App") + self.assertEqual(response.description, "A test application") + self.assertEqual(response.icon, "🤖") + self.assertEqual(response.icon_background, "#FF6B6B") + self.assertEqual(response.mode, "chat") + self.assertEqual(response.tags, ["AI", "Chat", "Test"]) + self.assertTrue(response.enable_site) + self.assertTrue(response.enable_api) + self.assertEqual(response.api_token, "app_token_123") + + def test_workspace_models_response(self): + """Test WorkspaceModelsResponse model.""" + response = WorkspaceModelsResponse( + success=True, + models=[ + {"id": "gpt-4", "name": "GPT-4", "provider": "openai"}, + {"id": "claude-3", "name": "Claude 3", "provider": "anthropic"}, + ], + ) + self.assertTrue(response.success) + self.assertEqual(len(response.models), 2) + self.assertEqual(response.models[0]["id"], "gpt-4") + self.assertEqual(response.models[0]["name"], "GPT-4") + self.assertEqual(response.models[0]["provider"], "openai") + + def test_hit_testing_response(self): + """Test HitTestingResponse model.""" + response = HitTestingResponse( + success=True, + query="What is machine learning?", + records=[ + {"content": "Machine learning is a subset of AI...", "score": 0.95}, + {"content": "ML algorithms learn from data...", "score": 0.87}, + ], + ) + self.assertTrue(response.success) + self.assertEqual(response.query, "What is machine learning?") + self.assertEqual(len(response.records), 2) + self.assertEqual(response.records[0]["score"], 0.95) + + def test_dataset_tags_response(self): + """Test DatasetTagsResponse model.""" + response = DatasetTagsResponse( + success=True, + tags=[ + {"id": "tag_1", "name": "Technology", "color": "#FF0000"}, + {"id": "tag_2", "name": "Science", "color": "#00FF00"}, + ], + ) + self.assertTrue(response.success) + self.assertEqual(len(response.tags), 2) + self.assertEqual(response.tags[0]["name"], "Technology") + self.assertEqual(response.tags[0]["color"], "#FF0000") + + def test_workflow_logs_response(self): + """Test WorkflowLogsResponse model.""" + response = WorkflowLogsResponse( + success=True, + logs=[ + {"id": "log_1", "status": "succeeded", "created_at": 1234567890}, + {"id": "log_2", "status": "failed", "created_at": 1234567891}, + ], + total=50, + page=1, + limit=10, + has_more=True, + ) + self.assertTrue(response.success) + self.assertEqual(len(response.logs), 2) + self.assertEqual(response.logs[0]["status"], "succeeded") + self.assertEqual(response.total, 50) + self.assertEqual(response.page, 1) + self.assertEqual(response.limit, 10) + self.assertTrue(response.has_more) + + def test_model_serialization(self): + """Test that models can be serialized to JSON.""" + response = MessageResponse( + success=True, + id="msg_123", + answer="Hello, world!", + conversation_id="conv_123", + ) + + # Convert to dict and then to JSON + response_dict = { + "success": response.success, + "id": response.id, + "answer": response.answer, + "conversation_id": response.conversation_id, + } + + json_str = json.dumps(response_dict) + parsed = json.loads(json_str) + + self.assertTrue(parsed["success"]) + self.assertEqual(parsed["id"], "msg_123") + self.assertEqual(parsed["answer"], "Hello, world!") + self.assertEqual(parsed["conversation_id"], "conv_123") + + # Tests for new response models + def test_model_provider_response(self): + """Test ModelProviderResponse model.""" + response = ModelProviderResponse( + success=True, + provider_name="openai", + provider_type="llm", + models=[ + {"id": "gpt-4", "name": "GPT-4", "max_tokens": 8192}, + {"id": "gpt-3.5-turbo", "name": "GPT-3.5 Turbo", "max_tokens": 4096}, + ], + is_enabled=True, + credentials={"api_key": "sk-..."}, + ) + self.assertTrue(response.success) + self.assertEqual(response.provider_name, "openai") + self.assertEqual(response.provider_type, "llm") + self.assertEqual(len(response.models), 2) + self.assertEqual(response.models[0]["id"], "gpt-4") + self.assertTrue(response.is_enabled) + self.assertEqual(response.credentials["api_key"], "sk-...") + + def test_file_info_response(self): + """Test FileInfoResponse model.""" + response = FileInfoResponse( + success=True, + id="file_123", + name="document.pdf", + size=2048576, + mime_type="application/pdf", + url="https://example.com/files/document.pdf", + created_at=1234567890, + metadata={"pages": 10, "author": "John Doe"}, + ) + self.assertTrue(response.success) + self.assertEqual(response.id, "file_123") + self.assertEqual(response.name, "document.pdf") + self.assertEqual(response.size, 2048576) + self.assertEqual(response.mime_type, "application/pdf") + self.assertEqual(response.url, "https://example.com/files/document.pdf") + self.assertEqual(response.created_at, 1234567890) + self.assertEqual(response.metadata["pages"], 10) + + def test_workflow_draft_response(self): + """Test WorkflowDraftResponse model.""" + response = WorkflowDraftResponse( + success=True, + id="draft_123", + app_id="app_456", + draft_data={"nodes": [], "edges": [], "config": {"name": "Test Workflow"}}, + version=1, + created_at=1234567890, + updated_at=1234567891, + ) + self.assertTrue(response.success) + self.assertEqual(response.id, "draft_123") + self.assertEqual(response.app_id, "app_456") + self.assertEqual(response.draft_data["config"]["name"], "Test Workflow") + self.assertEqual(response.version, 1) + self.assertEqual(response.created_at, 1234567890) + self.assertEqual(response.updated_at, 1234567891) + + def test_api_token_response(self): + """Test ApiTokenResponse model.""" + response = ApiTokenResponse( + success=True, + id="token_123", + name="Production Token", + token="app-xxxxxxxxxxxx", + description="Token for production environment", + created_at=1234567890, + last_used_at=1234567891, + is_active=True, + ) + self.assertTrue(response.success) + self.assertEqual(response.id, "token_123") + self.assertEqual(response.name, "Production Token") + self.assertEqual(response.token, "app-xxxxxxxxxxxx") + self.assertEqual(response.description, "Token for production environment") + self.assertEqual(response.created_at, 1234567890) + self.assertEqual(response.last_used_at, 1234567891) + self.assertTrue(response.is_active) + + def test_job_status_response(self): + """Test JobStatusResponse model.""" + response = JobStatusResponse( + success=True, + job_id="job_123", + job_status="running", + error_msg=None, + progress=0.75, + created_at=1234567890, + updated_at=1234567891, + ) + self.assertTrue(response.success) + self.assertEqual(response.job_id, "job_123") + self.assertEqual(response.job_status, "running") + self.assertIsNone(response.error_msg) + self.assertEqual(response.progress, 0.75) + self.assertEqual(response.created_at, 1234567890) + self.assertEqual(response.updated_at, 1234567891) + + def test_dataset_query_response(self): + """Test DatasetQueryResponse model.""" + response = DatasetQueryResponse( + success=True, + query="What is machine learning?", + records=[ + {"content": "Machine learning is...", "score": 0.95}, + {"content": "ML algorithms...", "score": 0.87}, + ], + total=2, + search_time=0.123, + retrieval_model={"method": "semantic_search", "top_k": 3}, + ) + self.assertTrue(response.success) + self.assertEqual(response.query, "What is machine learning?") + self.assertEqual(len(response.records), 2) + self.assertEqual(response.total, 2) + self.assertEqual(response.search_time, 0.123) + self.assertEqual(response.retrieval_model["method"], "semantic_search") + + def test_dataset_template_response(self): + """Test DatasetTemplateResponse model.""" + response = DatasetTemplateResponse( + success=True, + template_name="customer_support", + display_name="Customer Support", + description="Template for customer support knowledge base", + category="support", + icon="🎧", + config_schema={"fields": [{"name": "category", "type": "string"}]}, + ) + self.assertTrue(response.success) + self.assertEqual(response.template_name, "customer_support") + self.assertEqual(response.display_name, "Customer Support") + self.assertEqual(response.description, "Template for customer support knowledge base") + self.assertEqual(response.category, "support") + self.assertEqual(response.icon, "🎧") + self.assertEqual(response.config_schema["fields"][0]["name"], "category") + + +if __name__ == "__main__": + unittest.main() diff --git a/sdks/python-client/tests/test_retry_and_error_handling.py b/sdks/python-client/tests/test_retry_and_error_handling.py new file mode 100644 index 0000000000..bd415bde43 --- /dev/null +++ b/sdks/python-client/tests/test_retry_and_error_handling.py @@ -0,0 +1,313 @@ +"""Unit tests for retry mechanism and error handling.""" + +import unittest +from unittest.mock import Mock, patch, MagicMock +import httpx +from dify_client.client import DifyClient +from dify_client.exceptions import ( + APIError, + AuthenticationError, + RateLimitError, + ValidationError, + NetworkError, + TimeoutError, + FileUploadError, +) + + +class TestRetryMechanism(unittest.TestCase): + """Test cases for retry mechanism.""" + + def setUp(self): + self.api_key = "test_api_key" + self.base_url = "https://api.dify.ai/v1" + self.client = DifyClient( + api_key=self.api_key, + base_url=self.base_url, + max_retries=3, + retry_delay=0.1, # Short delay for tests + enable_logging=False, + ) + + @patch("httpx.Client.request") + def test_successful_request_no_retry(self, mock_request): + """Test that successful requests don't trigger retries.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.content = b'{"success": true}' + mock_request.return_value = mock_response + + response = self.client._send_request("GET", "/test") + + self.assertEqual(response, mock_response) + self.assertEqual(mock_request.call_count, 1) + + @patch("httpx.Client.request") + @patch("time.sleep") + def test_retry_on_network_error(self, mock_sleep, mock_request): + """Test retry on network errors.""" + # First two calls raise network error, third succeeds + mock_request.side_effect = [ + httpx.NetworkError("Connection failed"), + httpx.NetworkError("Connection failed"), + Mock(status_code=200, content=b'{"success": true}'), + ] + mock_response = Mock() + mock_response.status_code = 200 + mock_response.content = b'{"success": true}' + + response = self.client._send_request("GET", "/test") + + self.assertEqual(response.status_code, 200) + self.assertEqual(mock_request.call_count, 3) + self.assertEqual(mock_sleep.call_count, 2) + + @patch("httpx.Client.request") + @patch("time.sleep") + def test_retry_on_timeout_error(self, mock_sleep, mock_request): + """Test retry on timeout errors.""" + mock_request.side_effect = [ + httpx.TimeoutException("Request timed out"), + httpx.TimeoutException("Request timed out"), + Mock(status_code=200, content=b'{"success": true}'), + ] + + response = self.client._send_request("GET", "/test") + + self.assertEqual(response.status_code, 200) + self.assertEqual(mock_request.call_count, 3) + self.assertEqual(mock_sleep.call_count, 2) + + @patch("httpx.Client.request") + @patch("time.sleep") + def test_max_retries_exceeded(self, mock_sleep, mock_request): + """Test behavior when max retries are exceeded.""" + mock_request.side_effect = httpx.NetworkError("Persistent network error") + + with self.assertRaises(NetworkError): + self.client._send_request("GET", "/test") + + self.assertEqual(mock_request.call_count, 4) # 1 initial + 3 retries + self.assertEqual(mock_sleep.call_count, 3) + + @patch("httpx.Client.request") + def test_no_retry_on_client_error(self, mock_request): + """Test that client errors (4xx) don't trigger retries.""" + mock_response = Mock() + mock_response.status_code = 401 + mock_response.json.return_value = {"message": "Unauthorized"} + mock_request.return_value = mock_response + + with self.assertRaises(AuthenticationError): + self.client._send_request("GET", "/test") + + self.assertEqual(mock_request.call_count, 1) + + @patch("httpx.Client.request") + def test_retry_on_server_error(self, mock_request): + """Test that server errors (5xx) don't retry - they raise APIError immediately.""" + mock_response_500 = Mock() + mock_response_500.status_code = 500 + mock_response_500.json.return_value = {"message": "Internal server error"} + + mock_request.return_value = mock_response_500 + + with self.assertRaises(APIError) as context: + self.client._send_request("GET", "/test") + + self.assertEqual(str(context.exception), "Internal server error") + self.assertEqual(context.exception.status_code, 500) + # Should not retry server errors + self.assertEqual(mock_request.call_count, 1) + + @patch("httpx.Client.request") + def test_exponential_backoff(self, mock_request): + """Test exponential backoff timing.""" + mock_request.side_effect = [ + httpx.NetworkError("Connection failed"), + httpx.NetworkError("Connection failed"), + httpx.NetworkError("Connection failed"), + httpx.NetworkError("Connection failed"), # All attempts fail + ] + + with patch("time.sleep") as mock_sleep: + with self.assertRaises(NetworkError): + self.client._send_request("GET", "/test") + + # Check exponential backoff: 0.1, 0.2, 0.4 + expected_calls = [0.1, 0.2, 0.4] + actual_calls = [call[0][0] for call in mock_sleep.call_args_list] + self.assertEqual(actual_calls, expected_calls) + + +class TestErrorHandling(unittest.TestCase): + """Test cases for error handling.""" + + def setUp(self): + self.client = DifyClient(api_key="test_api_key", enable_logging=False) + + @patch("httpx.Client.request") + def test_authentication_error(self, mock_request): + """Test AuthenticationError handling.""" + mock_response = Mock() + mock_response.status_code = 401 + mock_response.json.return_value = {"message": "Invalid API key"} + mock_request.return_value = mock_response + + with self.assertRaises(AuthenticationError) as context: + self.client._send_request("GET", "/test") + + self.assertEqual(str(context.exception), "Invalid API key") + self.assertEqual(context.exception.status_code, 401) + + @patch("httpx.Client.request") + def test_rate_limit_error(self, mock_request): + """Test RateLimitError handling.""" + mock_response = Mock() + mock_response.status_code = 429 + mock_response.json.return_value = {"message": "Rate limit exceeded"} + mock_response.headers = {"Retry-After": "60"} + mock_request.return_value = mock_response + + with self.assertRaises(RateLimitError) as context: + self.client._send_request("GET", "/test") + + self.assertEqual(str(context.exception), "Rate limit exceeded") + self.assertEqual(context.exception.retry_after, "60") + + @patch("httpx.Client.request") + def test_validation_error(self, mock_request): + """Test ValidationError handling.""" + mock_response = Mock() + mock_response.status_code = 422 + mock_response.json.return_value = {"message": "Invalid parameters"} + mock_request.return_value = mock_response + + with self.assertRaises(ValidationError) as context: + self.client._send_request("GET", "/test") + + self.assertEqual(str(context.exception), "Invalid parameters") + self.assertEqual(context.exception.status_code, 422) + + @patch("httpx.Client.request") + def test_api_error(self, mock_request): + """Test general APIError handling.""" + mock_response = Mock() + mock_response.status_code = 500 + mock_response.json.return_value = {"message": "Internal server error"} + mock_request.return_value = mock_response + + with self.assertRaises(APIError) as context: + self.client._send_request("GET", "/test") + + self.assertEqual(str(context.exception), "Internal server error") + self.assertEqual(context.exception.status_code, 500) + + @patch("httpx.Client.request") + def test_error_response_without_json(self, mock_request): + """Test error handling when response doesn't contain valid JSON.""" + mock_response = Mock() + mock_response.status_code = 500 + mock_response.content = b"Internal Server Error" + mock_response.json.side_effect = ValueError("No JSON object could be decoded") + mock_request.return_value = mock_response + + with self.assertRaises(APIError) as context: + self.client._send_request("GET", "/test") + + self.assertEqual(str(context.exception), "HTTP 500") + + @patch("httpx.Client.request") + def test_file_upload_error(self, mock_request): + """Test FileUploadError handling.""" + mock_response = Mock() + mock_response.status_code = 400 + mock_response.json.return_value = {"message": "File upload failed"} + mock_request.return_value = mock_response + + with self.assertRaises(FileUploadError) as context: + self.client._send_request_with_files("POST", "/upload", {}, {}) + + self.assertEqual(str(context.exception), "File upload failed") + self.assertEqual(context.exception.status_code, 400) + + +class TestParameterValidation(unittest.TestCase): + """Test cases for parameter validation.""" + + def setUp(self): + self.client = DifyClient(api_key="test_api_key", enable_logging=False) + + def test_empty_string_validation(self): + """Test validation of empty strings.""" + with self.assertRaises(ValidationError): + self.client._validate_params(empty_string="") + + def test_whitespace_only_string_validation(self): + """Test validation of whitespace-only strings.""" + with self.assertRaises(ValidationError): + self.client._validate_params(whitespace_string=" ") + + def test_long_string_validation(self): + """Test validation of overly long strings.""" + long_string = "a" * 10001 # Exceeds 10000 character limit + with self.assertRaises(ValidationError): + self.client._validate_params(long_string=long_string) + + def test_large_list_validation(self): + """Test validation of overly large lists.""" + large_list = list(range(1001)) # Exceeds 1000 item limit + with self.assertRaises(ValidationError): + self.client._validate_params(large_list=large_list) + + def test_large_dict_validation(self): + """Test validation of overly large dictionaries.""" + large_dict = {f"key_{i}": i for i in range(101)} # Exceeds 100 item limit + with self.assertRaises(ValidationError): + self.client._validate_params(large_dict=large_dict) + + def test_valid_parameters_pass(self): + """Test that valid parameters pass validation.""" + # Should not raise any exception + self.client._validate_params( + valid_string="Hello, World!", + valid_list=[1, 2, 3], + valid_dict={"key": "value"}, + none_value=None, + ) + + def test_message_feedback_validation(self): + """Test validation in message_feedback method.""" + with self.assertRaises(ValidationError): + self.client.message_feedback("msg_id", "invalid_rating", "user") + + def test_completion_message_validation(self): + """Test validation in create_completion_message method.""" + from dify_client.client import CompletionClient + + client = CompletionClient("test_api_key") + + with self.assertRaises(ValidationError): + client.create_completion_message( + inputs="not_a_dict", # Should be a dict + response_mode="invalid_mode", # Should be 'blocking' or 'streaming' + user="test_user", + ) + + def test_chat_message_validation(self): + """Test validation in create_chat_message method.""" + from dify_client.client import ChatClient + + client = ChatClient("test_api_key") + + with self.assertRaises(ValidationError): + client.create_chat_message( + inputs="not_a_dict", # Should be a dict + query="", # Should not be empty + user="test_user", + response_mode="invalid_mode", # Should be 'blocking' or 'streaming' + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/sdks/python-client/uv.lock b/sdks/python-client/uv.lock index 19f348289b..4a9d7d5193 100644 --- a/sdks/python-client/uv.lock +++ b/sdks/python-client/uv.lock @@ -59,7 +59,7 @@ version = "0.1.12" source = { editable = "." } dependencies = [ { name = "aiofiles" }, - { name = "httpx" }, + { name = "httpx", extra = ["http2"] }, ] [package.optional-dependencies] @@ -71,7 +71,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "aiofiles", specifier = ">=23.0.0" }, - { name = "httpx", specifier = ">=0.27.0" }, + { name = "httpx", extras = ["http2"], specifier = ">=0.27.0" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.21.0" }, ] @@ -98,6 +98,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "h2" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, +] + +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -126,6 +148,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[package.optional-dependencies] +http2 = [ + { name = "h2" }, +] + +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, +] + [[package]] name = "idna" version = "3.10" diff --git a/web/.env.example b/web/.env.example index 4c5c8641e0..eff6f77fd9 100644 --- a/web/.env.example +++ b/web/.env.example @@ -12,6 +12,9 @@ NEXT_PUBLIC_API_PREFIX=http://localhost:5001/console/api # console or api domain. # example: http://udify.app/api NEXT_PUBLIC_PUBLIC_API_PREFIX=http://localhost:5001/api +# When the frontend and backend run on different subdomains, set NEXT_PUBLIC_COOKIE_DOMAIN=1. +NEXT_PUBLIC_COOKIE_DOMAIN= + # The API PREFIX for MARKETPLACE NEXT_PUBLIC_MARKETPLACE_API_PREFIX=https://marketplace.dify.ai/api/v1 # The URL for MARKETPLACE diff --git a/web/.husky/pre-commit b/web/.husky/pre-commit index 1db4b6dd67..26e9bf69d4 100644 --- a/web/.husky/pre-commit +++ b/web/.husky/pre-commit @@ -44,9 +44,32 @@ fi if $web_modified; then echo "Running ESLint on web module" + + if git diff --cached --quiet -- 'web/**/*.ts' 'web/**/*.tsx'; then + web_ts_modified=false + else + ts_diff_status=$? + if [ $ts_diff_status -eq 1 ]; then + web_ts_modified=true + else + echo "Unable to determine staged TypeScript changes (git exit code: $ts_diff_status)." + exit $ts_diff_status + fi + fi + cd ./web || exit 1 lint-staged + if $web_ts_modified; then + echo "Running TypeScript type-check" + if ! pnpm run type-check; then + echo "Type check failed. Please run 'pnpm run type-check' to fix the errors." + exit 1 + fi + else + echo "No staged TypeScript changes detected, skipping type-check" + fi + echo "Running unit tests check" modified_files=$(git diff --cached --name-only -- utils | grep -v '\.spec\.ts$' || true) diff --git a/web/.storybook/utils/form-story-wrapper.tsx b/web/.storybook/utils/form-story-wrapper.tsx new file mode 100644 index 0000000000..689c3a20ff --- /dev/null +++ b/web/.storybook/utils/form-story-wrapper.tsx @@ -0,0 +1,83 @@ +import { useState } from 'react' +import type { ReactNode } from 'react' +import { useStore } from '@tanstack/react-form' +import { useAppForm } from '@/app/components/base/form' + +type UseAppFormOptions = Parameters[0] +type AppFormInstance = ReturnType + +type FormStoryWrapperProps = { + options?: UseAppFormOptions + children: (form: AppFormInstance) => ReactNode + title?: string + subtitle?: string +} + +export const FormStoryWrapper = ({ + options, + children, + title, + subtitle, +}: FormStoryWrapperProps) => { + const [lastSubmitted, setLastSubmitted] = useState(null) + const [submitCount, setSubmitCount] = useState(0) + + const form = useAppForm({ + ...options, + onSubmit: (context) => { + setSubmitCount(count => count + 1) + setLastSubmitted(context.value) + options?.onSubmit?.(context) + }, + }) + + const values = useStore(form.store, state => state.values) + const isSubmitting = useStore(form.store, state => state.isSubmitting) + const canSubmit = useStore(form.store, state => state.canSubmit) + + return ( +
+
+ {(title || subtitle) && ( +
+ {title &&

{title}

} + {subtitle &&

{subtitle}

} +
+ )} + {children(form)} +
+ +
+ ) +} + +export type FormStoryRender = (form: AppFormInstance) => ReactNode diff --git a/web/README.md b/web/README.md index a47cfab041..6daf1e922e 100644 --- a/web/README.md +++ b/web/README.md @@ -32,6 +32,7 @@ NEXT_PUBLIC_EDITION=SELF_HOSTED # different from api or web app domain. # example: http://cloud.dify.ai/console/api NEXT_PUBLIC_API_PREFIX=http://localhost:5001/console/api +NEXT_PUBLIC_COOKIE_DOMAIN= # The URL for Web APP, refers to the Web App base URL of WEB service if web app domain is different from # console or api domain. # example: http://udify.app/api @@ -41,6 +42,11 @@ NEXT_PUBLIC_PUBLIC_API_PREFIX=http://localhost:5001/api NEXT_PUBLIC_SENTRY_DSN= ``` +> [!IMPORTANT] +> +> 1. When the frontend and backend run on different subdomains, set NEXT_PUBLIC_COOKIE_DOMAIN=1. The frontend and backend must be under the same top-level domain in order to share authentication cookies. +> 1. It's necessary to set NEXT_PUBLIC_API_PREFIX and NEXT_PUBLIC_PUBLIC_API_PREFIX to the correct backend API URL. + Finally, run the development server: ```bash diff --git a/web/__tests__/check-i18n.test.ts b/web/__tests__/check-i18n.test.ts index b579f22d4b..7773edcdbb 100644 --- a/web/__tests__/check-i18n.test.ts +++ b/web/__tests__/check-i18n.test.ts @@ -759,4 +759,104 @@ export default translation` expect(result).not.toContain('Zbuduj inteligentnego agenta') }) }) + + describe('Performance and Scalability', () => { + it('should handle large translation files efficiently', async () => { + // Create a large translation file with 1000 keys + const largeContent = `const translation = { +${Array.from({ length: 1000 }, (_, i) => ` key${i}: 'value${i}',`).join('\n')} +} + +export default translation` + + fs.writeFileSync(path.join(testEnDir, 'large.ts'), largeContent) + + const startTime = Date.now() + const keys = await getKeysFromLanguage('en-US') + const endTime = Date.now() + + expect(keys.length).toBe(1000) + expect(endTime - startTime).toBeLessThan(1000) // Should complete in under 1 second + }) + + it('should handle multiple translation files concurrently', async () => { + // Create multiple files + for (let i = 0; i < 10; i++) { + const content = `const translation = { + key${i}: 'value${i}', + nested${i}: { + subkey: 'subvalue' + } +} + +export default translation` + fs.writeFileSync(path.join(testEnDir, `file${i}.ts`), content) + } + + const startTime = Date.now() + const keys = await getKeysFromLanguage('en-US') + const endTime = Date.now() + + expect(keys.length).toBe(20) // 10 files * 2 keys each + expect(endTime - startTime).toBeLessThan(500) + }) + }) + + describe('Unicode and Internationalization', () => { + it('should handle Unicode characters in keys and values', async () => { + const unicodeContent = `const translation = { + '中文键': '中文值', + 'العربية': 'قيمة', + 'emoji_😀': 'value with emoji 🎉', + 'mixed_中文_English': 'mixed value' +} + +export default translation` + + fs.writeFileSync(path.join(testEnDir, 'unicode.ts'), unicodeContent) + + const keys = await getKeysFromLanguage('en-US') + + expect(keys).toContain('unicode.中文键') + expect(keys).toContain('unicode.العربية') + expect(keys).toContain('unicode.emoji_😀') + expect(keys).toContain('unicode.mixed_中文_English') + }) + + it('should handle RTL language files', async () => { + const rtlContent = `const translation = { + مرحبا: 'Hello', + العالم: 'World', + nested: { + مفتاح: 'key' + } +} + +export default translation` + + fs.writeFileSync(path.join(testEnDir, 'rtl.ts'), rtlContent) + + const keys = await getKeysFromLanguage('en-US') + + expect(keys).toContain('rtl.مرحبا') + expect(keys).toContain('rtl.العالم') + expect(keys).toContain('rtl.nested.مفتاح') + }) + }) + + describe('Error Recovery', () => { + it('should handle syntax errors in translation files gracefully', async () => { + const invalidContent = `const translation = { + validKey: 'valid value', + invalidKey: 'missing quote, + anotherKey: 'another value' +} + +export default translation` + + fs.writeFileSync(path.join(testEnDir, 'invalid.ts'), invalidContent) + + await expect(getKeysFromLanguage('en-US')).rejects.toThrow() + }) + }) }) diff --git a/web/__tests__/embedded-user-id-auth.test.tsx b/web/__tests__/embedded-user-id-auth.test.tsx new file mode 100644 index 0000000000..5c3c3c943f --- /dev/null +++ b/web/__tests__/embedded-user-id-auth.test.tsx @@ -0,0 +1,132 @@ +import React from 'react' +import { fireEvent, render, screen, waitFor } from '@testing-library/react' + +import MailAndPasswordAuth from '@/app/(shareLayout)/webapp-signin/components/mail-and-password-auth' +import CheckCode from '@/app/(shareLayout)/webapp-signin/check-code/page' + +jest.mock('react-i18next', () => ({ + useTranslation: () => ({ + t: (key: string) => key, + }), +})) + +const replaceMock = jest.fn() +const backMock = jest.fn() + +jest.mock('next/navigation', () => ({ + usePathname: jest.fn(() => '/chatbot/test-app'), + useRouter: jest.fn(() => ({ + replace: replaceMock, + back: backMock, + })), + useSearchParams: jest.fn(), +})) + +const mockStoreState = { + embeddedUserId: 'embedded-user-99', + shareCode: 'test-app', +} + +const useWebAppStoreMock = jest.fn((selector?: (state: typeof mockStoreState) => any) => { + return selector ? selector(mockStoreState) : mockStoreState +}) + +jest.mock('@/context/web-app-context', () => ({ + useWebAppStore: (selector?: (state: typeof mockStoreState) => any) => useWebAppStoreMock(selector), +})) + +const webAppLoginMock = jest.fn() +const webAppEmailLoginWithCodeMock = jest.fn() +const sendWebAppEMailLoginCodeMock = jest.fn() + +jest.mock('@/service/common', () => ({ + webAppLogin: (...args: any[]) => webAppLoginMock(...args), + webAppEmailLoginWithCode: (...args: any[]) => webAppEmailLoginWithCodeMock(...args), + sendWebAppEMailLoginCode: (...args: any[]) => sendWebAppEMailLoginCodeMock(...args), +})) + +const fetchAccessTokenMock = jest.fn() + +jest.mock('@/service/share', () => ({ + fetchAccessToken: (...args: any[]) => fetchAccessTokenMock(...args), +})) + +const setWebAppAccessTokenMock = jest.fn() +const setWebAppPassportMock = jest.fn() + +jest.mock('@/service/webapp-auth', () => ({ + setWebAppAccessToken: (...args: any[]) => setWebAppAccessTokenMock(...args), + setWebAppPassport: (...args: any[]) => setWebAppPassportMock(...args), + webAppLogout: jest.fn(), +})) + +jest.mock('@/app/components/signin/countdown', () => () =>
) + +jest.mock('@remixicon/react', () => ({ + RiMailSendFill: () =>
, + RiArrowLeftLine: () =>
, +})) + +const { useSearchParams } = jest.requireMock('next/navigation') as { + useSearchParams: jest.Mock +} + +beforeEach(() => { + jest.clearAllMocks() +}) + +describe('embedded user id propagation in authentication flows', () => { + it('passes embedded user id when logging in with email and password', async () => { + const params = new URLSearchParams() + params.set('redirect_url', encodeURIComponent('/chatbot/test-app')) + useSearchParams.mockReturnValue(params) + + webAppLoginMock.mockResolvedValue({ result: 'success', data: { access_token: 'login-token' } }) + fetchAccessTokenMock.mockResolvedValue({ access_token: 'passport-token' }) + + render() + + fireEvent.change(screen.getByLabelText('login.email'), { target: { value: 'user@example.com' } }) + fireEvent.change(screen.getByLabelText(/login\.password/), { target: { value: 'strong-password' } }) + fireEvent.click(screen.getByRole('button', { name: 'login.signBtn' })) + + await waitFor(() => { + expect(fetchAccessTokenMock).toHaveBeenCalledWith({ + appCode: 'test-app', + userId: 'embedded-user-99', + }) + }) + expect(setWebAppAccessTokenMock).toHaveBeenCalledWith('login-token') + expect(setWebAppPassportMock).toHaveBeenCalledWith('test-app', 'passport-token') + expect(replaceMock).toHaveBeenCalledWith('/chatbot/test-app') + }) + + it('passes embedded user id when verifying email code', async () => { + const params = new URLSearchParams() + params.set('redirect_url', encodeURIComponent('/chatbot/test-app')) + params.set('email', encodeURIComponent('user@example.com')) + params.set('token', encodeURIComponent('token-abc')) + useSearchParams.mockReturnValue(params) + + webAppEmailLoginWithCodeMock.mockResolvedValue({ result: 'success', data: { access_token: 'code-token' } }) + fetchAccessTokenMock.mockResolvedValue({ access_token: 'passport-token' }) + + render() + + fireEvent.change( + screen.getByPlaceholderText('login.checkCode.verificationCodePlaceholder'), + { target: { value: '123456' } }, + ) + fireEvent.click(screen.getByRole('button', { name: 'login.checkCode.verify' })) + + await waitFor(() => { + expect(fetchAccessTokenMock).toHaveBeenCalledWith({ + appCode: 'test-app', + userId: 'embedded-user-99', + }) + }) + expect(setWebAppAccessTokenMock).toHaveBeenCalledWith('code-token') + expect(setWebAppPassportMock).toHaveBeenCalledWith('test-app', 'passport-token') + expect(replaceMock).toHaveBeenCalledWith('/chatbot/test-app') + }) +}) diff --git a/web/__tests__/embedded-user-id-store.test.tsx b/web/__tests__/embedded-user-id-store.test.tsx new file mode 100644 index 0000000000..24a815222e --- /dev/null +++ b/web/__tests__/embedded-user-id-store.test.tsx @@ -0,0 +1,155 @@ +import React from 'react' +import { render, screen, waitFor } from '@testing-library/react' + +import WebAppStoreProvider, { useWebAppStore } from '@/context/web-app-context' + +jest.mock('next/navigation', () => ({ + usePathname: jest.fn(() => '/chatbot/sample-app'), + useSearchParams: jest.fn(() => { + const params = new URLSearchParams() + return params + }), +})) + +jest.mock('@/service/use-share', () => { + const { AccessMode } = jest.requireActual('@/models/access-control') + return { + useGetWebAppAccessModeByCode: jest.fn(() => ({ + isLoading: false, + data: { accessMode: AccessMode.PUBLIC }, + })), + } +}) + +jest.mock('@/app/components/base/chat/utils', () => ({ + getProcessedSystemVariablesFromUrlParams: jest.fn(), +})) + +const { getProcessedSystemVariablesFromUrlParams: mockGetProcessedSystemVariablesFromUrlParams } + = jest.requireMock('@/app/components/base/chat/utils') as { + getProcessedSystemVariablesFromUrlParams: jest.Mock + } + +jest.mock('@/context/global-public-context', () => { + const mockGlobalStoreState = { + isGlobalPending: false, + setIsGlobalPending: jest.fn(), + systemFeatures: {}, + setSystemFeatures: jest.fn(), + } + const useGlobalPublicStore = Object.assign( + (selector?: (state: typeof mockGlobalStoreState) => any) => + selector ? selector(mockGlobalStoreState) : mockGlobalStoreState, + { + setState: (updater: any) => { + if (typeof updater === 'function') + Object.assign(mockGlobalStoreState, updater(mockGlobalStoreState) ?? {}) + + else + Object.assign(mockGlobalStoreState, updater) + }, + __mockState: mockGlobalStoreState, + }, + ) + return { + useGlobalPublicStore, + } +}) + +const { + useGlobalPublicStore: useGlobalPublicStoreMock, +} = jest.requireMock('@/context/global-public-context') as { + useGlobalPublicStore: ((selector?: (state: any) => any) => any) & { + setState: (updater: any) => void + __mockState: { + isGlobalPending: boolean + setIsGlobalPending: jest.Mock + systemFeatures: Record + setSystemFeatures: jest.Mock + } + } +} +const mockGlobalStoreState = useGlobalPublicStoreMock.__mockState + +const TestConsumer = () => { + const embeddedUserId = useWebAppStore(state => state.embeddedUserId) + const embeddedConversationId = useWebAppStore(state => state.embeddedConversationId) + return ( + <> +
{embeddedUserId ?? 'null'}
+
{embeddedConversationId ?? 'null'}
+ + ) +} + +const initialWebAppStore = (() => { + const snapshot = useWebAppStore.getState() + return { + shareCode: null as string | null, + appInfo: null, + appParams: null, + webAppAccessMode: snapshot.webAppAccessMode, + appMeta: null, + userCanAccessApp: false, + embeddedUserId: null, + embeddedConversationId: null, + updateShareCode: snapshot.updateShareCode, + updateAppInfo: snapshot.updateAppInfo, + updateAppParams: snapshot.updateAppParams, + updateWebAppAccessMode: snapshot.updateWebAppAccessMode, + updateWebAppMeta: snapshot.updateWebAppMeta, + updateUserCanAccessApp: snapshot.updateUserCanAccessApp, + updateEmbeddedUserId: snapshot.updateEmbeddedUserId, + updateEmbeddedConversationId: snapshot.updateEmbeddedConversationId, + } +})() + +beforeEach(() => { + mockGlobalStoreState.isGlobalPending = false + mockGetProcessedSystemVariablesFromUrlParams.mockReset() + useWebAppStore.setState(initialWebAppStore, true) +}) + +describe('WebAppStoreProvider embedded user id handling', () => { + it('hydrates embedded user and conversation ids from system variables', async () => { + mockGetProcessedSystemVariablesFromUrlParams.mockResolvedValue({ + user_id: 'iframe-user-123', + conversation_id: 'conversation-456', + }) + + render( + + + , + ) + + await waitFor(() => { + expect(screen.getByTestId('embedded-user-id')).toHaveTextContent('iframe-user-123') + expect(screen.getByTestId('embedded-conversation-id')).toHaveTextContent('conversation-456') + }) + expect(useWebAppStore.getState().embeddedUserId).toBe('iframe-user-123') + expect(useWebAppStore.getState().embeddedConversationId).toBe('conversation-456') + }) + + it('clears embedded user id when system variable is absent', async () => { + useWebAppStore.setState(state => ({ + ...state, + embeddedUserId: 'previous-user', + embeddedConversationId: 'existing-conversation', + })) + mockGetProcessedSystemVariablesFromUrlParams.mockResolvedValue({}) + + render( + + + , + ) + + await waitFor(() => { + expect(screen.getByTestId('embedded-user-id')).toHaveTextContent('null') + expect(screen.getByTestId('embedded-conversation-id')).toHaveTextContent('null') + }) + expect(useWebAppStore.getState().embeddedUserId).toBeNull() + expect(useWebAppStore.getState().embeddedConversationId).toBeNull() + }) +}) diff --git a/web/__tests__/navigation-utils.test.ts b/web/__tests__/navigation-utils.test.ts index fa4986e63d..3eeba52943 100644 --- a/web/__tests__/navigation-utils.test.ts +++ b/web/__tests__/navigation-utils.test.ts @@ -286,4 +286,116 @@ describe('Navigation Utilities', () => { expect(mockPush).toHaveBeenCalledWith('/datasets/filtered-set/documents?page=1&limit=50&status=active&type=pdf&sort=created_at&order=desc') }) }) + + describe('Edge Cases and Error Handling', () => { + test('handles special characters in query parameters', () => { + Object.defineProperty(window, 'location', { + value: { search: '?keyword=hello%20world&filter=type%3Apdf&tag=%E4%B8%AD%E6%96%87' }, + writable: true, + }) + + const path = createNavigationPath('/datasets/123/documents') + expect(path).toContain('hello+world') + expect(path).toContain('type%3Apdf') + expect(path).toContain('%E4%B8%AD%E6%96%87') + }) + + test('handles duplicate query parameters', () => { + Object.defineProperty(window, 'location', { + value: { search: '?tag=tag1&tag=tag2&tag=tag3' }, + writable: true, + }) + + const params = extractQueryParams(['tag']) + // URLSearchParams.get() returns the first value + expect(params.tag).toBe('tag1') + }) + + test('handles very long query strings', () => { + const longValue = 'a'.repeat(1000) + Object.defineProperty(window, 'location', { + value: { search: `?data=${longValue}` }, + writable: true, + }) + + const path = createNavigationPath('/datasets/123/documents') + expect(path).toContain(longValue) + expect(path.length).toBeGreaterThan(1000) + }) + + test('handles empty string values in query parameters', () => { + const path = createNavigationPathWithParams('/datasets/123/documents', { + page: 1, + keyword: '', + filter: '', + sort: 'name', + }) + + expect(path).toBe('/datasets/123/documents?page=1&sort=name') + expect(path).not.toContain('keyword=') + expect(path).not.toContain('filter=') + }) + + test('handles null and undefined values in mergeQueryParams', () => { + Object.defineProperty(window, 'location', { + value: { search: '?page=1&limit=10&keyword=test' }, + writable: true, + }) + + const merged = mergeQueryParams({ + keyword: null, + filter: undefined, + sort: 'name', + }) + const result = merged.toString() + + expect(result).toContain('page=1') + expect(result).toContain('limit=10') + expect(result).not.toContain('keyword') + expect(result).toContain('sort=name') + }) + + test('handles navigation with hash fragments', () => { + Object.defineProperty(window, 'location', { + value: { search: '?page=1', hash: '#section-2' }, + writable: true, + }) + + const path = createNavigationPath('/datasets/123/documents') + // Should preserve query params but not hash + expect(path).toBe('/datasets/123/documents?page=1') + }) + + test('handles malformed query strings gracefully', () => { + Object.defineProperty(window, 'location', { + value: { search: '?page=1&invalid&limit=10&=value&key=' }, + writable: true, + }) + + const params = extractQueryParams(['page', 'limit', 'invalid', 'key']) + expect(params.page).toBe('1') + expect(params.limit).toBe('10') + // Malformed params should be handled by URLSearchParams + expect(params.invalid).toBe('') // for `&invalid` + expect(params.key).toBe('') // for `&key=` + }) + }) + + describe('Performance Tests', () => { + test('handles large number of query parameters efficiently', () => { + const manyParams = Array.from({ length: 50 }, (_, i) => `param${i}=value${i}`).join('&') + Object.defineProperty(window, 'location', { + value: { search: `?${manyParams}` }, + writable: true, + }) + + const startTime = Date.now() + const path = createNavigationPath('/datasets/123/documents') + const endTime = Date.now() + + expect(endTime - startTime).toBeLessThan(50) // Should be fast + expect(path).toContain('param0=value0') + expect(path).toContain('param49=value49') + }) + }) }) diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/card-view.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/card-view.tsx index d404da0461..fb431c5ac8 100644 --- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/card-view.tsx +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/card-view.tsx @@ -1,6 +1,6 @@ 'use client' import type { FC } from 'react' -import React from 'react' +import React, { useCallback, useMemo } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' import AppCard from '@/app/components/app/overview/app-card' @@ -15,11 +15,16 @@ import { updateAppSiteStatus, } from '@/service/apps' import type { App } from '@/types/app' +import { AppModeEnum } from '@/types/app' import type { UpdateAppSiteCodeResponse } from '@/models/app' import { asyncRunSafe } from '@/utils' import { NEED_REFRESH_APP_LIST_KEY } from '@/config' import type { IAppCardProps } from '@/app/components/app/overview/app-card' import { useStore as useAppStore } from '@/app/components/app/store' +import { useAppWorkflow } from '@/service/use-workflow' +import type { BlockEnum } from '@/app/components/workflow/types' +import { isTriggerNode } from '@/app/components/workflow/types' +import { useDocLink } from '@/context/i18n' export type ICardViewProps = { appId: string @@ -29,12 +34,56 @@ export type ICardViewProps = { const CardView: FC = ({ appId, isInPanel, className }) => { const { t } = useTranslation() + const docLink = useDocLink() const { notify } = useContext(ToastContext) const appDetail = useAppStore(state => state.appDetail) const setAppDetail = useAppStore(state => state.setAppDetail) + const isWorkflowApp = appDetail?.mode === AppModeEnum.WORKFLOW const showMCPCard = isInPanel - const showTriggerCard = isInPanel && appDetail?.mode === 'workflow' + const showTriggerCard = isInPanel && isWorkflowApp + const { data: currentWorkflow } = useAppWorkflow(isWorkflowApp ? appDetail.id : '') + const hasTriggerNode = useMemo(() => { + if (!isWorkflowApp) + return false + if (!currentWorkflow) + return null + const nodes = currentWorkflow.graph?.nodes || [] + return nodes.some((node) => { + const nodeType = node.data?.type as BlockEnum | undefined + return !!nodeType && isTriggerNode(nodeType) + }) + }, [isWorkflowApp, currentWorkflow]) + const shouldRenderAppCards = !isWorkflowApp || hasTriggerNode === false + const disableAppCards = !shouldRenderAppCards + + const triggerDocUrl = docLink('/guides/workflow/node/start') + const buildTriggerModeMessage = useCallback((featureName: string) => ( +
+
+ {t('appOverview.overview.disableTooltip.triggerMode', { feature: featureName })} +
+
{ + event.stopPropagation() + window.open(triggerDocUrl, '_blank') + }} + > + {t('appOverview.overview.appInfo.enableTooltip.learnMore')} +
+
+ ), [t, triggerDocUrl]) + + const disableWebAppTooltip = disableAppCards + ? buildTriggerModeMessage(t('appOverview.overview.appInfo.title')) + : null + const disableApiTooltip = disableAppCards + ? buildTriggerModeMessage(t('appOverview.overview.apiInfo.title')) + : null + const disableMcpTooltip = disableAppCards + ? buildTriggerModeMessage(t('tools.mcp.server.title')) + : null const updateAppDetail = async () => { try { @@ -106,12 +155,14 @@ const CardView: FC = ({ appId, isInPanel, className }) => { if (!appDetail) return - return ( -
+ const appCards = ( + <> = ({ appId, isInPanel, className }) => { cardType="api" appInfo={appDetail} isInPanel={isInPanel} + triggerModeDisabled={disableAppCards} + triggerModeMessage={disableApiTooltip} onChangeStatus={onChangeApiStatus} /> {showMCPCard && ( )} - {showTriggerCard && ( - - )} + + ) + + const triggerCardNode = showTriggerCard ? ( + + ) : null + + return ( +
+ {disableAppCards && triggerCardNode} + {appCards} + {!disableAppCards && triggerCardNode}
) } diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/chart-view.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/chart-view.tsx index 6d5e74fb90..64cd2fbd28 100644 --- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/chart-view.tsx +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/chart-view.tsx @@ -1,20 +1,26 @@ 'use client' -import { TIME_PERIOD_MAPPING } from '@/app/components/app/log/filter' +import React, { useState } from 'react' +import dayjs from 'dayjs' +import quarterOfYear from 'dayjs/plugin/quarterOfYear' +import { useTranslation } from 'react-i18next' import type { PeriodParams } from '@/app/components/app/overview/app-chart' import { AvgResponseTime, AvgSessionInteractions, AvgUserInteractions, ConversationsChart, CostChart, EndUsersChart, MessagesChart, TokenPerSecond, UserSatisfactionRate, WorkflowCostChart, WorkflowDailyTerminalsChart, WorkflowMessagesChart } from '@/app/components/app/overview/app-chart' import { useStore as useAppStore } from '@/app/components/app/store' -import type { Item } from '@/app/components/base/select' -import { SimpleSelect } from '@/app/components/base/select' -import { AppModeEnum } from '@/types/app' -import dayjs from 'dayjs' -import quarterOfYear from 'dayjs/plugin/quarterOfYear' -import React, { useState } from 'react' -import { useTranslation } from 'react-i18next' +import TimeRangePicker from './time-range-picker' +import { TIME_PERIOD_MAPPING as LONG_TIME_PERIOD_MAPPING } from '@/app/components/app/log/filter' +import { IS_CLOUD_EDITION } from '@/config' +import LongTimeRangePicker from './long-time-range-picker' dayjs.extend(quarterOfYear) const today = dayjs() +const TIME_PERIOD_MAPPING = [ + { value: 0, name: 'today' }, + { value: 7, name: 'last7days' }, + { value: 30, name: 'last30days' }, +] + const queryDateFormat = 'YYYY-MM-DD HH:mm' export type IChartViewProps = { @@ -25,23 +31,12 @@ export type IChartViewProps = { export default function ChartView({ appId, headerRight }: IChartViewProps) { const { t } = useTranslation() const appDetail = useAppStore(state => state.appDetail) - const isChatApp = appDetail?.mode !== AppModeEnum.COMPLETION && appDetail?.mode !== AppModeEnum.WORKFLOW - const isWorkflow = appDetail?.mode === AppModeEnum.WORKFLOW - const [period, setPeriod] = useState({ name: t('appLog.filter.period.last7days'), query: { start: today.subtract(7, 'day').startOf('day').format(queryDateFormat), end: today.endOf('day').format(queryDateFormat) } }) - - const onSelect = (item: Item) => { - if (item.value === -1) { - setPeriod({ name: item.name, query: undefined }) - } - else if (item.value === 0) { - const startOfToday = today.startOf('day').format(queryDateFormat) - const endOfToday = today.endOf('day').format(queryDateFormat) - setPeriod({ name: item.name, query: { start: startOfToday, end: endOfToday } }) - } - else { - setPeriod({ name: item.name, query: { start: today.subtract(item.value as number, 'day').startOf('day').format(queryDateFormat), end: today.endOf('day').format(queryDateFormat) } }) - } - } + const isChatApp = appDetail?.mode !== 'completion' && appDetail?.mode !== 'workflow' + const isWorkflow = appDetail?.mode === 'workflow' + const [period, setPeriod] = useState(IS_CLOUD_EDITION + ? { name: t('appLog.filter.period.today'), query: { start: today.startOf('day').format(queryDateFormat), end: today.endOf('day').format(queryDateFormat) } } + : { name: t('appLog.filter.period.last7days'), query: { start: today.subtract(7, 'day').startOf('day').format(queryDateFormat), end: today.endOf('day').format(queryDateFormat) } }, + ) if (!appDetail) return null @@ -51,20 +46,20 @@ export default function ChartView({ appId, headerRight }: IChartViewProps) {
{t('common.appMenus.overview')}
-
- ({ value: k, name: t(`appLog.filter.period.${v.name}`) }))} - className='mt-0 !w-40' - notClearable={true} - onSelect={(item) => { - const id = item.value - const value = TIME_PERIOD_MAPPING[id]?.value ?? '-1' - const name = item.name || t('appLog.filter.period.allTime') - onSelect({ value, name }) - }} - defaultValue={'2'} + {IS_CLOUD_EDITION ? ( + -
+ ) : ( + + )} + {headerRight}
diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/long-time-range-picker.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/long-time-range-picker.tsx new file mode 100644 index 0000000000..cad4d41a0e --- /dev/null +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/long-time-range-picker.tsx @@ -0,0 +1,63 @@ +'use client' +import type { PeriodParams } from '@/app/components/app/overview/app-chart' +import type { FC } from 'react' +import React from 'react' +import type { Item } from '@/app/components/base/select' +import { SimpleSelect } from '@/app/components/base/select' +import { useTranslation } from 'react-i18next' +import dayjs from 'dayjs' +type Props = { + periodMapping: { [key: string]: { value: number; name: string } } + onSelect: (payload: PeriodParams) => void + queryDateFormat: string +} + +const today = dayjs() + +const LongTimeRangePicker: FC = ({ + periodMapping, + onSelect, + queryDateFormat, +}) => { + const { t } = useTranslation() + + const handleSelect = React.useCallback((item: Item) => { + const id = item.value + const value = periodMapping[id]?.value ?? '-1' + const name = item.name || t('appLog.filter.period.allTime') + if (value === -1) { + onSelect({ name: t('appLog.filter.period.allTime'), query: undefined }) + } + else if (value === 0) { + const startOfToday = today.startOf('day').format(queryDateFormat) + const endOfToday = today.endOf('day').format(queryDateFormat) + onSelect({ + name, + query: { + start: startOfToday, + end: endOfToday, + }, + }) + } + else { + onSelect({ + name, + query: { + start: today.subtract(value as number, 'day').startOf('day').format(queryDateFormat), + end: today.endOf('day').format(queryDateFormat), + }, + }) + } + }, [onSelect, periodMapping, queryDateFormat, t]) + + return ( + ({ value: k, name: t(`appLog.filter.period.${v.name}`) }))} + className='mt-0 !w-40' + notClearable={true} + onSelect={handleSelect} + defaultValue={'2'} + /> + ) +} +export default React.memo(LongTimeRangePicker) diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/date-picker.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/date-picker.tsx new file mode 100644 index 0000000000..2bfdece433 --- /dev/null +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/date-picker.tsx @@ -0,0 +1,80 @@ +'use client' +import { RiCalendarLine } from '@remixicon/react' +import type { Dayjs } from 'dayjs' +import type { FC } from 'react' +import React, { useCallback } from 'react' +import cn from '@/utils/classnames' +import { formatToLocalTime } from '@/utils/format' +import { useI18N } from '@/context/i18n' +import Picker from '@/app/components/base/date-and-time-picker/date-picker' +import type { TriggerProps } from '@/app/components/base/date-and-time-picker/types' +import { noop } from 'lodash-es' +import dayjs from 'dayjs' + +type Props = { + start: Dayjs + end: Dayjs + onStartChange: (date?: Dayjs) => void + onEndChange: (date?: Dayjs) => void +} + +const today = dayjs() +const DatePicker: FC = ({ + start, + end, + onStartChange, + onEndChange, +}) => { + const { locale } = useI18N() + + const renderDate = useCallback(({ value, handleClickTrigger, isOpen }: TriggerProps) => { + return ( +
+ {value ? formatToLocalTime(value, locale, 'MMM D') : ''} +
+ ) + }, [locale]) + + const availableStartDate = end.subtract(30, 'day') + const startDateDisabled = useCallback((date: Dayjs) => { + if (date.isAfter(today, 'date')) + return true + return !((date.isAfter(availableStartDate, 'date') || date.isSame(availableStartDate, 'date')) && (date.isBefore(end, 'date') || date.isSame(end, 'date'))) + }, [availableStartDate, end]) + + const availableEndDate = start.add(30, 'day') + const endDateDisabled = useCallback((date: Dayjs) => { + if (date.isAfter(today, 'date')) + return true + return !((date.isAfter(start, 'date') || date.isSame(start, 'date')) && (date.isBefore(availableEndDate, 'date') || date.isSame(availableEndDate, 'date'))) + }, [availableEndDate, start]) + + return ( +
+
+ +
+ + - + +
+ + ) +} +export default React.memo(DatePicker) diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/index.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/index.tsx new file mode 100644 index 0000000000..4738bdeebf --- /dev/null +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/index.tsx @@ -0,0 +1,86 @@ +'use client' +import type { PeriodParams, PeriodParamsWithTimeRange } from '@/app/components/app/overview/app-chart' +import type { FC } from 'react' +import React, { useCallback, useState } from 'react' +import type { Dayjs } from 'dayjs' +import { HourglassShape } from '@/app/components/base/icons/src/vender/other' +import RangeSelector from './range-selector' +import DatePicker from './date-picker' +import dayjs from 'dayjs' +import { useI18N } from '@/context/i18n' +import { formatToLocalTime } from '@/utils/format' + +const today = dayjs() + +type Props = { + ranges: { value: number; name: string }[] + onSelect: (payload: PeriodParams) => void + queryDateFormat: string +} + +const TimeRangePicker: FC = ({ + ranges, + onSelect, + queryDateFormat, +}) => { + const { locale } = useI18N() + + const [isCustomRange, setIsCustomRange] = useState(false) + const [start, setStart] = useState(today) + const [end, setEnd] = useState(today) + + const handleRangeChange = useCallback((payload: PeriodParamsWithTimeRange) => { + setIsCustomRange(false) + setStart(payload.query!.start) + setEnd(payload.query!.end) + onSelect({ + name: payload.name, + query: { + start: payload.query!.start.format(queryDateFormat), + end: payload.query!.end.format(queryDateFormat), + }, + }) + }, [onSelect, queryDateFormat]) + + const handleDateChange = useCallback((type: 'start' | 'end') => { + return (date?: Dayjs) => { + if (!date) return + if (type === 'start' && date.isSame(start)) return + if (type === 'end' && date.isSame(end)) return + if (type === 'start') + setStart(date) + else + setEnd(date) + + const currStart = type === 'start' ? date : start + const currEnd = type === 'end' ? date : end + onSelect({ + name: `${formatToLocalTime(currStart, locale, 'MMM D')} - ${formatToLocalTime(currEnd, locale, 'MMM D')}`, + query: { + start: currStart.format(queryDateFormat), + end: currEnd.format(queryDateFormat), + }, + }) + + setIsCustomRange(true) + } + }, [start, end, onSelect, locale, queryDateFormat]) + + return ( +
+ + + +
+ ) +} +export default React.memo(TimeRangePicker) diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/range-selector.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/range-selector.tsx new file mode 100644 index 0000000000..f99ea52492 --- /dev/null +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/range-selector.tsx @@ -0,0 +1,81 @@ +'use client' +import type { PeriodParamsWithTimeRange, TimeRange } from '@/app/components/app/overview/app-chart' +import type { FC } from 'react' +import React, { useCallback } from 'react' +import { SimpleSelect } from '@/app/components/base/select' +import type { Item } from '@/app/components/base/select' +import dayjs from 'dayjs' +import { RiArrowDownSLine, RiCheckLine } from '@remixicon/react' +import cn from '@/utils/classnames' +import { useTranslation } from 'react-i18next' + +const today = dayjs() + +type Props = { + isCustomRange: boolean + ranges: { value: number; name: string }[] + onSelect: (payload: PeriodParamsWithTimeRange) => void +} + +const RangeSelector: FC = ({ + isCustomRange, + ranges, + onSelect, +}) => { + const { t } = useTranslation() + + const handleSelectRange = useCallback((item: Item) => { + const { name, value } = item + let period: TimeRange | null = null + if (value === 0) { + const startOfToday = today.startOf('day') + const endOfToday = today.endOf('day') + period = { start: startOfToday, end: endOfToday } + } + else { + period = { start: today.subtract(item.value as number, 'day').startOf('day'), end: today.endOf('day') } + } + onSelect({ query: period!, name }) + }, [onSelect]) + + const renderTrigger = useCallback((item: Item | null, isOpen: boolean) => { + return ( +
+
{isCustomRange ? t('appLog.filter.period.custom') : item?.name}
+ +
+ ) + }, [isCustomRange]) + + const renderOption = useCallback(({ item, selected }: { item: Item; selected: boolean }) => { + return ( + <> + {selected && ( + + + )} + {item.name} + + ) + }, []) + return ( + ({ ...v, name: t(`appLog.filter.period.${v.name}`) }))} + className='mt-0 !w-40' + notClearable={true} + onSelect={handleSelectRange} + defaultValue={0} + wrapperClassName='h-8' + optionWrapClassName='w-[200px] translate-x-[-24px]' + renderTrigger={renderTrigger} + optionClassName='flex items-center py-0 pl-7 pr-2 h-8' + renderOption={renderOption} + /> + ) +} +export default React.memo(RangeSelector) diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/provider-config-modal.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/provider-config-modal.tsx index 9682bf6a07..5933e73e66 100644 --- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/provider-config-modal.tsx +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/provider-config-modal.tsx @@ -532,7 +532,7 @@ const ProviderConfigModal: FC = ({ > {t('common.operation.remove')} - + )} + setVerifyCode(e.target.value)} + maxLength={6} + className='mt-1' + placeholder={t('login.checkCode.verificationCodePlaceholder') || ''} + /> +
diff --git a/web/app/(shareLayout)/webapp-signin/components/mail-and-password-auth.tsx b/web/app/(shareLayout)/webapp-signin/components/mail-and-password-auth.tsx index ce220b103e..0136445ac9 100644 --- a/web/app/(shareLayout)/webapp-signin/components/mail-and-password-auth.tsx +++ b/web/app/(shareLayout)/webapp-signin/components/mail-and-password-auth.tsx @@ -10,6 +10,7 @@ import { emailRegex } from '@/config' import { webAppLogin } from '@/service/common' import Input from '@/app/components/base/input' import I18NContext from '@/context/i18n' +import { useWebAppStore } from '@/context/web-app-context' import { noop } from 'lodash-es' import { fetchAccessToken } from '@/service/share' import { setWebAppAccessToken, setWebAppPassport } from '@/service/webapp-auth' @@ -30,6 +31,7 @@ export default function MailAndPasswordAuth({ isEmailSetup }: MailAndPasswordAut const [isLoading, setIsLoading] = useState(false) const redirectUrl = searchParams.get('redirect_url') + const embeddedUserId = useWebAppStore(s => s.embeddedUserId) const getAppCodeFromRedirectUrl = useCallback(() => { if (!redirectUrl) @@ -82,7 +84,10 @@ export default function MailAndPasswordAuth({ isEmailSetup }: MailAndPasswordAut if (res.result === 'success') { setWebAppAccessToken(res.data.access_token) - const { access_token } = await fetchAccessToken({ appCode: appCode! }) + const { access_token } = await fetchAccessToken({ + appCode: appCode!, + userId: embeddedUserId || undefined, + }) setWebAppPassport(appCode!, access_token) router.replace(decodeURIComponent(redirectUrl)) } diff --git a/web/app/account/oauth/authorize/constants.ts b/web/app/account/oauth/authorize/constants.ts new file mode 100644 index 0000000000..f1d8b98ef4 --- /dev/null +++ b/web/app/account/oauth/authorize/constants.ts @@ -0,0 +1,3 @@ +export const OAUTH_AUTHORIZE_PENDING_KEY = 'oauth_authorize_pending' +export const REDIRECT_URL_KEY = 'oauth_redirect_url' +export const OAUTH_AUTHORIZE_PENDING_TTL = 60 * 3 diff --git a/web/app/account/oauth/authorize/page.tsx b/web/app/account/oauth/authorize/page.tsx index 4aa5fa0b8e..c9b26b97c1 100644 --- a/web/app/account/oauth/authorize/page.tsx +++ b/web/app/account/oauth/authorize/page.tsx @@ -19,11 +19,11 @@ import { } from '@remixicon/react' import dayjs from 'dayjs' import { useIsLogin } from '@/service/use-common' - -export const OAUTH_AUTHORIZE_PENDING_KEY = 'oauth_authorize_pending' -export const REDIRECT_URL_KEY = 'oauth_redirect_url' - -const OAUTH_AUTHORIZE_PENDING_TTL = 60 * 3 +import { + OAUTH_AUTHORIZE_PENDING_KEY, + OAUTH_AUTHORIZE_PENDING_TTL, + REDIRECT_URL_KEY, +} from './constants' function setItemWithExpiry(key: string, value: string, ttl: number) { const item = { diff --git a/web/app/components/app-sidebar/app-info.tsx b/web/app/components/app-sidebar/app-info.tsx index c2bda8d8fc..f143c2fcef 100644 --- a/web/app/components/app-sidebar/app-info.tsx +++ b/web/app/components/app-sidebar/app-info.tsx @@ -239,7 +239,7 @@ const AppInfo = ({ expand, onlyShowDetail = false, openState = false, onDetailEx const secondaryOperations: Operation[] = [ // Import DSL (conditional) - ...(appDetail.mode !== AppModeEnum.AGENT_CHAT && (appDetail.mode === AppModeEnum.ADVANCED_CHAT || appDetail.mode === AppModeEnum.WORKFLOW)) ? [{ + ...(appDetail.mode === AppModeEnum.ADVANCED_CHAT || appDetail.mode === AppModeEnum.WORKFLOW) ? [{ id: 'import', title: t('workflow.common.importDSL'), icon: , @@ -271,7 +271,7 @@ const AppInfo = ({ expand, onlyShowDetail = false, openState = false, onDetailEx ] // Keep the switch operation separate as it's not part of the main operations - const switchOperation = (appDetail.mode !== AppModeEnum.AGENT_CHAT && (appDetail.mode === AppModeEnum.COMPLETION || appDetail.mode === AppModeEnum.CHAT)) ? { + const switchOperation = (appDetail.mode === AppModeEnum.COMPLETION || appDetail.mode === AppModeEnum.CHAT) ? { id: 'switch', title: t('app.switch'), icon: , diff --git a/web/app/components/app-sidebar/app-operations.tsx b/web/app/components/app-sidebar/app-operations.tsx index 07b982502d..ca0ff85316 100644 --- a/web/app/components/app-sidebar/app-operations.tsx +++ b/web/app/components/app-sidebar/app-operations.tsx @@ -1,107 +1,213 @@ import type { JSX } from 'react' -import { cloneElement, useCallback } from 'react' -import { useState } from 'react' +import { cloneElement, useCallback, useEffect, useMemo, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import Button from '@/app/components/base/button' import { PortalToFollowElem, PortalToFollowElemContent, PortalToFollowElemTrigger } from '../base/portal-to-follow-elem' -import Divider from '@/app/components/base/divider' -import { RiMoreFill } from '@remixicon/react' -import cn from '@/utils/classnames' +import { RiMoreLine } from '@remixicon/react' export type Operation = { id: string title: string icon: JSX.Element onClick: () => void - type?: 'action' | 'divider' - className?: string + type?: 'divider' } -const AppOperations = ({ primaryOperations, secondaryOperations, gap }: { - primaryOperations: Operation[] - secondaryOperations: Operation[] +type AppOperationsProps = { gap: number -}) => { - const { t } = useTranslation() - const [showMore, setShowMore] = useState(false) - const handleTriggerMore = useCallback(() => { - setShowMore(prev => !prev) - }, []) + operations?: Operation[] + primaryOperations?: Operation[] + secondaryOperations?: Operation[] +} - const renderSecondaryOperation = (operation: Operation, index: number) => { - if (operation.type === 'divider') { - return ( - - ) +const EMPTY_OPERATIONS: Operation[] = [] + +const AppOperations = ({ + operations, + primaryOperations, + secondaryOperations, + gap, +}: AppOperationsProps) => { + const { t } = useTranslation() + const [visibleOpreations, setVisibleOperations] = useState([]) + const [moreOperations, setMoreOperations] = useState([]) + const [showMore, setShowMore] = useState(false) + const navRef = useRef(null) + const handleTriggerMore = useCallback(() => { + setShowMore(true) + }, [setShowMore]) + + const primaryOps = useMemo(() => { + if (operations) + return operations + if (primaryOperations) + return primaryOperations + return EMPTY_OPERATIONS + }, [operations, primaryOperations]) + + const secondaryOps = useMemo(() => { + if (operations) + return EMPTY_OPERATIONS + if (secondaryOperations) + return secondaryOperations + return EMPTY_OPERATIONS + }, [operations, secondaryOperations]) + const inlineOperations = primaryOps.filter(operation => operation.type !== 'divider') + + useEffect(() => { + const applyState = (visible: Operation[], overflow: Operation[]) => { + const combinedMore = [...overflow, ...secondaryOps] + if (!overflow.length && combinedMore[0]?.type === 'divider') + combinedMore.shift() + setVisibleOperations(visible) + setMoreOperations(combinedMore) } - return ( -
{ - setShowMore(false) - operation.onClick() - }} - > - {cloneElement(operation.icon, { - className: 'h-4 w-4 text-text-tertiary', - })} - - {operation.title} - -
- ) - } + const inline = primaryOps.filter(operation => operation.type !== 'divider') + + if (!inline.length) { + applyState([], []) + return + } + + const navElement = navRef.current + const moreElement = document.getElementById('more-measure') + + if (!navElement || !moreElement) + return + + let width = 0 + const containerWidth = navElement.clientWidth + const moreWidth = moreElement.clientWidth + + if (containerWidth === 0 || moreWidth === 0) + return + + const updatedEntries: Record = inline.reduce((pre, cur) => { + pre[cur.id] = false + return pre + }, {} as Record) + const childrens = Array.from(navElement.children).slice(0, -1) + for (let i = 0; i < childrens.length; i++) { + const child = childrens[i] as HTMLElement + const id = child.dataset.targetid + if (!id) break + const childWidth = child.clientWidth + + if (width + gap + childWidth + moreWidth <= containerWidth) { + updatedEntries[id] = true + width += gap + childWidth + } + else { + if (i === childrens.length - 1 && width + childWidth <= containerWidth) + updatedEntries[id] = true + else + updatedEntries[id] = false + break + } + } + + const visible = inline.filter(item => updatedEntries[item.id]) + const overflow = inline.filter(item => !updatedEntries[item.id]) + + applyState(visible, overflow) + }, [gap, primaryOps, secondaryOps]) + + const shouldShowMoreButton = moreOperations.length > 0 return ( -
- {/* Fixed primary operations */} - {primaryOperations.map(operation => + <> + + +
+
+ {visibleOpreations.map(operation => ( + + ))} + {shouldShowMoreButton && ( + + + + + +
+ {moreOperations.map(item => item.type === 'divider' + ? ( +
+ ) + : ( +
+ {cloneElement(item.icon, { className: 'h-4 w-4 text-text-tertiary' })} + {item.title} +
+ ))} +
+ + + )} +
+ ) } diff --git a/web/app/components/app/annotation/index.tsx b/web/app/components/app/annotation/index.tsx index 8718890e35..32d0c799fc 100644 --- a/web/app/components/app/annotation/index.tsx +++ b/web/app/components/app/annotation/index.tsx @@ -139,7 +139,7 @@ const Annotation: FC = (props) => { return (

{t('appLog.description')}

-
+
{isChatApp && ( diff --git a/web/app/components/app/annotation/list.tsx b/web/app/components/app/annotation/list.tsx index 70ecedb869..4135b4362e 100644 --- a/web/app/components/app/annotation/list.tsx +++ b/web/app/components/app/annotation/list.tsx @@ -54,95 +54,97 @@ const List: FC = ({ }, [isAllSelected, list, selectedIds, onSelectedIdsChange]) return ( -
- - - - - - - - - - - - - {list.map(item => ( - { - onView(item) - } - } - > - + {list.map(item => ( + { + onView(item) + } + } + > + + + + + + + + ))} + +
- - {t('appAnnotation.table.header.question')}{t('appAnnotation.table.header.answer')}{t('appAnnotation.table.header.createdAt')}{t('appAnnotation.table.header.hits')}{t('appAnnotation.table.header.actions')}
e.stopPropagation()}> + <> +
+ + + + - - - - - + + + + + - ))} - -
{ - if (selectedIds.includes(item.id)) - onSelectedIdsChange(selectedIds.filter(id => id !== item.id)) - else - onSelectedIdsChange([...selectedIds, item.id]) - }} + checked={isAllSelected} + indeterminate={!isAllSelected && isSomeSelected} + onCheck={handleSelectAll} /> {item.question}{item.answer}{formatTime(item.created_at, t('appLog.dateTimeFormat') as string)}{item.hit_count} e.stopPropagation()}> - {/* Actions */} -
- onView(item)}> - - - { - setCurrId(item.id) - setShowConfirmDelete(true) - }} - > - - -
-
{t('appAnnotation.table.header.question')}{t('appAnnotation.table.header.answer')}{t('appAnnotation.table.header.createdAt')}{t('appAnnotation.table.header.hits')}{t('appAnnotation.table.header.actions')}
- setShowConfirmDelete(false)} - onRemove={() => { - onRemove(currId as string) - setShowConfirmDelete(false) - }} - /> + +
e.stopPropagation()}> + { + if (selectedIds.includes(item.id)) + onSelectedIdsChange(selectedIds.filter(id => id !== item.id)) + else + onSelectedIdsChange([...selectedIds, item.id]) + }} + /> + {item.question}{item.answer}{formatTime(item.created_at, t('appLog.dateTimeFormat') as string)}{item.hit_count} e.stopPropagation()}> + {/* Actions */} +
+ onView(item)}> + + + { + setCurrId(item.id) + setShowConfirmDelete(true) + }} + > + + +
+
+ setShowConfirmDelete(false)} + onRemove={() => { + onRemove(currId as string) + setShowConfirmDelete(false) + }} + /> +
{selectedIds.length > 0 && ( )} -
+ ) } export default React.memo(List) diff --git a/web/app/components/app/app-publisher/index.tsx b/web/app/components/app/app-publisher/index.tsx index caa957b359..a11af3b816 100644 --- a/web/app/components/app/app-publisher/index.tsx +++ b/web/app/components/app/app-publisher/index.tsx @@ -1,3 +1,33 @@ +import { + memo, + useCallback, + useEffect, + useMemo, + useState, +} from 'react' +import { useTranslation } from 'react-i18next' +import { + RiArrowDownSLine, + RiArrowRightSLine, + RiBuildingLine, + RiGlobalLine, + RiLockLine, + RiPlanetLine, + RiPlayCircleLine, + RiPlayList2Line, + RiTerminalBoxLine, + RiVerifiedBadgeLine, +} from '@remixicon/react' +import { useKeyPress } from 'ahooks' +import Divider from '../../base/divider' +import Loading from '../../base/loading' +import Toast from '../../base/toast' +import Tooltip from '../../base/tooltip' +import { getKeyboardKeyCodeBySystem, getKeyboardKeyNameBySystem } from '../../workflow/utils' +import AccessControl from '../app-access-control' +import type { ModelAndParameter } from '../configuration/debug/types' +import PublishWithMultipleModel from './publish-with-multiple-model' +import SuggestedAction from './suggested-action' import EmbeddedModal from '@/app/components/app/overview/embedded' import { useStore as useAppStore } from '@/app/components/app/store' import Button from '@/app/components/base/button' @@ -14,41 +44,12 @@ import { useGlobalPublicStore } from '@/context/global-public-context' import { useFormatTimeFromNow } from '@/hooks/use-format-time-from-now' import { AccessMode } from '@/models/access-control' import { useAppWhiteListSubjects, useGetUserCanAccessApp } from '@/service/access-control' -import { fetchAppDetail } from '@/service/apps' +import { fetchAppDetailDirect } from '@/service/apps' import { fetchInstalledAppList } from '@/service/explore' import { AppModeEnum } from '@/types/app' import type { PublishWorkflowParams } from '@/types/workflow' import { basePath } from '@/utils/var' -import { - RiArrowDownSLine, - RiArrowRightSLine, - RiBuildingLine, - RiGlobalLine, - RiLockLine, - RiPlanetLine, - RiPlayCircleLine, - RiPlayList2Line, - RiTerminalBoxLine, - RiVerifiedBadgeLine, -} from '@remixicon/react' -import { useKeyPress } from 'ahooks' -import { - memo, - useCallback, - useEffect, - useMemo, - useState, -} from 'react' -import { useTranslation } from 'react-i18next' -import Divider from '../../base/divider' -import Loading from '../../base/loading' -import Toast from '../../base/toast' -import Tooltip from '../../base/tooltip' -import { getKeyboardKeyCodeBySystem, getKeyboardKeyNameBySystem } from '../../workflow/utils' -import AccessControl from '../app-access-control' -import type { ModelAndParameter } from '../configuration/debug/types' -import PublishWithMultipleModel from './publish-with-multiple-model' -import SuggestedAction from './suggested-action' +import UpgradeBtn from '@/app/components/billing/upgrade-btn' const ACCESS_MODE_MAP: Record = { [AccessMode.ORGANIZATION]: { @@ -105,6 +106,8 @@ export type AppPublisherProps = { onRefreshData?: () => void workflowToolAvailable?: boolean missingStartNode?: boolean + hasTriggerNode?: boolean // Whether workflow currently contains any trigger nodes (used to hide missing-start CTA when triggers exist). + startNodeLimitExceeded?: boolean } const PUBLISH_SHORTCUT = ['ctrl', '⇧', 'P'] @@ -125,6 +128,8 @@ const AppPublisher = ({ onRefreshData, workflowToolAvailable = true, missingStartNode = false, + hasTriggerNode = false, + startNodeLimitExceeded = false, }: AppPublisherProps) => { const { t } = useTranslation() @@ -222,11 +227,16 @@ const AppPublisher = ({ } }, [appDetail?.id]) - const handleAccessControlUpdate = useCallback(() => { - fetchAppDetail({ url: '/apps', id: appDetail!.id }).then((res) => { + const handleAccessControlUpdate = useCallback(async () => { + if (!appDetail) + return + try { + const res = await fetchAppDetailDirect({ url: '/apps', id: appDetail.id }) setAppDetail(res) + } + finally { setShowAppAccessControl(false) - }) + } }, [appDetail, setAppDetail]) useKeyPress(`${getKeyboardKeyCodeBySystem('ctrl')}.shift.p`, (e) => { @@ -239,6 +249,13 @@ const AppPublisher = ({ const hasPublishedVersion = !!publishedAt const workflowToolDisabled = !hasPublishedVersion || !workflowToolAvailable const workflowToolMessage = workflowToolDisabled ? t('workflow.common.workflowAsToolDisabledHint') : undefined + const showStartNodeLimitHint = Boolean(startNodeLimitExceeded) + const upgradeHighlightStyle = useMemo(() => ({ + background: 'linear-gradient(97deg, var(--components-input-border-active-prompt-1, rgba(11, 165, 236, 0.95)) -3.64%, var(--components-input-border-active-prompt-2, rgba(21, 90, 239, 0.95)) 45.14%)', + WebkitBackgroundClip: 'text', + backgroundClip: 'text', + WebkitTextFillColor: 'transparent', + }), []) return ( <> @@ -297,29 +314,49 @@ const AppPublisher = ({ /> ) : ( -
- ) - } - + ) + } + + {showStartNodeLimitHint && ( +
+

+ {t('workflow.publishLimit.startNodeTitlePrefix')} + {t('workflow.publishLimit.startNodeTitleSuffix')} +

+

+ {t('workflow.publishLimit.startNodeDesc')} +

+ +
+ )} + ) }
@@ -345,84 +382,88 @@ const AppPublisher = ({
{!isAppAccessSet &&

{t('app.publishApp.notSetDesc')}

}
} -
- - } - > - {t('workflow.common.runApp')} - - - {appDetail?.mode === AppModeEnum.WORKFLOW || appDetail?.mode === AppModeEnum.COMPLETION - ? ( + { + // Hide run/batch run app buttons when there is a trigger node. + !hasTriggerNode && ( +
} + link={appURL} + icon={} > - {t('workflow.common.batchRunApp')} + {t('workflow.common.runApp')} - ) - : ( - { - setEmbeddingModalOpen(true) - handleTrigger() - }} - disabled={!publishedAt} - icon={} - > - {t('workflow.common.embedIntoSite')} - - )} - - { - if (publishedAt) - handleOpenInExplore() - }} - disabled={disabledFunctionButton} - icon={} - > - {t('workflow.common.openInExplore')} - - - - } - > - {t('workflow.common.accessAPIReference')} - - - {appDetail?.mode === AppModeEnum.WORKFLOW && ( - + {appDetail?.mode === AppModeEnum.WORKFLOW || appDetail?.mode === AppModeEnum.COMPLETION + ? ( + + } + > + {t('workflow.common.batchRunApp')} + + + ) + : ( + { + setEmbeddingModalOpen(true) + handleTrigger() + }} + disabled={!publishedAt} + icon={} + > + {t('workflow.common.embedIntoSite')} + + )} + + { + if (publishedAt) + handleOpenInExplore() + }} + disabled={disabledFunctionButton} + icon={} + > + {t('workflow.common.openInExplore')} + + + + } + > + {t('workflow.common.accessAPIReference')} + + + {appDetail?.mode === AppModeEnum.WORKFLOW && ( + + )} +
)} -
}
diff --git a/web/app/components/app/configuration/base/icons/remove-icon/index.tsx b/web/app/components/app/configuration/base/icons/remove-icon/index.tsx deleted file mode 100644 index f4b30a9605..0000000000 --- a/web/app/components/app/configuration/base/icons/remove-icon/index.tsx +++ /dev/null @@ -1,31 +0,0 @@ -'use client' -import React, { useState } from 'react' -import cn from '@/utils/classnames' - -type IRemoveIconProps = { - className?: string - isHoverStatus?: boolean - onClick: () => void -} - -const RemoveIcon = ({ - className, - isHoverStatus, - onClick, -}: IRemoveIconProps) => { - const [isHovered, setIsHovered] = useState(false) - const computedIsHovered = isHoverStatus || isHovered - return ( -
setIsHovered(true)} - onMouseLeave={() => setIsHovered(false)} - onClick={onClick} - > - - - -
- ) -} -export default React.memo(RemoveIcon) diff --git a/web/app/components/app/configuration/config/agent/agent-tools/setting-built-in-tool.tsx b/web/app/components/app/configuration/config/agent/agent-tools/setting-built-in-tool.tsx index d4ef4c55b7..ef28dd222c 100644 --- a/web/app/components/app/configuration/config/agent/agent-tools/setting-built-in-tool.tsx +++ b/web/app/components/app/configuration/config/agent/agent-tools/setting-built-in-tool.tsx @@ -215,6 +215,7 @@ const SettingBuiltInTool: FC = ({ pluginPayload={{ provider: collection.name, category: AuthCategory.tool, + providerType: collection.type, detail: collection as any, }} credentialId={credentialId} diff --git a/web/app/components/app/configuration/config/automatic/get-automatic-res.tsx b/web/app/components/app/configuration/config/automatic/get-automatic-res.tsx index 33ff7b7b64..dfcaabf017 100644 --- a/web/app/components/app/configuration/config/automatic/get-automatic-res.tsx +++ b/web/app/components/app/configuration/config/automatic/get-automatic-res.tsx @@ -298,7 +298,6 @@ const GetAutomaticRes: FC = ({ portalToFollowElemContentClassName='z-[1000]' isAdvancedMode={true} provider={model.provider} - mode={model.mode} completionParams={model.completion_params} modelId={model.name} setModel={handleModelChange} diff --git a/web/app/components/app/configuration/config/code-generator/get-code-generator-res.tsx b/web/app/components/app/configuration/config/code-generator/get-code-generator-res.tsx index 3323ac6ca7..3612f89b02 100644 --- a/web/app/components/app/configuration/config/code-generator/get-code-generator-res.tsx +++ b/web/app/components/app/configuration/config/code-generator/get-code-generator-res.tsx @@ -214,7 +214,6 @@ export const GetCodeGeneratorResModal: FC = ( portalToFollowElemContentClassName='z-[1000]' isAdvancedMode={true} provider={model.provider} - mode={model.mode} completionParams={model.completion_params} modelId={model.name} setModel={handleModelChange} diff --git a/web/app/components/app/configuration/dataset-config/card-item/index.tsx b/web/app/components/app/configuration/dataset-config/card-item/index.tsx index 1220c75ed6..85d46122a3 100644 --- a/web/app/components/app/configuration/dataset-config/card-item/index.tsx +++ b/web/app/components/app/configuration/dataset-config/card-item/index.tsx @@ -1,58 +1,112 @@ 'use client' import type { FC } from 'react' -import React from 'react' +import React, { useState } from 'react' +import { + RiDeleteBinLine, + RiEditLine, +} from '@remixicon/react' import { useTranslation } from 'react-i18next' -import TypeIcon from '../type-icon' -import RemoveIcon from '../../base/icons/remove-icon' -import s from './style.module.css' -import cn from '@/utils/classnames' +import SettingsModal from '../settings-modal' import type { DataSet } from '@/models/datasets' -import { formatNumber } from '@/utils/format' -import Tooltip from '@/app/components/base/tooltip' +import ActionButton, { ActionButtonState } from '@/app/components/base/action-button' +import Drawer from '@/app/components/base/drawer' +import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' +import Badge from '@/app/components/base/badge' +import { useKnowledge } from '@/hooks/use-knowledge' +import cn from '@/utils/classnames' +import AppIcon from '@/app/components/base/app-icon' -export type ICardItemProps = { +type ItemProps = { className?: string config: DataSet onRemove: (id: string) => void readonly?: boolean + onSave: (newDataset: DataSet) => void + editable?: boolean } -const CardItem: FC = ({ - className, + +const Item: FC = ({ config, + onSave, onRemove, - readonly, + editable = true, }) => { + const media = useBreakpoints() + const isMobile = media === MediaType.mobile + const [showSettingsModal, setShowSettingsModal] = useState(false) + const { formatIndexingTechniqueAndMethod } = useKnowledge() const { t } = useTranslation() - return ( -
-
-
- -
-
-
-
{config.name}
- {!config.embedding_available && ( - - {t('dataset.unavailable')} - - )} -
-
- {formatNumber(config.word_count)} {t('appDebug.feature.dataSet.words')} · {formatNumber(config.document_count)} {t('appDebug.feature.dataSet.textBlocks')} -
-
-
+ const handleSave = (newDataset: DataSet) => { + onSave(newDataset) + setShowSettingsModal(false) + } - {!readonly && onRemove(config.id)} />} -
+ const [isDeleting, setIsDeleting] = useState(false) + + const iconInfo = config.icon_info || { + icon: '📙', + icon_type: 'emoji', + icon_background: '#FFF4ED', + icon_url: '', + } + + return ( +
+
+ +
{config.name}
+
+
+ { + editable && { + e.stopPropagation() + setShowSettingsModal(true) + }} + > + + + } + onRemove(config.id)} + state={isDeleting ? ActionButtonState.Destructive : ActionButtonState.Default} + onMouseEnter={() => setIsDeleting(true)} + onMouseLeave={() => setIsDeleting(false)} + > + + +
+ { + config.indexing_technique && + } + { + config.provider === 'external' && + } + setShowSettingsModal(false)} footer={null} mask={isMobile} panelClassName='mt-16 mx-2 sm:mr-2 mb-3 !p-0 !max-w-[640px] rounded-xl'> + setShowSettingsModal(false)} + onSave={handleSave} + /> + +
) } -export default React.memo(CardItem) + +export default Item diff --git a/web/app/components/app/configuration/dataset-config/card-item/item.tsx b/web/app/components/app/configuration/dataset-config/card-item/item.tsx deleted file mode 100644 index 85d46122a3..0000000000 --- a/web/app/components/app/configuration/dataset-config/card-item/item.tsx +++ /dev/null @@ -1,112 +0,0 @@ -'use client' -import type { FC } from 'react' -import React, { useState } from 'react' -import { - RiDeleteBinLine, - RiEditLine, -} from '@remixicon/react' -import { useTranslation } from 'react-i18next' -import SettingsModal from '../settings-modal' -import type { DataSet } from '@/models/datasets' -import ActionButton, { ActionButtonState } from '@/app/components/base/action-button' -import Drawer from '@/app/components/base/drawer' -import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' -import Badge from '@/app/components/base/badge' -import { useKnowledge } from '@/hooks/use-knowledge' -import cn from '@/utils/classnames' -import AppIcon from '@/app/components/base/app-icon' - -type ItemProps = { - className?: string - config: DataSet - onRemove: (id: string) => void - readonly?: boolean - onSave: (newDataset: DataSet) => void - editable?: boolean -} - -const Item: FC = ({ - config, - onSave, - onRemove, - editable = true, -}) => { - const media = useBreakpoints() - const isMobile = media === MediaType.mobile - const [showSettingsModal, setShowSettingsModal] = useState(false) - const { formatIndexingTechniqueAndMethod } = useKnowledge() - const { t } = useTranslation() - - const handleSave = (newDataset: DataSet) => { - onSave(newDataset) - setShowSettingsModal(false) - } - - const [isDeleting, setIsDeleting] = useState(false) - - const iconInfo = config.icon_info || { - icon: '📙', - icon_type: 'emoji', - icon_background: '#FFF4ED', - icon_url: '', - } - - return ( -
-
- -
{config.name}
-
-
- { - editable && { - e.stopPropagation() - setShowSettingsModal(true) - }} - > - - - } - onRemove(config.id)} - state={isDeleting ? ActionButtonState.Destructive : ActionButtonState.Default} - onMouseEnter={() => setIsDeleting(true)} - onMouseLeave={() => setIsDeleting(false)} - > - - -
- { - config.indexing_technique && - } - { - config.provider === 'external' && - } - setShowSettingsModal(false)} footer={null} mask={isMobile} panelClassName='mt-16 mx-2 sm:mr-2 mb-3 !p-0 !max-w-[640px] rounded-xl'> - setShowSettingsModal(false)} - onSave={handleSave} - /> - -
- ) -} - -export default Item diff --git a/web/app/components/app/configuration/dataset-config/card-item/style.module.css b/web/app/components/app/configuration/dataset-config/card-item/style.module.css deleted file mode 100644 index da07056cbc..0000000000 --- a/web/app/components/app/configuration/dataset-config/card-item/style.module.css +++ /dev/null @@ -1,22 +0,0 @@ -.card { - box-shadow: 0px 1px 2px rgba(16, 24, 40, 0.05); - width: 100%; -} - -.card:hover { - box-shadow: 0px 4px 8px -2px rgba(16, 24, 40, 0.1), 0px 2px 4px -2px rgba(16, 24, 40, 0.06); -} - -.btnWrap { - padding-left: 64px; - visibility: hidden; - background: linear-gradient(270deg, #FFF 49.99%, rgba(255, 255, 255, 0.00) 98.1%); -} - -.card:hover .btnWrap { - visibility: visible; -} - -.settingBtn:hover { - background-color: rgba(0, 0, 0, 0.05); -} diff --git a/web/app/components/app/configuration/dataset-config/index.tsx b/web/app/components/app/configuration/dataset-config/index.tsx index 489ea1207b..bf81858565 100644 --- a/web/app/components/app/configuration/dataset-config/index.tsx +++ b/web/app/components/app/configuration/dataset-config/index.tsx @@ -9,7 +9,7 @@ import { v4 as uuid4 } from 'uuid' import { useFormattingChangedDispatcher } from '../debug/hooks' import FeaturePanel from '../base/feature-panel' import OperationBtn from '../base/operation-btn' -import CardItem from './card-item/item' +import CardItem from './card-item' import ParamsConfig from './params-config' import ContextVar from './context-var' import ConfigContext from '@/context/debug-configuration' diff --git a/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx b/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx index 1558d32fc6..8e06d6c901 100644 --- a/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx +++ b/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx @@ -368,7 +368,6 @@ const ConfigContent: FC = ({ popupClassName='!w-[387px]' portalToFollowElemContentClassName='!z-[1002]' isAdvancedMode={true} - mode={model?.mode} provider={model?.provider} completionParams={model?.completion_params} modelId={model?.name} diff --git a/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx b/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx index 62f1010b54..93d0384aee 100644 --- a/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx +++ b/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx @@ -16,6 +16,7 @@ import { useToastContext } from '@/app/components/base/toast' import { updateDatasetSetting } from '@/service/datasets' import { useAppContext } from '@/context/app-context' import { useModalContext } from '@/context/modal-context' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' import type { RetrievalConfig } from '@/types/app' import RetrievalSettings from '@/app/components/datasets/external-knowledge-base/create/RetrievalSettings' import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config' @@ -277,7 +278,7 @@ const SettingsModal: FC = ({
{t('datasetSettings.form.embeddingModelTip')} - setShowAccountSettingModal({ payload: 'provider' })}>{t('datasetSettings.form.embeddingModelTipLink')} + setShowAccountSettingModal({ payload: ACCOUNT_SETTING_TAB.PROVIDER })}>{t('datasetSettings.form.embeddingModelTipLink')}
diff --git a/web/app/components/app/configuration/dataset-config/type-icon/index.tsx b/web/app/components/app/configuration/dataset-config/type-icon/index.tsx deleted file mode 100644 index 65951f662f..0000000000 --- a/web/app/components/app/configuration/dataset-config/type-icon/index.tsx +++ /dev/null @@ -1,33 +0,0 @@ -'use client' -import type { FC } from 'react' -import React from 'react' - -export type ITypeIconProps = { - type: 'upload_file' - size?: 'md' | 'lg' -} - -// data_source_type: current only support upload_file -const Icon = ({ type, size = 'lg' }: ITypeIconProps) => { - const len = size === 'lg' ? 32 : 24 - const iconMap = { - upload_file: ( - - - - - - ), - } - return iconMap[type] -} - -const TypeIcon: FC = ({ - type, - size = 'lg', -}) => { - return ( - - ) -} -export default React.memo(TypeIcon) diff --git a/web/app/components/app/configuration/debug/debug-with-multiple-model/model-parameter-trigger.tsx b/web/app/components/app/configuration/debug/debug-with-multiple-model/model-parameter-trigger.tsx index 17d04acdc7..e7c4d98733 100644 --- a/web/app/components/app/configuration/debug/debug-with-multiple-model/model-parameter-trigger.tsx +++ b/web/app/components/app/configuration/debug/debug-with-multiple-model/model-parameter-trigger.tsx @@ -26,7 +26,6 @@ const ModelParameterTrigger: FC = ({ }) => { const { t } = useTranslation() const { - mode, isAdvancedMode, } = useDebugConfigurationContext() const { @@ -57,7 +56,6 @@ const ModelParameterTrigger: FC = ({ return ( = ({ const config: TextGenerationConfig = { pre_prompt: !isAdvancedMode ? modelConfig.configs.prompt_template : '', prompt_type: promptMode, - chat_prompt_config: isAdvancedMode ? chatPromptConfig : {}, - completion_prompt_config: isAdvancedMode ? completionPromptConfig : {}, + chat_prompt_config: isAdvancedMode ? chatPromptConfig : cloneDeep(DEFAULT_CHAT_PROMPT_CONFIG), + completion_prompt_config: isAdvancedMode ? completionPromptConfig : cloneDeep(DEFAULT_COMPLETION_PROMPT_CONFIG), user_input_form: promptVariablesToUserInputsForm(modelConfig.configs.prompt_variables), dataset_query_variable: contextVar || '', // features @@ -74,6 +75,7 @@ const TextGenerationItem: FC = ({ datasets: [...postDatasets], } as any, }, + system_parameters: modelConfig.system_parameters, } const { completion, diff --git a/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx b/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx index d439b00939..506e18cc62 100644 --- a/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx +++ b/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx @@ -6,7 +6,7 @@ import { import Chat from '@/app/components/base/chat/chat' import { useChat } from '@/app/components/base/chat/chat/hooks' import { useDebugConfigurationContext } from '@/context/debug-configuration' -import type { ChatConfig, ChatItem, ChatItemInTree, OnSend } from '@/app/components/base/chat/types' +import type { ChatConfig, ChatItem, OnSend } from '@/app/components/base/chat/types' import { useProviderContext } from '@/context/provider-context' import { fetchConversationMessages, @@ -126,7 +126,7 @@ const DebugWithSingleModel = ( ) }, [appId, chatList, checkCanSend, completionParams, config, handleSend, inputs, modelConfig.mode, modelConfig.model_id, modelConfig.provider, textGenerationModelList]) - const doRegenerate = useCallback((chatItem: ChatItemInTree, editedQuestion?: { message: string, files?: FileEntity[] }) => { + const doRegenerate = useCallback((chatItem: ChatItem, editedQuestion?: { message: string, files?: FileEntity[] }) => { const question = editedQuestion ? chatItem : chatList.find(item => item.id === chatItem.parentMessageId)! const parentAnswer = chatList.find(item => item.id === question.parentMessageId) doSend(editedQuestion ? editedQuestion.message : question.content, diff --git a/web/app/components/app/configuration/debug/hooks.tsx b/web/app/components/app/configuration/debug/hooks.tsx index 12022e706a..9f628c46af 100644 --- a/web/app/components/app/configuration/debug/hooks.tsx +++ b/web/app/components/app/configuration/debug/hooks.tsx @@ -12,12 +12,15 @@ import type { ChatConfig, ChatItem, } from '@/app/components/base/chat/types' +import cloneDeep from 'lodash-es/cloneDeep' import { AgentStrategy, } from '@/types/app' +import { SupportUploadFileTypes } from '@/app/components/workflow/types' import { promptVariablesToUserInputsForm } from '@/utils/model-config' import { useDebugConfigurationContext } from '@/context/debug-configuration' import { useEventEmitterContextContext } from '@/context/event-emitter' +import { DEFAULT_CHAT_PROMPT_CONFIG, DEFAULT_COMPLETION_PROMPT_CONFIG } from '@/config' export const useDebugWithSingleOrMultipleModel = (appId: string) => { const localeDebugWithSingleOrMultipleModelConfigs = localStorage.getItem('app-debug-with-single-or-multiple-models') @@ -95,16 +98,14 @@ export const useConfigFromDebugContext = () => { const config: ChatConfig = { pre_prompt: !isAdvancedMode ? modelConfig.configs.prompt_template : '', prompt_type: promptMode, - chat_prompt_config: isAdvancedMode ? chatPromptConfig : {}, - completion_prompt_config: isAdvancedMode ? completionPromptConfig : {}, + chat_prompt_config: isAdvancedMode ? chatPromptConfig : cloneDeep(DEFAULT_CHAT_PROMPT_CONFIG), + completion_prompt_config: isAdvancedMode ? completionPromptConfig : cloneDeep(DEFAULT_COMPLETION_PROMPT_CONFIG), user_input_form: promptVariablesToUserInputsForm(modelConfig.configs.prompt_variables), dataset_query_variable: contextVar || '', opening_statement: introduction, - more_like_this: { - enabled: false, - }, + more_like_this: modelConfig.more_like_this ?? { enabled: false }, suggested_questions: openingSuggestedQuestions, - suggested_questions_after_answer: suggestedQuestionsAfterAnswerConfig, + suggested_questions_after_answer: suggestedQuestionsAfterAnswerConfig ?? { enabled: false }, text_to_speech: textToSpeechConfig, speech_to_text: speechToTextConfig, retriever_resource: citationConfig, @@ -121,8 +122,13 @@ export const useConfigFromDebugContext = () => { }, file_upload: { image: visionConfig, + allowed_file_upload_methods: visionConfig.transfer_methods ?? [], + allowed_file_types: [SupportUploadFileTypes.image], + max_length: visionConfig.number_limits ?? 0, + number_limits: visionConfig.number_limits, }, annotation_reply: annotationConfig, + system_parameters: modelConfig.system_parameters, supportAnnotation: true, appId, diff --git a/web/app/components/app/configuration/debug/index.tsx b/web/app/components/app/configuration/debug/index.tsx index e3a9776253..63df8edeae 100644 --- a/web/app/components/app/configuration/debug/index.tsx +++ b/web/app/components/app/configuration/debug/index.tsx @@ -3,6 +3,7 @@ import type { FC } from 'react' import { useTranslation } from 'react-i18next' import React, { useCallback, useEffect, useRef, useState } from 'react' import { produce, setAutoFreeze } from 'immer' +import cloneDeep from 'lodash-es/cloneDeep' import { useBoolean } from 'ahooks' import { RiAddLine, @@ -36,7 +37,7 @@ import ActionButton, { ActionButtonState } from '@/app/components/base/action-bu import type { ModelConfig as BackendModelConfig, VisionFile, VisionSettings } from '@/types/app' import { formatBooleanInputs, promptVariablesToUserInputsForm } from '@/utils/model-config' import TextGeneration from '@/app/components/app/text-generate/item' -import { IS_CE_EDITION } from '@/config' +import { DEFAULT_CHAT_PROMPT_CONFIG, DEFAULT_COMPLETION_PROMPT_CONFIG, IS_CE_EDITION } from '@/config' import type { Inputs } from '@/models/debug' import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks' import { ModelFeatureEnum, ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' @@ -90,6 +91,7 @@ const Debug: FC = ({ completionParams, hasSetContextVar, datasetConfigs, + externalDataToolsConfig, } = useContext(ConfigContext) const { eventEmitter } = useEventEmitterContextContext() const { data: text2speechDefaultModel } = useDefaultModel(ModelTypeEnum.textEmbedding) @@ -223,8 +225,8 @@ const Debug: FC = ({ const postModelConfig: BackendModelConfig = { pre_prompt: !isAdvancedMode ? modelConfig.configs.prompt_template : '', prompt_type: promptMode, - chat_prompt_config: {}, - completion_prompt_config: {}, + chat_prompt_config: isAdvancedMode ? chatPromptConfig : cloneDeep(DEFAULT_CHAT_PROMPT_CONFIG), + completion_prompt_config: isAdvancedMode ? completionPromptConfig : cloneDeep(DEFAULT_COMPLETION_PROMPT_CONFIG), user_input_form: promptVariablesToUserInputsForm(modelConfig.configs.prompt_variables), dataset_query_variable: contextVar || '', dataset_configs: { @@ -251,11 +253,8 @@ const Debug: FC = ({ suggested_questions_after_answer: suggestedQuestionsAfterAnswerConfig, speech_to_text: speechToTextConfig, retriever_resource: citationConfig, - } - - if (isAdvancedMode) { - postModelConfig.chat_prompt_config = chatPromptConfig - postModelConfig.completion_prompt_config = completionPromptConfig + system_parameters: modelConfig.system_parameters, + external_data_tools: externalDataToolsConfig, } const data: Record = { diff --git a/web/app/components/app/configuration/hooks/use-advanced-prompt-config.ts b/web/app/components/app/configuration/hooks/use-advanced-prompt-config.ts index b0a6ccaa26..f7c45be9fd 100644 --- a/web/app/components/app/configuration/hooks/use-advanced-prompt-config.ts +++ b/web/app/components/app/configuration/hooks/use-advanced-prompt-config.ts @@ -10,7 +10,7 @@ import { fetchPromptTemplate } from '@/service/debug' import type { FormValue } from '@/app/components/header/account-setting/model-provider-page/declarations' type Param = { - appMode: AppModeEnum + appMode?: AppModeEnum modelModeType: ModelModeType modelName: string promptMode: PromptMode @@ -104,6 +104,9 @@ const useAdvancedPromptConfig = ({ const migrateToDefaultPrompt = async (isMigrateToCompetition?: boolean, toModelModeType?: ModelModeType) => { const mode = modelModeType const toReplacePrePrompt = prePrompt || '' + if (!appMode) + return + if (!isAdvancedPrompt) { const { chat_prompt_config, completion_prompt_config, stop } = await fetchPromptTemplate({ appMode, @@ -122,7 +125,6 @@ const useAdvancedPromptConfig = ({ }) setChatPromptConfig(newPromptConfig) } - else { const newPromptConfig = produce(completion_prompt_config, (draft) => { draft.prompt.text = draft.prompt.text.replace(PRE_PROMPT_PLACEHOLDER_TEXT, toReplacePrePrompt) diff --git a/web/app/components/app/configuration/index.tsx b/web/app/components/app/configuration/index.tsx index de4787397b..afe640278e 100644 --- a/web/app/components/app/configuration/index.tsx +++ b/web/app/components/app/configuration/index.tsx @@ -36,14 +36,14 @@ import type { } from '@/models/debug' import type { ExternalDataTool } from '@/models/common' import type { DataSet } from '@/models/datasets' -import type { ModelConfig as BackendModelConfig, VisionSettings } from '@/types/app' +import type { ModelConfig as BackendModelConfig, UserInputFormItem, VisionSettings } from '@/types/app' import ConfigContext from '@/context/debug-configuration' import Config from '@/app/components/app/configuration/config' import Debug from '@/app/components/app/configuration/debug' import Confirm from '@/app/components/base/confirm' import { ModelFeatureEnum, ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' import { ToastContext } from '@/app/components/base/toast' -import { fetchAppDetail, updateAppModelConfig } from '@/service/apps' +import { fetchAppDetailDirect, updateAppModelConfig } from '@/service/apps' import { promptVariablesToUserInputsForm, userInputsFormToPromptVariables } from '@/utils/model-config' import { fetchDatasets } from '@/service/datasets' import { useProviderContext } from '@/context/provider-context' @@ -52,6 +52,7 @@ import { PromptMode } from '@/models/debug' import { ANNOTATION_DEFAULT, DATASET_DEFAULT, DEFAULT_AGENT_SETTING, DEFAULT_CHAT_PROMPT_CONFIG, DEFAULT_COMPLETION_PROMPT_CONFIG } from '@/config' import SelectDataSet from '@/app/components/app/configuration/dataset-config/select-dataset' import { useModalContext } from '@/context/modal-context' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' import Drawer from '@/app/components/base/drawer' import ModelParameterModal from '@/app/components/header/account-setting/model-provider-page/model-parameter-modal' @@ -110,7 +111,7 @@ const Configuration: FC = () => { const pathname = usePathname() const matched = pathname.match(/\/app\/([^/]+)/) const appId = (matched?.length && matched[1]) ? matched[1] : '' - const [mode, setMode] = useState() + const [mode, setMode] = useState(AppModeEnum.CHAT) const [publishedConfig, setPublishedConfig] = useState(null) const [conversationId, setConversationId] = useState('') @@ -186,6 +187,8 @@ const Configuration: FC = () => { prompt_template: '', prompt_variables: [] as PromptVariable[], }, + chat_prompt_config: clone(DEFAULT_CHAT_PROMPT_CONFIG), + completion_prompt_config: clone(DEFAULT_COMPLETION_PROMPT_CONFIG), more_like_this: null, opening_statement: '', suggested_questions: [], @@ -196,6 +199,14 @@ const Configuration: FC = () => { suggested_questions_after_answer: null, retriever_resource: null, annotation_reply: null, + external_data_tools: [], + system_parameters: { + audio_file_size_limit: 0, + file_size_limit: 0, + image_file_size_limit: 0, + video_file_size_limit: 0, + workflow_file_upload_limit: 0, + }, dataSets: [], agentConfig: DEFAULT_AGENT_SETTING, }) @@ -543,169 +554,171 @@ const Configuration: FC = () => { }) } setCollectionList(collectionList) - fetchAppDetail({ url: '/apps', id: appId }).then(async (res: any) => { - setMode(res.mode as AppModeEnum) - const modelConfig = res.model_config - const promptMode = modelConfig.prompt_type === PromptMode.advanced ? PromptMode.advanced : PromptMode.simple - doSetPromptMode(promptMode) - if (promptMode === PromptMode.advanced) { - if (modelConfig.chat_prompt_config && modelConfig.chat_prompt_config.prompt.length > 0) - setChatPromptConfig(modelConfig.chat_prompt_config) - else - setChatPromptConfig(clone(DEFAULT_CHAT_PROMPT_CONFIG)) - setCompletionPromptConfig(modelConfig.completion_prompt_config || clone(DEFAULT_COMPLETION_PROMPT_CONFIG) as any) - setCanReturnToSimpleMode(false) - } + const res = await fetchAppDetailDirect({ url: '/apps', id: appId }) + setMode(res.mode as AppModeEnum) + const modelConfig = res.model_config as BackendModelConfig + const promptMode = modelConfig.prompt_type === PromptMode.advanced ? PromptMode.advanced : PromptMode.simple + doSetPromptMode(promptMode) + if (promptMode === PromptMode.advanced) { + if (modelConfig.chat_prompt_config && modelConfig.chat_prompt_config.prompt.length > 0) + setChatPromptConfig(modelConfig.chat_prompt_config) + else + setChatPromptConfig(clone(DEFAULT_CHAT_PROMPT_CONFIG)) + setCompletionPromptConfig(modelConfig.completion_prompt_config || clone(DEFAULT_COMPLETION_PROMPT_CONFIG) as any) + setCanReturnToSimpleMode(false) + } - const model = res.model_config.model + const model = modelConfig.model - let datasets: any = null + let datasets: any = null // old dataset struct - if (modelConfig.agent_mode?.tools?.find(({ dataset }: any) => dataset?.enabled)) - datasets = modelConfig.agent_mode?.tools.filter(({ dataset }: any) => dataset?.enabled) + if (modelConfig.agent_mode?.tools?.find(({ dataset }: any) => dataset?.enabled)) + datasets = modelConfig.agent_mode?.tools.filter(({ dataset }: any) => dataset?.enabled) // new dataset struct - else if (modelConfig.dataset_configs.datasets?.datasets?.length > 0) - datasets = modelConfig.dataset_configs?.datasets?.datasets + else if (modelConfig.dataset_configs.datasets?.datasets?.length > 0) + datasets = modelConfig.dataset_configs?.datasets?.datasets - if (dataSets && datasets?.length && datasets?.length > 0) { - const { data: dataSetsWithDetail } = await fetchDatasets({ url: '/datasets', params: { page: 1, ids: datasets.map(({ dataset }: any) => dataset.id) } }) - datasets = dataSetsWithDetail - setDataSets(datasets) - } + if (dataSets && datasets?.length && datasets?.length > 0) { + const { data: dataSetsWithDetail } = await fetchDatasets({ url: '/datasets', params: { page: 1, ids: datasets.map(({ dataset }: any) => dataset.id) } }) + datasets = dataSetsWithDetail + setDataSets(datasets) + } - setIntroduction(modelConfig.opening_statement) - setSuggestedQuestions(modelConfig.suggested_questions || []) - if (modelConfig.more_like_this) - setMoreLikeThisConfig(modelConfig.more_like_this) + setIntroduction(modelConfig.opening_statement) + setSuggestedQuestions(modelConfig.suggested_questions || []) + if (modelConfig.more_like_this) + setMoreLikeThisConfig(modelConfig.more_like_this) - if (modelConfig.suggested_questions_after_answer) - setSuggestedQuestionsAfterAnswerConfig(modelConfig.suggested_questions_after_answer) + if (modelConfig.suggested_questions_after_answer) + setSuggestedQuestionsAfterAnswerConfig(modelConfig.suggested_questions_after_answer) - if (modelConfig.speech_to_text) - setSpeechToTextConfig(modelConfig.speech_to_text) + if (modelConfig.speech_to_text) + setSpeechToTextConfig(modelConfig.speech_to_text) - if (modelConfig.text_to_speech) - setTextToSpeechConfig(modelConfig.text_to_speech) + if (modelConfig.text_to_speech) + setTextToSpeechConfig(modelConfig.text_to_speech) - if (modelConfig.retriever_resource) - setCitationConfig(modelConfig.retriever_resource) + if (modelConfig.retriever_resource) + setCitationConfig(modelConfig.retriever_resource) - if (modelConfig.annotation_reply) { - let annotationConfig = modelConfig.annotation_reply - if (modelConfig.annotation_reply.enabled) { - annotationConfig = { - ...modelConfig.annotation_reply, - embedding_model: { - ...modelConfig.annotation_reply.embedding_model, - embedding_provider_name: correctModelProvider(modelConfig.annotation_reply.embedding_model.embedding_provider_name), - }, - } + if (modelConfig.annotation_reply) { + let annotationConfig = modelConfig.annotation_reply + if (modelConfig.annotation_reply.enabled) { + annotationConfig = { + ...modelConfig.annotation_reply, + embedding_model: { + ...modelConfig.annotation_reply.embedding_model, + embedding_provider_name: correctModelProvider(modelConfig.annotation_reply.embedding_model.embedding_provider_name), + }, } - setAnnotationConfig(annotationConfig, true) } + setAnnotationConfig(annotationConfig, true) + } - if (modelConfig.sensitive_word_avoidance) - setModerationConfig(modelConfig.sensitive_word_avoidance) + if (modelConfig.sensitive_word_avoidance) + setModerationConfig(modelConfig.sensitive_word_avoidance) - if (modelConfig.external_data_tools) - setExternalDataToolsConfig(modelConfig.external_data_tools) + if (modelConfig.external_data_tools) + setExternalDataToolsConfig(modelConfig.external_data_tools) - const config = { - modelConfig: { - provider: correctModelProvider(model.provider), - model_id: model.name, - mode: model.mode, - configs: { - prompt_template: modelConfig.pre_prompt || '', - prompt_variables: userInputsFormToPromptVariables( - [ - ...modelConfig.user_input_form, - ...( - modelConfig.external_data_tools?.length - ? modelConfig.external_data_tools.map((item: any) => { - return { - external_data_tool: { - variable: item.variable as string, - label: item.label as string, - enabled: item.enabled, - type: item.type as string, - config: item.config, - required: true, - icon: item.icon, - icon_background: item.icon_background, - }, - } - }) - : [] - ), - ], - modelConfig.dataset_query_variable, - ), - }, - more_like_this: modelConfig.more_like_this, - opening_statement: modelConfig.opening_statement, - suggested_questions: modelConfig.suggested_questions, - sensitive_word_avoidance: modelConfig.sensitive_word_avoidance, - speech_to_text: modelConfig.speech_to_text, - text_to_speech: modelConfig.text_to_speech, - file_upload: modelConfig.file_upload, - suggested_questions_after_answer: modelConfig.suggested_questions_after_answer, - retriever_resource: modelConfig.retriever_resource, - annotation_reply: modelConfig.annotation_reply, - external_data_tools: modelConfig.external_data_tools, - dataSets: datasets || [], - agentConfig: res.mode === AppModeEnum.AGENT_CHAT ? { - max_iteration: DEFAULT_AGENT_SETTING.max_iteration, - ...modelConfig.agent_mode, - // remove dataset - enabled: true, // modelConfig.agent_mode?.enabled is not correct. old app: the value of app with dataset's is always true - tools: modelConfig.agent_mode?.tools.filter((tool: any) => { - return !tool.dataset - }).map((tool: any) => { - const toolInCollectionList = collectionList.find(c => tool.provider_id === c.id) - return { - ...tool, - isDeleted: res.deleted_tools?.some((deletedTool: any) => deletedTool.id === tool.id && deletedTool.tool_name === tool.tool_name), - notAuthor: toolInCollectionList?.is_team_authorization === false, - ...(tool.provider_type === 'builtin' ? { - provider_id: correctToolProvider(tool.provider_name, !!toolInCollectionList), - provider_name: correctToolProvider(tool.provider_name, !!toolInCollectionList), - } : {}), - } - }), - } : DEFAULT_AGENT_SETTING, + const config: PublishConfig = { + modelConfig: { + provider: correctModelProvider(model.provider), + model_id: model.name, + mode: model.mode, + configs: { + prompt_template: modelConfig.pre_prompt || '', + prompt_variables: userInputsFormToPromptVariables( + ([ + ...modelConfig.user_input_form, + ...( + modelConfig.external_data_tools?.length + ? modelConfig.external_data_tools.map((item: any) => { + return { + external_data_tool: { + variable: item.variable as string, + label: item.label as string, + enabled: item.enabled, + type: item.type as string, + config: item.config, + required: true, + icon: item.icon, + icon_background: item.icon_background, + }, + } + }) + : [] + ), + ]) as unknown as UserInputFormItem[], + modelConfig.dataset_query_variable, + ), }, - completionParams: model.completion_params, - } + more_like_this: modelConfig.more_like_this ?? { enabled: false }, + opening_statement: modelConfig.opening_statement, + suggested_questions: modelConfig.suggested_questions ?? [], + sensitive_word_avoidance: modelConfig.sensitive_word_avoidance, + speech_to_text: modelConfig.speech_to_text, + text_to_speech: modelConfig.text_to_speech, + file_upload: modelConfig.file_upload ?? null, + suggested_questions_after_answer: modelConfig.suggested_questions_after_answer ?? { enabled: false }, + retriever_resource: modelConfig.retriever_resource, + annotation_reply: modelConfig.annotation_reply ?? null, + external_data_tools: modelConfig.external_data_tools ?? [], + system_parameters: modelConfig.system_parameters, + dataSets: datasets || [], + agentConfig: res.mode === AppModeEnum.AGENT_CHAT ? { + max_iteration: DEFAULT_AGENT_SETTING.max_iteration, + ...modelConfig.agent_mode, + // remove dataset + enabled: true, // modelConfig.agent_mode?.enabled is not correct. old app: the value of app with dataset's is always true + tools: (modelConfig.agent_mode?.tools ?? []).filter((tool: any) => { + return !tool.dataset + }).map((tool: any) => { + const toolInCollectionList = collectionList.find(c => tool.provider_id === c.id) + return { + ...tool, + isDeleted: res.deleted_tools?.some((deletedTool: any) => deletedTool.id === tool.id && deletedTool.tool_name === tool.tool_name) ?? false, + notAuthor: toolInCollectionList?.is_team_authorization === false, + ...(tool.provider_type === 'builtin' ? { + provider_id: correctToolProvider(tool.provider_name, !!toolInCollectionList), + provider_name: correctToolProvider(tool.provider_name, !!toolInCollectionList), + } : {}), + } + }), + strategy: modelConfig.agent_mode?.strategy ?? AgentStrategy.react, + } : DEFAULT_AGENT_SETTING, + }, + completionParams: model.completion_params, + } - if (modelConfig.file_upload) - handleSetVisionConfig(modelConfig.file_upload.image, true) + if (modelConfig.file_upload) + handleSetVisionConfig(modelConfig.file_upload.image, true) - syncToPublishedConfig(config) - setPublishedConfig(config) - const retrievalConfig = getMultipleRetrievalConfig({ - ...modelConfig.dataset_configs, - reranking_model: modelConfig.dataset_configs.reranking_model && { - provider: modelConfig.dataset_configs.reranking_model.reranking_provider_name, - model: modelConfig.dataset_configs.reranking_model.reranking_model_name, - }, - }, datasets, datasets, { - provider: currentRerankProvider?.provider, - model: currentRerankModel?.model, - }) - setDatasetConfigs({ - retrieval_model: RETRIEVE_TYPE.multiWay, - ...modelConfig.dataset_configs, - ...retrievalConfig, - ...(retrievalConfig.reranking_model ? { - reranking_model: { - reranking_model_name: retrievalConfig.reranking_model.model, - reranking_provider_name: correctModelProvider(retrievalConfig.reranking_model.provider), - }, - } : {}), - }) - setHasFetchedDetail(true) + syncToPublishedConfig(config) + setPublishedConfig(config) + const retrievalConfig = getMultipleRetrievalConfig({ + ...modelConfig.dataset_configs, + reranking_model: modelConfig.dataset_configs.reranking_model && { + provider: modelConfig.dataset_configs.reranking_model.reranking_provider_name, + model: modelConfig.dataset_configs.reranking_model.reranking_model_name, + }, + }, datasets, datasets, { + provider: currentRerankProvider?.provider, + model: currentRerankModel?.model, }) + const datasetConfigsToSet = { + ...modelConfig.dataset_configs, + ...retrievalConfig, + ...(retrievalConfig.reranking_model ? { + reranking_model: { + reranking_model_name: retrievalConfig.reranking_model.model, + reranking_provider_name: correctModelProvider(retrievalConfig.reranking_model.provider), + }, + } : {}), + } as DatasetConfigs + datasetConfigsToSet.retrieval_model = datasetConfigsToSet.retrieval_model ?? RETRIEVE_TYPE.multiWay + setDatasetConfigs(datasetConfigsToSet) + setHasFetchedDetail(true) })() }, [appId]) @@ -780,8 +793,8 @@ const Configuration: FC = () => { // Simple Mode prompt pre_prompt: !isAdvancedMode ? promptTemplate : '', prompt_type: promptMode, - chat_prompt_config: {}, - completion_prompt_config: {}, + chat_prompt_config: isAdvancedMode ? chatPromptConfig : clone(DEFAULT_CHAT_PROMPT_CONFIG), + completion_prompt_config: isAdvancedMode ? completionPromptConfig : clone(DEFAULT_COMPLETION_PROMPT_CONFIG), user_input_form: promptVariablesToUserInputsForm(promptVariables), dataset_query_variable: contextVar || '', // features @@ -798,6 +811,7 @@ const Configuration: FC = () => { ...modelConfig.agentConfig, strategy: isFunctionCall ? AgentStrategy.functionCall : AgentStrategy.react, }, + external_data_tools: externalDataToolsConfig, model: { provider: modelAndParameter?.provider || modelConfig.provider, name: modelId, @@ -810,11 +824,7 @@ const Configuration: FC = () => { datasets: [...postDatasets], } as any, }, - } - - if (isAdvancedMode) { - data.chat_prompt_config = chatPromptConfig - data.completion_prompt_config = completionPromptConfig + system_parameters: modelConfig.system_parameters, } await updateAppModelConfig({ url: `/apps/${appId}/model-config`, body: data }) @@ -974,7 +984,6 @@ const Configuration: FC = () => { <> {
setShowAccountSettingModal({ payload: 'provider' })} + onSetting={() => setShowAccountSettingModal({ payload: ACCOUNT_SETTING_TAB.PROVIDER })} inputs={inputs} modelParameterParams={{ setModel: setModel as any, @@ -1033,7 +1042,7 @@ const Configuration: FC = () => { content={t('appDebug.trailUseGPT4Info.description')} isShow={showUseGPT4Confirm} onConfirm={() => { - setShowAccountSettingModal({ payload: 'provider' }) + setShowAccountSettingModal({ payload: ACCOUNT_SETTING_TAB.PROVIDER }) setShowUseGPT4Confirm(false) }} onCancel={() => setShowUseGPT4Confirm(false)} @@ -1065,7 +1074,7 @@ const Configuration: FC = () => { setShowAccountSettingModal({ payload: 'provider' })} + onSetting={() => setShowAccountSettingModal({ payload: ACCOUNT_SETTING_TAB.PROVIDER })} inputs={inputs} modelParameterParams={{ setModel: setModel as any, diff --git a/web/app/components/app/log/empty-element.tsx b/web/app/components/app/log/empty-element.tsx new file mode 100644 index 0000000000..ddddacd873 --- /dev/null +++ b/web/app/components/app/log/empty-element.tsx @@ -0,0 +1,42 @@ +'use client' +import type { FC, SVGProps } from 'react' +import React from 'react' +import Link from 'next/link' +import { Trans, useTranslation } from 'react-i18next' +import { basePath } from '@/utils/var' +import { getRedirectionPath } from '@/utils/app-redirection' +import type { App } from '@/types/app' +import { AppModeEnum } from '@/types/app' + +const ThreeDotsIcon = ({ className }: SVGProps) => { + return + + +} + +const EmptyElement: FC<{ appDetail: App }> = ({ appDetail }) => { + const { t } = useTranslation() + + const getWebAppType = (appType: AppModeEnum) => { + if (appType !== AppModeEnum.COMPLETION && appType !== AppModeEnum.WORKFLOW) + return AppModeEnum.CHAT + return appType + } + + return
+
+ {t('appLog.table.empty.element.title')} +
+ , + testLink: , + }} + /> +
+
+
+} + +export default React.memo(EmptyElement) diff --git a/web/app/components/app/log/index.tsx b/web/app/components/app/log/index.tsx index 2ce315a5d0..55a3f7d12d 100644 --- a/web/app/components/app/log/index.tsx +++ b/web/app/components/app/log/index.tsx @@ -1,16 +1,14 @@ 'use client' -import type { FC, SVGProps } from 'react' +import type { FC } from 'react' import React, { useState } from 'react' import useSWR from 'swr' -import Link from 'next/link' -import { usePathname } from 'next/navigation' import { useDebounce } from 'ahooks' import { omit } from 'lodash-es' import dayjs from 'dayjs' -import { basePath } from '@/utils/var' -import { Trans, useTranslation } from 'react-i18next' +import { useTranslation } from 'react-i18next' import List from './list' import Filter, { TIME_PERIOD_MAPPING } from './filter' +import EmptyElement from './empty-element' import Pagination from '@/app/components/base/pagination' import Loading from '@/app/components/base/loading' import { fetchChatConversations, fetchCompletionConversations } from '@/service/log' @@ -28,30 +26,6 @@ export type QueryParam = { sort_by?: string } -const ThreeDotsIcon = ({ className }: SVGProps) => { - return - - -} - -const EmptyElement: FC<{ appUrl: string }> = ({ appUrl }) => { - const { t } = useTranslation() - const pathname = usePathname() - const pathSegments = pathname.split('/') - pathSegments.pop() - return
-
- {t('appLog.table.empty.element.title')} -
- , testLink: }} - /> -
-
-
-} - const Logs: FC = ({ appDetail }) => { const { t } = useTranslation() const [queryParams, setQueryParams] = useState({ @@ -79,12 +53,6 @@ const Logs: FC = ({ appDetail }) => { ...omit(debouncedQueryParams, ['period']), } - const getWebAppType = (appType: AppModeEnum) => { - if (appType !== AppModeEnum.COMPLETION && appType !== AppModeEnum.WORKFLOW) - return AppModeEnum.CHAT - return appType - } - // When the details are obtained, proceed to the next request const { data: chatConversations, mutate: mutateChatList } = useSWR(() => isChatMode ? { @@ -111,7 +79,7 @@ const Logs: FC = ({ appDetail }) => { ? : total > 0 ? - : + : } {/* Show Pagination only if the total is more than the limit */} {(total && total > APP_PAGE_LIMIT) diff --git a/web/app/components/app/log/list.tsx b/web/app/components/app/log/list.tsx index ba0539cfea..d21d35eeee 100644 --- a/web/app/components/app/log/list.tsx +++ b/web/app/components/app/log/list.tsx @@ -14,6 +14,7 @@ import timezone from 'dayjs/plugin/timezone' import { createContext, useContext } from 'use-context-selector' import { useShallow } from 'zustand/react/shallow' import { useTranslation } from 'react-i18next' +import { usePathname, useRouter, useSearchParams } from 'next/navigation' import type { ChatItemInTree } from '../../base/chat/types' import Indicator from '../../header/indicator' import VarPanel from './var-panel' @@ -41,8 +42,11 @@ import { getProcessedFilesFromResponse } from '@/app/components/base/file-upload import cn from '@/utils/classnames' import { noop } from 'lodash-es' import PromptLogModal from '../../base/prompt-log-modal' +import { WorkflowContextProvider } from '@/app/components/workflow/context' type AppStoreState = ReturnType +type ConversationListItem = ChatConversationGeneralDetail | CompletionConversationGeneralDetail +type ConversationSelection = ConversationListItem | { id: string; isPlaceholder?: true } dayjs.extend(utc) dayjs.extend(timezone) @@ -203,7 +207,7 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) { const { formatTime } = useTimestamp() const { onClose, appDetail } = useContext(DrawerContext) const { notify } = useContext(ToastContext) - const { currentLogItem, setCurrentLogItem, showMessageLogModal, setShowMessageLogModal, showPromptLogModal, setShowPromptLogModal, currentLogModalActiveTab } = useAppStore(useShallow(state => ({ + const { currentLogItem, setCurrentLogItem, showMessageLogModal, setShowMessageLogModal, showPromptLogModal, setShowPromptLogModal, currentLogModalActiveTab } = useAppStore(useShallow((state: AppStoreState) => ({ currentLogItem: state.currentLogItem, setCurrentLogItem: state.setCurrentLogItem, showMessageLogModal: state.showMessageLogModal, @@ -776,15 +780,17 @@ function DetailPanel({ detail, onFeedback }: IDetailPanel) { }
{showMessageLogModal && ( - { - setCurrentLogItem() - setShowMessageLogModal(false) - }} - defaultTab={currentLogModalActiveTab} - /> + + { + setCurrentLogItem() + setShowMessageLogModal(false) + }} + defaultTab={currentLogModalActiveTab} + /> + )} {!isChatMode && showPromptLogModal && ( = ({ logs, appDetail, onRefresh }) => { const { t } = useTranslation() const { formatTime } = useTimestamp() + const router = useRouter() + const pathname = usePathname() + const searchParams = useSearchParams() + const conversationIdInUrl = searchParams.get('conversation_id') ?? undefined const media = useBreakpoints() const isMobile = media === MediaType.mobile const [showDrawer, setShowDrawer] = useState(false) // Whether to display the chat details drawer - const [currentConversation, setCurrentConversation] = useState() // Currently selected conversation + const [currentConversation, setCurrentConversation] = useState() // Currently selected conversation + const closingConversationIdRef = useRef(null) + const pendingConversationIdRef = useRef(null) + const pendingConversationCacheRef = useRef(undefined) const isChatMode = appDetail.mode !== AppModeEnum.COMPLETION // Whether the app is a chat app const isChatflow = appDetail.mode === AppModeEnum.ADVANCED_CHAT // Whether the app is a chatflow app const { setShowPromptLogModal, setShowAgentLogModal, setShowMessageLogModal } = useAppStore(useShallow((state: AppStoreState) => ({ @@ -909,6 +922,92 @@ const ConversationList: FC = ({ logs, appDetail, onRefresh }) setShowMessageLogModal: state.setShowMessageLogModal, }))) + const activeConversationId = conversationIdInUrl ?? pendingConversationIdRef.current ?? currentConversation?.id + + const buildUrlWithConversation = useCallback((conversationId?: string) => { + const params = new URLSearchParams(searchParams.toString()) + if (conversationId) + params.set('conversation_id', conversationId) + else + params.delete('conversation_id') + + const queryString = params.toString() + return queryString ? `${pathname}?${queryString}` : pathname + }, [pathname, searchParams]) + + const handleRowClick = useCallback((log: ConversationListItem) => { + if (conversationIdInUrl === log.id) { + if (!showDrawer) + setShowDrawer(true) + + if (!currentConversation || currentConversation.id !== log.id) + setCurrentConversation(log) + return + } + + pendingConversationIdRef.current = log.id + pendingConversationCacheRef.current = log + if (!showDrawer) + setShowDrawer(true) + + if (currentConversation?.id !== log.id) + setCurrentConversation(undefined) + + router.push(buildUrlWithConversation(log.id), { scroll: false }) + }, [buildUrlWithConversation, conversationIdInUrl, currentConversation, router, showDrawer]) + + const currentConversationId = currentConversation?.id + + useEffect(() => { + if (!conversationIdInUrl) { + if (pendingConversationIdRef.current) + return + + if (showDrawer || currentConversationId) { + setShowDrawer(false) + setCurrentConversation(undefined) + } + closingConversationIdRef.current = null + pendingConversationCacheRef.current = undefined + return + } + + if (closingConversationIdRef.current === conversationIdInUrl) + return + + if (pendingConversationIdRef.current === conversationIdInUrl) + pendingConversationIdRef.current = null + + const matchedConversation = logs?.data?.find((item: ConversationListItem) => item.id === conversationIdInUrl) + const nextConversation: ConversationSelection = matchedConversation + ?? pendingConversationCacheRef.current + ?? { id: conversationIdInUrl, isPlaceholder: true } + + if (!showDrawer) + setShowDrawer(true) + + if (!currentConversation || currentConversation.id !== conversationIdInUrl || (!('created_at' in currentConversation) && matchedConversation)) + setCurrentConversation(nextConversation) + + if (pendingConversationCacheRef.current?.id === conversationIdInUrl || matchedConversation) + pendingConversationCacheRef.current = undefined + }, [conversationIdInUrl, currentConversation, isChatMode, logs?.data, showDrawer]) + + const onCloseDrawer = useCallback(() => { + onRefresh() + setShowDrawer(false) + setCurrentConversation(undefined) + setShowPromptLogModal(false) + setShowAgentLogModal(false) + setShowMessageLogModal(false) + pendingConversationIdRef.current = null + pendingConversationCacheRef.current = undefined + closingConversationIdRef.current = conversationIdInUrl ?? null + + if (conversationIdInUrl) + router.replace(buildUrlWithConversation(), { scroll: false }) + }, [buildUrlWithConversation, conversationIdInUrl, onRefresh, router, setShowAgentLogModal, setShowMessageLogModal, setShowPromptLogModal]) + // Annotated data needs to be highlighted const renderTdValue = (value: string | number | null, isEmptyStyle: boolean, isHighlight = false, annotation?: LogAnnotation) => { return ( @@ -927,21 +1026,12 @@ const ConversationList: FC = ({ logs, appDetail, onRefresh }) ) } - const onCloseDrawer = () => { - onRefresh() - setShowDrawer(false) - setCurrentConversation(undefined) - setShowPromptLogModal(false) - setShowAgentLogModal(false) - setShowMessageLogModal(false) - } - if (!logs) return return ( -
- +
+
@@ -962,11 +1052,8 @@ const ConversationList: FC = ({ logs, appDetail, onRefresh }) const rightValue = get(log, isChatMode ? 'message_count' : 'message.answer') return { - setShowDrawer(true) - setCurrentConversation(log) - }}> + className={cn('cursor-pointer border-b border-divider-subtle hover:bg-background-default-hover', activeConversationId !== log.id ? '' : 'bg-background-default-hover')} + onClick={() => handleRowClick(log)}> {isWorkflow && ( )} diff --git a/web/app/components/app/workflow-log/trigger-by-display.tsx b/web/app/components/app/workflow-log/trigger-by-display.tsx index e9355c288d..1411503cc2 100644 --- a/web/app/components/app/workflow-log/trigger-by-display.tsx +++ b/web/app/components/app/workflow-log/trigger-by-display.tsx @@ -11,15 +11,23 @@ import { } from '@/app/components/base/icons/src/vender/workflow' import BlockIcon from '@/app/components/workflow/block-icon' import { BlockEnum } from '@/app/components/workflow/types' +import useTheme from '@/hooks/use-theme' +import type { TriggerMetadata } from '@/models/log' +import { WorkflowRunTriggeredFrom } from '@/models/log' +import { Theme } from '@/types/app' type TriggerByDisplayProps = { - triggeredFrom: string + triggeredFrom: WorkflowRunTriggeredFrom className?: string showText?: boolean + triggerMetadata?: TriggerMetadata } -const getTriggerDisplayName = (triggeredFrom: string, t: any) => { - const nameMap: Record = { +const getTriggerDisplayName = (triggeredFrom: WorkflowRunTriggeredFrom, t: any, metadata?: TriggerMetadata) => { + if (triggeredFrom === WorkflowRunTriggeredFrom.PLUGIN && metadata?.event_name) + return metadata.event_name + + const nameMap: Record = { 'debugging': t('appLog.triggerBy.debugging'), 'app-run': t('appLog.triggerBy.appRun'), 'webhook': t('appLog.triggerBy.webhook'), @@ -32,7 +40,27 @@ const getTriggerDisplayName = (triggeredFrom: string, t: any) => { return nameMap[triggeredFrom] || triggeredFrom } -const getTriggerIcon = (triggeredFrom: string) => { +const getPluginIcon = (metadata: TriggerMetadata | undefined, theme: Theme) => { + if (!metadata) + return null + + const icon = theme === Theme.dark + ? metadata.icon_dark || metadata.icon + : metadata.icon || metadata.icon_dark + + if (!icon) + return null + + return ( + + ) +} + +const getTriggerIcon = (triggeredFrom: WorkflowRunTriggeredFrom, metadata: TriggerMetadata | undefined, theme: Theme) => { switch (triggeredFrom) { case 'webhook': return ( @@ -47,9 +75,7 @@ const getTriggerIcon = (triggeredFrom: string) => { ) case 'plugin': - // For plugin triggers in logs, use a generic plugin icon since we don't have specific plugin info - // This matches the standard BlockIcon styling for TriggerPlugin - return ( + return getPluginIcon(metadata, theme) || ( = ({ triggeredFrom, className = '', showText = true, + triggerMetadata, }) => { const { t } = useTranslation() + const { theme } = useTheme() - const displayName = getTriggerDisplayName(triggeredFrom, t) - const icon = getTriggerIcon(triggeredFrom) + const displayName = getTriggerDisplayName(triggeredFrom, t, triggerMetadata) + const icon = getTriggerIcon(triggeredFrom, triggerMetadata, theme) return (
diff --git a/web/app/components/apps/app-card.tsx b/web/app/components/apps/app-card.tsx index 564eb493e5..8356cfd31c 100644 --- a/web/app/components/apps/app-card.tsx +++ b/web/app/components/apps/app-card.tsx @@ -282,21 +282,23 @@ const AppCard = ({ app, onRefresh }: AppCardProps) => { )} { - (!systemFeatures.webapp_auth.enabled) - ? <> - - - - : !(isGettingUserCanAccessApp || !userCanAccessApp?.result) && ( - <> + !app.has_draft_trigger && ( + (!systemFeatures.webapp_auth.enabled) + ? <> - ) + : !(isGettingUserCanAccessApp || !userCanAccessApp?.result) && ( + <> + + + + ) + ) } { diff --git a/web/app/components/apps/footer.tsx b/web/app/components/apps/footer.tsx index 9fed4c8757..16411ffc57 100644 --- a/web/app/components/apps/footer.tsx +++ b/web/app/components/apps/footer.tsx @@ -1,6 +1,6 @@ import React from 'react' import Link from 'next/link' -import { RiDiscordFill, RiGithubFill } from '@remixicon/react' +import { RiDiscordFill, RiDiscussLine, RiGithubFill } from '@remixicon/react' import { useTranslation } from 'react-i18next' type CustomLinkProps = { @@ -38,6 +38,9 @@ const Footer = () => { + + +
) diff --git a/web/app/components/apps/list.tsx b/web/app/components/apps/list.tsx index ce166825fe..4a52505d80 100644 --- a/web/app/components/apps/list.tsx +++ b/web/app/components/apps/list.tsx @@ -145,15 +145,23 @@ const List = () => { return } - if (anchorRef.current) { + if (anchorRef.current && containerRef.current) { + // Calculate dynamic rootMargin: clamps to 100-200px range, using 20% of container height as the base value for better responsiveness + const containerHeight = containerRef.current.clientHeight + const dynamicMargin = Math.max(100, Math.min(containerHeight * 0.2, 200)) // Clamps to 100-200px range, using 20% of container height as the base value + observer = new IntersectionObserver((entries) => { if (entries[0].isIntersecting && !isLoading && !error && hasMore) setSize((size: number) => size + 1) - }, { rootMargin: '100px' }) + }, { + root: containerRef.current, + rootMargin: `${dynamicMargin}px`, + threshold: 0.1, // Trigger when 10% of the anchor element is visible + }) observer.observe(anchorRef.current) } return () => observer?.disconnect() - }, [isLoading, setSize, anchorRef, mutate, data, error]) + }, [isLoading, setSize, data, error]) const { run: handleSearch } = useDebounceFn(() => { setSearchKeywords(keywords) @@ -185,7 +193,7 @@ const List = () => { )} -
+
{ + const originalFetchRef = useRef(null) + const setAppDetail = useAppStore(state => state.setAppDetail) + + useEffect(() => { + setAppDetail({ + id: 'app-1', + name: 'Analytics Agent', + mode: 'agent-chat', + } as any) + + originalFetchRef.current = globalThis.fetch?.bind(globalThis) + + const handler = async (input: RequestInfo | URL, init?: RequestInit) => { + const request = input instanceof Request ? input : new Request(input, init) + const url = request.url + const parsed = new URL(url, window.location.origin) + + if (parsed.pathname.endsWith('/apps/app-1/agent/logs')) { + return new Response(JSON.stringify(MOCK_RESPONSE), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }) + } + + if (originalFetchRef.current) + return originalFetchRef.current(request) + + throw new Error(`Unhandled request: ${url}`) + } + + globalThis.fetch = handler as typeof globalThis.fetch + + return () => { + if (originalFetchRef.current) + globalThis.fetch = originalFetchRef.current + setAppDetail(undefined) + } + }, [setAppDetail]) + + return ( + +
+ { + console.log('Agent log modal closed') + }} + /> +
+
+ ) +} + +const meta = { + title: 'Base/Other/AgentLogModal', + component: AgentLogModalDemo, + parameters: { + layout: 'fullscreen', + docs: { + description: { + component: 'Agent execution viewer showing iterations, tool calls, and metadata. Fetch responses are mocked for Storybook.', + }, + }, + }, + args: { + width: 960, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/answer-icon/index.stories.tsx b/web/app/components/base/answer-icon/index.stories.tsx new file mode 100644 index 0000000000..0928d9cda6 --- /dev/null +++ b/web/app/components/base/answer-icon/index.stories.tsx @@ -0,0 +1,107 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import type { ReactNode } from 'react' +import AnswerIcon from '.' + +const SAMPLE_IMAGE = 'data:image/svg+xml;utf8,AI' + +const meta = { + title: 'Base/General/AnswerIcon', + component: AnswerIcon, + parameters: { + docs: { + description: { + component: 'Circular avatar used for assistant answers. Supports emoji, solid background colour, or uploaded imagery.', + }, + }, + }, + tags: ['autodocs'], + args: { + icon: '🤖', + background: '#D5F5F6', + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +const StoryWrapper = (children: ReactNode) => ( +
+ {children} +
+) + +export const Default: Story = { + render: args => StoryWrapper( +
+ +
, + ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +
+ +
+ `.trim(), + }, + }, + }, +} + +export const CustomEmoji: Story = { + render: args => StoryWrapper( + <> +
+ +
+
+ +
+ , + ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +
+
+ +
+
+ +
+
+ `.trim(), + }, + }, + }, +} + +export const ImageIcon: Story = { + render: args => StoryWrapper( +
+ +
, + ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` + + `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/app-icon-picker/index.stories.tsx b/web/app/components/base/app-icon-picker/index.stories.tsx new file mode 100644 index 0000000000..bd0ec0e200 --- /dev/null +++ b/web/app/components/base/app-icon-picker/index.stories.tsx @@ -0,0 +1,91 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import AppIconPicker, { type AppIconSelection } from '.' + +const meta = { + title: 'Base/Data Entry/AppIconPicker', + component: AppIconPicker, + parameters: { + layout: 'fullscreen', + docs: { + description: { + component: 'Modal workflow for choosing an application avatar. Users can switch between emoji selections and image uploads (when enabled).', + }, + }, + nextjs: { + appDirectory: true, + navigation: { + pathname: '/apps/demo-app/icon-picker', + params: { appId: 'demo-app' }, + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +const AppIconPickerDemo = () => { + const [open, setOpen] = useState(false) + const [selection, setSelection] = useState(null) + + return ( +
+ + +
+
Selection preview
+
+          {selection ? JSON.stringify(selection, null, 2) : 'No icon selected yet.'}
+        
+
+ + {open && ( + { + setSelection(result) + setOpen(false) + }} + onClose={() => setOpen(false)} + /> + )} +
+ ) +} + +export const Playground: Story = { + render: () => , + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +const [open, setOpen] = useState(false) +const [selection, setSelection] = useState(null) + +return ( + <> + + {open && ( + { + setSelection(result) + setOpen(false) + }} + onClose={() => setOpen(false)} + /> + )} + +) + `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/app-icon/index.stories.tsx b/web/app/components/base/app-icon/index.stories.tsx new file mode 100644 index 0000000000..9fdffb54b0 --- /dev/null +++ b/web/app/components/base/app-icon/index.stories.tsx @@ -0,0 +1,108 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import type { ComponentProps } from 'react' +import AppIcon from '.' + +const meta = { + title: 'Base/General/AppIcon', + component: AppIcon, + parameters: { + docs: { + description: { + component: 'Reusable avatar for applications and workflows. Supports emoji or uploaded imagery, rounded mode, edit overlays, and multiple sizes.', + }, + }, + }, + tags: ['autodocs'], + args: { + icon: '🧭', + background: '#FFEAD5', + size: 'medium', + rounded: false, + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Default: Story = { + render: args => ( +
+ + +
+ ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` + + + `.trim(), + }, + }, + }, +} + +export const Sizes: Story = { + render: (args) => { + const sizes: Array['size']> = ['xs', 'tiny', 'small', 'medium', 'large', 'xl', 'xxl'] + return ( +
+ {sizes.map(size => ( +
+ + {size} +
+ ))} +
+ ) + }, + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +{(['xs','tiny','small','medium','large','xl','xxl'] as const).map(size => ( + +))} + `.trim(), + }, + }, + }, +} + +export const WithEditOverlay: Story = { + render: args => ( +
+ + +
+ ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` + + + `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/app-icon/style.module.css b/web/app/components/base/app-icon/style.module.css deleted file mode 100644 index 4ee84fb444..0000000000 --- a/web/app/components/base/app-icon/style.module.css +++ /dev/null @@ -1,23 +0,0 @@ -.appIcon { - @apply flex items-center justify-center relative w-9 h-9 text-lg rounded-lg grow-0 shrink-0; -} - -.appIcon.large { - @apply w-10 h-10; -} - -.appIcon.small { - @apply w-8 h-8; -} - -.appIcon.tiny { - @apply w-6 h-6 text-base; -} - -.appIcon.xs { - @apply w-5 h-5 text-base; -} - -.appIcon.rounded { - @apply rounded-full; -} diff --git a/web/app/components/base/audio-btn/index.stories.tsx b/web/app/components/base/audio-btn/index.stories.tsx index 8dc82d3413..1c989b80a6 100644 --- a/web/app/components/base/audio-btn/index.stories.tsx +++ b/web/app/components/base/audio-btn/index.stories.tsx @@ -20,7 +20,7 @@ const StoryWrapper = (props: ComponentProps) => { } const meta = { - title: 'Base/Button/AudioBtn', + title: 'Base/General/AudioBtn', component: AudioBtn, tags: ['autodocs'], parameters: { diff --git a/web/app/components/base/audio-gallery/index.stories.tsx b/web/app/components/base/audio-gallery/index.stories.tsx new file mode 100644 index 0000000000..539ab9e332 --- /dev/null +++ b/web/app/components/base/audio-gallery/index.stories.tsx @@ -0,0 +1,37 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import AudioGallery from '.' + +const AUDIO_SOURCES = [ + 'https://interactive-examples.mdn.mozilla.net/media/cc0-audio/t-rex-roar.mp3', +] + +const meta = { + title: 'Base/Data Display/AudioGallery', + component: AudioGallery, + parameters: { + docs: { + description: { + component: 'List of audio players that render waveform previews and playback controls for each source.', + }, + source: { + language: 'tsx', + code: ` + + `.trim(), + }, + }, + }, + tags: ['autodocs'], + args: { + srcs: AUDIO_SOURCES, + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Default: Story = {} diff --git a/web/app/components/base/auto-height-textarea/index.stories.tsx b/web/app/components/base/auto-height-textarea/index.stories.tsx index a9234fac9d..d0f36e4736 100644 --- a/web/app/components/base/auto-height-textarea/index.stories.tsx +++ b/web/app/components/base/auto-height-textarea/index.stories.tsx @@ -3,7 +3,7 @@ import { useState } from 'react' import AutoHeightTextarea from '.' const meta = { - title: 'Base/Input/AutoHeightTextarea', + title: 'Base/Data Entry/AutoHeightTextarea', component: AutoHeightTextarea, parameters: { layout: 'centered', diff --git a/web/app/components/base/avatar/index.stories.tsx b/web/app/components/base/avatar/index.stories.tsx new file mode 100644 index 0000000000..1b3dc3eb3b --- /dev/null +++ b/web/app/components/base/avatar/index.stories.tsx @@ -0,0 +1,73 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import Avatar from '.' + +const meta = { + title: 'Base/Data Display/Avatar', + component: Avatar, + parameters: { + docs: { + description: { + component: 'Initials or image-based avatar used across contacts and member lists. Falls back to the first letter when the image fails to load.', + }, + source: { + language: 'tsx', + code: ` + + `.trim(), + }, + }, + }, + tags: ['autodocs'], + args: { + name: 'Alex Doe', + avatar: 'https://cloud.dify.ai/logo/logo.svg', + size: 40, + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Default: Story = {} + +export const WithFallback: Story = { + args: { + avatar: null, + name: 'Fallback', + }, + parameters: { + docs: { + source: { + language: 'tsx', + code: ` + + `.trim(), + }, + }, + }, +} + +export const CustomSizes: Story = { + render: args => ( +
+ {[24, 32, 48, 64].map(size => ( +
+ + {size}px +
+ ))} +
+ ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +{[24, 32, 48, 64].map(size => ( + +))} + `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/badge/index.stories.tsx b/web/app/components/base/badge/index.stories.tsx new file mode 100644 index 0000000000..e1fe8cb271 --- /dev/null +++ b/web/app/components/base/badge/index.stories.tsx @@ -0,0 +1,73 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import Badge from '../badge' + +const meta = { + title: 'Base/Data Display/Badge', + component: Badge, + parameters: { + docs: { + description: { + component: 'Compact label used for statuses and counts. Supports uppercase styling and optional red corner marks.', + }, + source: { + language: 'tsx', + code: ` + + `.trim(), + }, + }, + }, + tags: ['autodocs'], + args: { + text: 'beta', + uppercase: true, + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Default: Story = {} + +export const WithCornerMark: Story = { + args: { + text: 'new', + hasRedCornerMark: true, + }, + parameters: { + docs: { + source: { + language: 'tsx', + code: ` + + `.trim(), + }, + }, + }, +} + +export const CustomContent: Story = { + render: args => ( + + + + Production + + + ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` + + + + Production + + + `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/block-input/index.stories.tsx b/web/app/components/base/block-input/index.stories.tsx index 5f1967b9d0..d05cc221b6 100644 --- a/web/app/components/base/block-input/index.stories.tsx +++ b/web/app/components/base/block-input/index.stories.tsx @@ -3,7 +3,7 @@ import { useState } from 'react' import BlockInput from '.' const meta = { - title: 'Base/Input/BlockInput', + title: 'Base/Data Entry/BlockInput', component: BlockInput, parameters: { layout: 'centered', diff --git a/web/app/components/base/button/add-button.stories.tsx b/web/app/components/base/button/add-button.stories.tsx index a46441aefe..edd52b2b78 100644 --- a/web/app/components/base/button/add-button.stories.tsx +++ b/web/app/components/base/button/add-button.stories.tsx @@ -2,7 +2,7 @@ import type { Meta, StoryObj } from '@storybook/nextjs' import AddButton from './add-button' const meta = { - title: 'Base/Button/AddButton', + title: 'Base/General/AddButton', component: AddButton, parameters: { layout: 'centered', diff --git a/web/app/components/base/button/index.stories.tsx b/web/app/components/base/button/index.stories.tsx index f369e2f71a..02d20b4af4 100644 --- a/web/app/components/base/button/index.stories.tsx +++ b/web/app/components/base/button/index.stories.tsx @@ -4,7 +4,7 @@ import { RocketLaunchIcon } from '@heroicons/react/20/solid' import { Button } from '.' const meta = { - title: 'Base/Button/Button', + title: 'Base/General/Button', component: Button, parameters: { layout: 'centered', diff --git a/web/app/components/base/button/sync-button.stories.tsx b/web/app/components/base/button/sync-button.stories.tsx index d55a7acf47..dcfbf6daf3 100644 --- a/web/app/components/base/button/sync-button.stories.tsx +++ b/web/app/components/base/button/sync-button.stories.tsx @@ -2,7 +2,7 @@ import type { Meta, StoryObj } from '@storybook/nextjs' import SyncButton from './sync-button' const meta = { - title: 'Base/Button/SyncButton', + title: 'Base/General/SyncButton', component: SyncButton, parameters: { layout: 'centered', diff --git a/web/app/components/base/chat/chat-with-history/chat-wrapper.tsx b/web/app/components/base/chat/chat-with-history/chat-wrapper.tsx index 29b27a60ad..302fb9a3c7 100644 --- a/web/app/components/base/chat/chat-with-history/chat-wrapper.tsx +++ b/web/app/components/base/chat/chat-with-history/chat-wrapper.tsx @@ -3,7 +3,6 @@ import Chat from '../chat' import type { ChatConfig, ChatItem, - ChatItemInTree, OnSend, } from '../types' import { useChat } from '../chat/hooks' @@ -149,7 +148,7 @@ const ChatWrapper = () => { ) }, [chatList, handleNewConversationCompleted, handleSend, currentConversationId, currentConversationInputs, newConversationInputs, isInstalledApp, appId]) - const doRegenerate = useCallback((chatItem: ChatItemInTree, editedQuestion?: { message: string, files?: FileEntity[] }) => { + const doRegenerate = useCallback((chatItem: ChatItem, editedQuestion?: { message: string, files?: FileEntity[] }) => { const question = editedQuestion ? chatItem : chatList.find(item => item.id === chatItem.parentMessageId)! const parentAnswer = chatList.find(item => item.id === question.parentMessageId) doSend(editedQuestion ? editedQuestion.message : question.content, diff --git a/web/app/components/base/chat/chat-with-history/inputs-form/content.tsx b/web/app/components/base/chat/chat-with-history/inputs-form/content.tsx index 392bdf2b77..c7785ebd89 100644 --- a/web/app/components/base/chat/chat-with-history/inputs-form/content.tsx +++ b/web/app/components/base/chat/chat-with-history/inputs-form/content.tsx @@ -49,7 +49,7 @@ const InputsFormContent = ({ showTip }: Props) => {
{form.label}
{!form.required && ( -
{t('appDebug.variableTable.optional')}
+
{t('workflow.panel.optional')}
)}
)} diff --git a/web/app/components/base/chat/chat/answer/index.stories.tsx b/web/app/components/base/chat/chat/answer/index.stories.tsx index 822bdf7326..95bc3bd5c0 100644 --- a/web/app/components/base/chat/chat/answer/index.stories.tsx +++ b/web/app/components/base/chat/chat/answer/index.stories.tsx @@ -6,7 +6,7 @@ import { markdownContentSVG } from './__mocks__/markdownContentSVG' import Answer from '.' const meta = { - title: 'Base/Chat/Chat Answer', + title: 'Base/Other/Chat Answer', component: Answer, parameters: { layout: 'fullscreen', diff --git a/web/app/components/base/chat/chat/hooks.ts b/web/app/components/base/chat/chat/hooks.ts index 665e7e8bc3..a10b359724 100644 --- a/web/app/components/base/chat/chat/hooks.ts +++ b/web/app/components/base/chat/chat/hooks.ts @@ -29,6 +29,7 @@ import type { Annotation } from '@/models/log' import { WorkflowRunningStatus } from '@/app/components/workflow/types' import useTimestamp from '@/hooks/use-timestamp' import { AudioPlayerManager } from '@/app/components/base/audio-btn/audio.player.manager' +import type AudioPlayer from '@/app/components/base/audio-btn/audio' import type { FileEntity } from '@/app/components/base/file-uploader/types' import { getProcessedFiles, @@ -308,7 +309,15 @@ export const useChat = ( else ttsUrl = `/apps/${params.appId}/text-to-audio` } - const player = AudioPlayerManager.getInstance().getAudioPlayer(ttsUrl, ttsIsPublic, uuidV4(), 'none', 'none', noop) + // Lazy initialization: Only create AudioPlayer when TTS is actually needed + // This prevents opening audio channel unnecessarily + let player: AudioPlayer | null = null + const getOrCreatePlayer = () => { + if (!player) + player = AudioPlayerManager.getInstance().getAudioPlayer(ttsUrl, ttsIsPublic, uuidV4(), 'none', 'none', noop) + + return player + } ssePost( url, { @@ -582,11 +591,16 @@ export const useChat = ( onTTSChunk: (messageId: string, audio: string) => { if (!audio || audio === '') return - player.playAudioWithAudio(audio, true) - AudioPlayerManager.getInstance().resetMsgId(messageId) + const audioPlayer = getOrCreatePlayer() + if (audioPlayer) { + audioPlayer.playAudioWithAudio(audio, true) + AudioPlayerManager.getInstance().resetMsgId(messageId) + } }, onTTSEnd: (messageId: string, audio: string) => { - player.playAudioWithAudio(audio, false) + const audioPlayer = getOrCreatePlayer() + if (audioPlayer) + audioPlayer.playAudioWithAudio(audio, false) }, onLoopStart: ({ data: loopStartedData }) => { responseItem.workflowProcess!.tracing!.push({ diff --git a/web/app/components/base/chat/chat/question.stories.tsx b/web/app/components/base/chat/chat/question.stories.tsx index 0b84ee91a8..f0ee860c89 100644 --- a/web/app/components/base/chat/chat/question.stories.tsx +++ b/web/app/components/base/chat/chat/question.stories.tsx @@ -5,7 +5,7 @@ import Question from './question' import { User } from '@/app/components/base/icons/src/public/avatar' const meta = { - title: 'Base/Chat/Chat Question', + title: 'Base/Other/Chat Question', component: Question, parameters: { layout: 'centered', diff --git a/web/app/components/base/chat/embedded-chatbot/chat-wrapper.tsx b/web/app/components/base/chat/embedded-chatbot/chat-wrapper.tsx index 1bb3dbf56f..5fba104d35 100644 --- a/web/app/components/base/chat/embedded-chatbot/chat-wrapper.tsx +++ b/web/app/components/base/chat/embedded-chatbot/chat-wrapper.tsx @@ -3,7 +3,6 @@ import Chat from '../chat' import type { ChatConfig, ChatItem, - ChatItemInTree, OnSend, } from '../types' import { useChat } from '../chat/hooks' @@ -147,7 +146,7 @@ const ChatWrapper = () => { ) }, [currentConversationId, currentConversationInputs, newConversationInputs, chatList, handleSend, isInstalledApp, appId, handleNewConversationCompleted]) - const doRegenerate = useCallback((chatItem: ChatItemInTree, editedQuestion?: { message: string, files?: FileEntity[] }) => { + const doRegenerate = useCallback((chatItem: ChatItem, editedQuestion?: { message: string, files?: FileEntity[] }) => { const question = editedQuestion ? chatItem : chatList.find(item => item.id === chatItem.parentMessageId)! const parentAnswer = chatList.find(item => item.id === question.parentMessageId) doSend(editedQuestion ? editedQuestion.message : question.content, diff --git a/web/app/components/base/chat/embedded-chatbot/hooks.tsx b/web/app/components/base/chat/embedded-chatbot/hooks.tsx index cfb221522c..9a9abfbd09 100644 --- a/web/app/components/base/chat/embedded-chatbot/hooks.tsx +++ b/web/app/components/base/chat/embedded-chatbot/hooks.tsx @@ -66,16 +66,20 @@ export const useEmbeddedChatbot = () => { const appInfo = useWebAppStore(s => s.appInfo) const appMeta = useWebAppStore(s => s.appMeta) const appParams = useWebAppStore(s => s.appParams) + const embeddedConversationId = useWebAppStore(s => s.embeddedConversationId) + const embeddedUserId = useWebAppStore(s => s.embeddedUserId) const appId = useMemo(() => appInfo?.app_id, [appInfo]) const [userId, setUserId] = useState() const [conversationId, setConversationId] = useState() + useEffect(() => { - getProcessedSystemVariablesFromUrlParams().then(({ user_id, conversation_id }) => { - setUserId(user_id) - setConversationId(conversation_id) - }) - }, []) + setUserId(embeddedUserId || undefined) + }, [embeddedUserId]) + + useEffect(() => { + setConversationId(embeddedConversationId || undefined) + }, [embeddedConversationId]) useEffect(() => { const setLanguageFromParams = async () => { diff --git a/web/app/components/base/chat/embedded-chatbot/inputs-form/content.tsx b/web/app/components/base/chat/embedded-chatbot/inputs-form/content.tsx index dd65f0ce72..caf4e363ff 100644 --- a/web/app/components/base/chat/embedded-chatbot/inputs-form/content.tsx +++ b/web/app/components/base/chat/embedded-chatbot/inputs-form/content.tsx @@ -49,7 +49,7 @@ const InputsFormContent = ({ showTip }: Props) => {
{form.label}
{!form.required && ( -
{t('appDebug.variableTable.optional')}
+
{t('workflow.panel.optional')}
)}
)} diff --git a/web/app/components/base/chat/types.ts b/web/app/components/base/chat/types.ts index f7f7aa4dce..5b0fe1f248 100644 --- a/web/app/components/base/chat/types.ts +++ b/web/app/components/base/chat/types.ts @@ -85,7 +85,7 @@ export type OnSend = { (message: string, files: FileEntity[] | undefined, isRegenerate: boolean, lastAnswer?: ChatItem | null): void } -export type OnRegenerate = (chatItem: ChatItem) => void +export type OnRegenerate = (chatItem: ChatItem, editedQuestion?: { message: string; files?: FileEntity[] }) => void export type Callback = { onSuccess: () => void diff --git a/web/app/components/base/checkbox/index.stories.tsx b/web/app/components/base/checkbox/index.stories.tsx index ba928baa6f..3f8d4606eb 100644 --- a/web/app/components/base/checkbox/index.stories.tsx +++ b/web/app/components/base/checkbox/index.stories.tsx @@ -13,7 +13,7 @@ const createToggleItem = ( } const meta = { - title: 'Base/Input/Checkbox', + title: 'Base/Data Entry/Checkbox', component: Checkbox, parameters: { layout: 'centered', diff --git a/web/app/components/base/chip/index.stories.tsx b/web/app/components/base/chip/index.stories.tsx new file mode 100644 index 0000000000..46d91c8cd6 --- /dev/null +++ b/web/app/components/base/chip/index.stories.tsx @@ -0,0 +1,99 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import Chip, { type Item } from '.' + +const ITEMS: Item[] = [ + { value: 'all', name: 'All items' }, + { value: 'active', name: 'Active' }, + { value: 'archived', name: 'Archived' }, + { value: 'draft', name: 'Drafts' }, +] + +const meta = { + title: 'Base/Data Entry/Chip', + component: Chip, + parameters: { + docs: { + description: { + component: 'Filter chip with dropdown panel and optional left icon. Commonly used for status pickers in toolbars.', + }, + }, + }, + tags: ['autodocs'], + args: { + items: ITEMS, + value: 'all', + // eslint-disable-next-line no-empty-function + onSelect: () => {}, + // eslint-disable-next-line no-empty-function + onClear: () => {}, + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +const ChipDemo = (props: React.ComponentProps) => { + const [selection, setSelection] = useState(props.value) + + return ( +
+ setSelection(item.value)} + onClear={() => setSelection('all')} + /> +
+ Current value: {selection} +
+
+ ) +} + +export const Playground: Story = { + render: args => , + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +const [selection, setSelection] = useState('all') + + setSelection(item.value)} + onClear={() => setSelection('all')} +/> + `.trim(), + }, + }, + }, +} + +export const WithoutLeftIcon: Story = { + args: { + showLeftIcon: false, + // eslint-disable-next-line no-empty-function + onSelect: () => {}, + // eslint-disable-next-line no-empty-function + onClear: () => {}, + }, + render: args => ( + + ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` + + `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/confirm/index.stories.tsx b/web/app/components/base/confirm/index.stories.tsx index 9ec21cbd50..12cb46d9e4 100644 --- a/web/app/components/base/confirm/index.stories.tsx +++ b/web/app/components/base/confirm/index.stories.tsx @@ -4,7 +4,7 @@ import Confirm from '.' import Button from '../button' const meta = { - title: 'Base/Dialog/Confirm', + title: 'Base/Feedback/Confirm', component: Confirm, parameters: { layout: 'centered', diff --git a/web/app/components/base/content-dialog/index.stories.tsx b/web/app/components/base/content-dialog/index.stories.tsx index 29b3914704..aaebcad1b7 100644 --- a/web/app/components/base/content-dialog/index.stories.tsx +++ b/web/app/components/base/content-dialog/index.stories.tsx @@ -5,7 +5,7 @@ import ContentDialog from '.' type Props = React.ComponentProps const meta = { - title: 'Base/Dialog/ContentDialog', + title: 'Base/Feedback/ContentDialog', component: ContentDialog, parameters: { layout: 'fullscreen', @@ -29,9 +29,14 @@ const meta = { control: false, description: 'Invoked when the overlay/backdrop is clicked.', }, + children: { + control: false, + table: { disable: true }, + }, }, args: { show: false, + children: null, }, } satisfies Meta @@ -92,6 +97,9 @@ const DemoWrapper = (props: Props) => { } export const Default: Story = { + args: { + children: null, + }, render: args => , } @@ -99,6 +107,7 @@ export const NarrowPanel: Story = { render: args => , args: { className: 'max-w-[420px]', + children: null, }, parameters: { docs: { diff --git a/web/app/components/base/copy-feedback/index.stories.tsx b/web/app/components/base/copy-feedback/index.stories.tsx new file mode 100644 index 0000000000..3bab620aec --- /dev/null +++ b/web/app/components/base/copy-feedback/index.stories.tsx @@ -0,0 +1,54 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import CopyFeedback, { CopyFeedbackNew } from '.' + +const meta = { + title: 'Base/Feedback/CopyFeedback', + component: CopyFeedback, + parameters: { + docs: { + description: { + component: 'Copy-to-clipboard button that shows instant feedback and a tooltip. Includes the original ActionButton wrapper and the newer ghost-button variant.', + }, + }, + }, + tags: ['autodocs'], + args: { + content: 'acc-3f92fa', + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +const CopyDemo = ({ content }: { content: string }) => { + const [value] = useState(content) + return ( +
+
+ Client ID: + {value} + +
+
+ Use the new ghost variant: + +
+
+ ) +} + +export const Playground: Story = { + render: args => , + parameters: { + docs: { + source: { + language: 'tsx', + code: ` + + + `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/copy-icon/index.stories.tsx b/web/app/components/base/copy-icon/index.stories.tsx new file mode 100644 index 0000000000..5962773792 --- /dev/null +++ b/web/app/components/base/copy-icon/index.stories.tsx @@ -0,0 +1,68 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import CopyIcon from '.' + +const meta = { + title: 'Base/General/CopyIcon', + component: CopyIcon, + parameters: { + docs: { + description: { + component: 'Interactive copy-to-clipboard glyph that swaps to a checkmark once the content has been copied. Tooltips rely on the app locale.', + }, + }, + }, + tags: ['autodocs'], + args: { + content: 'https://console.dify.ai/apps/12345', + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Default: Story = { + render: args => ( +
+ Hover or click to copy the app link: + +
+ ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +
+ Hover or click to copy the app link: + +
+ `.trim(), + }, + }, + }, +} + +export const InlineUsage: Story = { + render: args => ( +
+

+ Use the copy icon inline with labels or metadata. Clicking the icon copies the value to the clipboard and shows a success tooltip. +

+
+ Client ID + acc-3f92fa + +
+
+ ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` + + `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/corner-label/index.stories.tsx b/web/app/components/base/corner-label/index.stories.tsx new file mode 100644 index 0000000000..1592f94259 --- /dev/null +++ b/web/app/components/base/corner-label/index.stories.tsx @@ -0,0 +1,53 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import CornerLabel from '.' + +const meta = { + title: 'Base/Data Display/CornerLabel', + component: CornerLabel, + parameters: { + docs: { + description: { + component: 'Decorative label that anchors to card corners. Useful for marking “beta”, “deprecated”, or similar callouts.', + }, + source: { + language: 'tsx', + code: ` + + `.trim(), + }, + }, + }, + tags: ['autodocs'], + args: { + label: 'beta', + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Default: Story = {} + +export const OnCard: Story = { + render: args => ( +
+ +
+ Showcase how the label sits on a card header. Pair with contextual text or status information. +
+
+ ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +
+ + ...card content... +
+ `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/date-and-time-picker/calendar/index.tsx b/web/app/components/base/date-and-time-picker/calendar/index.tsx index 00612fcb37..03dcb0eda3 100644 --- a/web/app/components/base/date-and-time-picker/calendar/index.tsx +++ b/web/app/components/base/date-and-time-picker/calendar/index.tsx @@ -8,9 +8,10 @@ const Calendar: FC = ({ selectedDate, onDateClick, wrapperClassName, + getIsDateDisabled, }) => { return
- +
{ days.map(day => = ({ day={day} selectedDate={selectedDate} onClick={onDateClick} + isDisabled={getIsDateDisabled ? getIsDateDisabled(day.date) : false} />) }
diff --git a/web/app/components/base/date-and-time-picker/calendar/item.tsx b/web/app/components/base/date-and-time-picker/calendar/item.tsx index 1da8b9b3b5..7132d7bdfb 100644 --- a/web/app/components/base/date-and-time-picker/calendar/item.tsx +++ b/web/app/components/base/date-and-time-picker/calendar/item.tsx @@ -7,6 +7,7 @@ const Item: FC = ({ day, selectedDate, onClick, + isDisabled, }) => { const { date, isCurrentMonth } = day const isSelected = selectedDate?.isSame(date, 'date') @@ -14,11 +15,12 @@ const Item: FC = ({ return (
)} @@ -273,6 +275,7 @@ const DatePicker = ({ days={days} selectedDate={selectedDate} onDateClick={handleDateSelect} + getIsDateDisabled={getIsDateDisabled} /> ) : view === ViewType.yearMonth ? ( - ) : ( + ) + } + { + ![ViewType.date, ViewType.time].includes(view) && ( + +export default meta +type Story = StoryObj + +const DatePickerPlayground = (props: DatePickerProps) => { + const [value, setValue] = useState(props.value) + + return ( +
+ setValue(undefined)} + /> +
+ Selected datetime: {value ? value.format() : 'undefined'} +
+
+ ) +} + +export const Playground: Story = { + render: args => , + args: { + ...meta.args, + needTimePicker: false, + placeholder: 'Select due date', + }, + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +const [value, setValue] = useState(getDateWithTimezone({})) + + setValue(undefined)} +/> + `.trim(), + }, + }, + }, +} + +export const DateOnly: Story = { + render: args => ( + + ), + args: { + ...meta.args, + needTimePicker: false, + placeholder: 'Select due date', + }, + parameters: { + docs: { + source: { + language: 'tsx', + code: ` + + `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/date-and-time-picker/time-picker/index.spec.tsx b/web/app/components/base/date-and-time-picker/time-picker/index.spec.tsx index 9f641d5e8b..24c7fff52f 100644 --- a/web/app/components/base/date-and-time-picker/time-picker/index.spec.tsx +++ b/web/app/components/base/date-and-time-picker/time-picker/index.spec.tsx @@ -3,6 +3,7 @@ import { fireEvent, render, screen } from '@testing-library/react' import TimePicker from './index' import dayjs from '../utils/dayjs' import { isDayjsObject } from '../utils/dayjs' +import type { TimePickerProps } from '../types' jest.mock('react-i18next', () => ({ useTranslation: () => ({ @@ -39,9 +40,10 @@ jest.mock('@/app/components/base/timezone-label', () => { }) describe('TimePicker', () => { - const baseProps = { + const baseProps: Pick = { onChange: jest.fn(), onClear: jest.fn(), + value: undefined, } beforeEach(() => { diff --git a/web/app/components/base/date-and-time-picker/time-picker/index.tsx b/web/app/components/base/date-and-time-picker/time-picker/index.tsx index 4ad9c2a625..9577a107e5 100644 --- a/web/app/components/base/date-and-time-picker/time-picker/index.tsx +++ b/web/app/components/base/date-and-time-picker/time-picker/index.tsx @@ -39,6 +39,7 @@ const TimePicker = ({ notClearable = false, triggerFullWidth = false, showTimezone = false, + placement = 'bottom-start', }: TimePickerProps) => { const { t } = useTranslation() const [isOpen, setIsOpen] = useState(false) @@ -204,7 +205,7 @@ const TimePicker = ({ {renderTrigger ? (renderTrigger({ diff --git a/web/app/components/base/date-and-time-picker/types.ts b/web/app/components/base/date-and-time-picker/types.ts index 9116e3d937..f79fb83ace 100644 --- a/web/app/components/base/date-and-time-picker/types.ts +++ b/web/app/components/base/date-and-time-picker/types.ts @@ -1,4 +1,5 @@ import type { Dayjs } from 'dayjs' +import type { Placement } from '@floating-ui/react' export enum ViewType { date = 'date', @@ -30,7 +31,8 @@ export type DatePickerProps = { renderTrigger?: (props: TriggerProps) => React.ReactNode minuteFilter?: (minutes: string[]) => string[] popupZIndexClassname?: string - notClearable?: boolean + noConfirm?: boolean + getIsDateDisabled?: (date: Dayjs) => boolean } export type DatePickerHeaderProps = { @@ -66,8 +68,8 @@ export type TimePickerProps = { popupClassName?: string notClearable?: boolean triggerFullWidth?: boolean - /** Show timezone label inline with the time picker */ showTimezone?: boolean + placement?: Placement } export type TimePickerFooterProps = { @@ -85,12 +87,14 @@ export type CalendarProps = { selectedDate: Dayjs | undefined onDateClick: (date: Dayjs) => void wrapperClassName?: string + getIsDateDisabled?: (date: Dayjs) => boolean } export type CalendarItemProps = { day: Day selectedDate: Dayjs | undefined onClick: (date: Dayjs) => void + isDisabled: boolean } export type TimeOptionsProps = { diff --git a/web/app/components/base/date-and-time-picker/utils/dayjs.ts b/web/app/components/base/date-and-time-picker/utils/dayjs.ts index 6b032b3929..b05e725985 100644 --- a/web/app/components/base/date-and-time-picker/utils/dayjs.ts +++ b/web/app/components/base/date-and-time-picker/utils/dayjs.ts @@ -161,7 +161,7 @@ export const toDayjs = (value: string | Dayjs | undefined, options: ToDayjsOptio if (format) { const parsedWithFormat = tzName - ? dayjs.tz(trimmed, format, tzName, true) + ? dayjs(trimmed, format, true).tz(tzName, true) : dayjs(trimmed, format, true) if (parsedWithFormat.isValid()) return parsedWithFormat @@ -202,7 +202,7 @@ export const toDayjs = (value: string | Dayjs | undefined, options: ToDayjsOptio const candidateFormats = formats ?? COMMON_PARSE_FORMATS for (const fmt of candidateFormats) { const parsed = tzName - ? dayjs.tz(trimmed, fmt, tzName, true) + ? dayjs(trimmed, fmt, true).tz(tzName, true) : dayjs(trimmed, fmt, true) if (parsed.isValid()) return parsed diff --git a/web/app/components/base/dialog/index.stories.tsx b/web/app/components/base/dialog/index.stories.tsx index 62ae7c00ce..f573b856d3 100644 --- a/web/app/components/base/dialog/index.stories.tsx +++ b/web/app/components/base/dialog/index.stories.tsx @@ -3,7 +3,7 @@ import { useEffect, useState } from 'react' import Dialog from '.' const meta = { - title: 'Base/Dialog/Dialog', + title: 'Base/Feedback/Dialog', component: Dialog, parameters: { layout: 'fullscreen', @@ -47,6 +47,7 @@ const meta = { args: { title: 'Manage API Keys', show: false, + children: null, }, } satisfies Meta diff --git a/web/app/components/base/divider/index.stories.tsx b/web/app/components/base/divider/index.stories.tsx new file mode 100644 index 0000000000..c634173202 --- /dev/null +++ b/web/app/components/base/divider/index.stories.tsx @@ -0,0 +1,46 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import Divider from '.' + +const meta = { + title: 'Base/Layout/Divider', + component: Divider, + parameters: { + docs: { + description: { + component: 'Lightweight separator supporting horizontal and vertical orientations with gradient or solid backgrounds.', + }, + source: { + language: 'tsx', + code: ` + + `.trim(), + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Horizontal: Story = {} + +export const Vertical: Story = { + render: args => ( +
+ Filters + + Tags +
+ ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` + + `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/drawer-plus/index.stories.tsx b/web/app/components/base/drawer-plus/index.stories.tsx new file mode 100644 index 0000000000..ddb39f2d63 --- /dev/null +++ b/web/app/components/base/drawer-plus/index.stories.tsx @@ -0,0 +1,124 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { fn } from 'storybook/test' +import { useState } from 'react' +import DrawerPlus from '.' + +const meta = { + title: 'Base/Feedback/DrawerPlus', + component: DrawerPlus, + parameters: { + layout: 'fullscreen', + docs: { + description: { + component: 'Enhanced drawer built atop the base drawer component. Provides header/foot slots, mask control, and mobile breakpoints.', + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +type DrawerPlusProps = React.ComponentProps + +const storyBodyElement: React.JSX.Element = ( +
+

+ DrawerPlus allows rich content with sticky header/footer and responsive masking on mobile. Great for editing flows or showing execution logs. +

+
+ Body content scrolls if it exceeds the allotted height. +
+
+) + +const DrawerPlusDemo = (props: Partial) => { + const [open, setOpen] = useState(false) + + const { + body, + title, + foot, + isShow: _isShow, + onHide: _onHide, + ...rest + } = props + + const resolvedBody: React.JSX.Element = body ?? storyBodyElement + + return ( +
+ + + } + isShow={open} + onHide={() => setOpen(false)} + title={title ?? 'Workflow execution details'} + body={resolvedBody} + foot={foot} + /> +
+ ) +} + +export const Playground: Story = { + render: args => , + args: { + isShow: false, + onHide: fn(), + title: 'Edit configuration', + body: storyBodyElement, + }, +} + +export const WithFooter: Story = { + render: (args) => { + const FooterDemo = () => { + const [open, setOpen] = useState(false) + return ( +
+ + + setOpen(false)} + title={args.title ?? 'Workflow execution details'} + body={args.body ?? ( +
+

Populate the body with scrollable content. Footer stays pinned.

+
+ )} + foot={ +
+ + +
+ } + /> +
+ ) + } + return + }, + args: { + isShow: false, + onHide: fn(), + title: 'Edit configuration!', + body: storyBodyElement, + }, +} diff --git a/web/app/components/base/drawer/index.stories.tsx b/web/app/components/base/drawer/index.stories.tsx new file mode 100644 index 0000000000..e7711bc1a2 --- /dev/null +++ b/web/app/components/base/drawer/index.stories.tsx @@ -0,0 +1,114 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { fn } from 'storybook/test' +import { useState } from 'react' +import Drawer from '.' + +const meta = { + title: 'Base/Feedback/Drawer', + component: Drawer, + parameters: { + layout: 'fullscreen', + docs: { + description: { + component: 'Sliding panel built on Headless UI dialog primitives. Supports optional mask, custom footer, and close behaviour.', + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +const DrawerDemo = (props: React.ComponentProps) => { + const [open, setOpen] = useState(false) + + return ( +
+ + + setOpen(false)} + title={props.title ?? 'Edit configuration'} + description={props.description ?? 'Adjust settings in the side panel and save.'} + footer={props.footer ?? undefined} + > +
+

+ This example renders arbitrary content inside the drawer body. Use it for contextual forms, settings, or informational panels. +

+
+ Content area +
+
+
+
+ ) +} + +export const Playground: Story = { + render: args => , + args: { + children: null, + isOpen: false, + onClose: fn(), + }, + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +const [open, setOpen] = useState(false) + + setOpen(false)} + title="Edit configuration" + description="Adjust settings in the side panel and save." +> + ... + + `.trim(), + }, + }, + }, +} + +export const CustomFooter: Story = { + render: args => ( + + + +
+ } + /> + ), + args: { + children: null, + isOpen: false, + onClose: fn(), + }, + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +}> + ... + + `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/dropdown/index.stories.tsx b/web/app/components/base/dropdown/index.stories.tsx new file mode 100644 index 0000000000..da70730744 --- /dev/null +++ b/web/app/components/base/dropdown/index.stories.tsx @@ -0,0 +1,85 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { fn } from 'storybook/test' +import { useState } from 'react' +import Dropdown, { type Item } from '.' + +const PRIMARY_ITEMS: Item[] = [ + { value: 'rename', text: 'Rename' }, + { value: 'duplicate', text: 'Duplicate' }, +] + +const SECONDARY_ITEMS: Item[] = [ + { value: 'archive', text: Archive }, + { value: 'delete', text: Delete }, +] + +const meta = { + title: 'Base/Navigation/Dropdown', + component: Dropdown, + parameters: { + docs: { + description: { + component: 'Small contextual menu with optional destructive section. Uses portal positioning utilities for precise placement.', + }, + }, + }, + tags: ['autodocs'], + args: { + items: PRIMARY_ITEMS, + secondItems: SECONDARY_ITEMS, + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +const DropdownDemo = (props: React.ComponentProps) => { + const [lastAction, setLastAction] = useState('None') + + return ( +
+ { + setLastAction(String(item.value)) + props.onSelect?.(item) + }} + /> +
+ Last action: {lastAction} +
+
+ ) +} + +export const Playground: Story = { + render: args => , + args: { + items: PRIMARY_ITEMS, + secondItems: SECONDARY_ITEMS, + onSelect: fn(), + }, +} + +export const CustomTrigger: Story = { + render: args => ( + ( + + )} + /> + ), + args: { + items: PRIMARY_ITEMS, + onSelect: fn(), + }, +} diff --git a/web/app/components/base/effect/index.stories.tsx b/web/app/components/base/effect/index.stories.tsx new file mode 100644 index 0000000000..a7f316fe7e --- /dev/null +++ b/web/app/components/base/effect/index.stories.tsx @@ -0,0 +1,39 @@ +/* eslint-disable tailwindcss/classnames-order */ +import type { Meta, StoryObj } from '@storybook/nextjs' +import Effect from '.' + +const meta = { + title: 'Base/Other/Effect', + component: Effect, + parameters: { + docs: { + description: { + component: 'Blurred circular glow used as a decorative background accent. Combine with relatively positioned containers.', + }, + source: { + language: 'tsx', + code: ` +
+ +
+ `.trim(), + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = { + render: () => ( +
+ + +
+ Accent glow +
+
+ ), +} diff --git a/web/app/components/base/emoji-picker/Inner.stories.tsx b/web/app/components/base/emoji-picker/Inner.stories.tsx new file mode 100644 index 0000000000..5341d63ee3 --- /dev/null +++ b/web/app/components/base/emoji-picker/Inner.stories.tsx @@ -0,0 +1,57 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import EmojiPickerInner from './Inner' + +const meta = { + title: 'Base/Data Entry/EmojiPickerInner', + component: EmojiPickerInner, + parameters: { + layout: 'fullscreen', + docs: { + description: { + component: 'Core emoji grid with search and style swatches. Use this when embedding the selector inline without a modal frame.', + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +const InnerDemo = () => { + const [selection, setSelection] = useState<{ emoji: string; background: string } | null>(null) + + return ( +
+ setSelection({ emoji, background })} + className="flex-1 overflow-hidden rounded-xl border border-divider-subtle bg-white" + /> +
+
Latest selection
+
+          {selection ? JSON.stringify(selection, null, 2) : 'Tap an emoji to set background options.'}
+        
+
+
+ ) +} + +export const Playground: Story = { + render: () => , + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +const [selection, setSelection] = useState<{ emoji: string; background: string } | null>(null) + +return ( + setSelection({ emoji, background })} /> +) + `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/emoji-picker/index.stories.tsx b/web/app/components/base/emoji-picker/index.stories.tsx new file mode 100644 index 0000000000..7c9b07f138 --- /dev/null +++ b/web/app/components/base/emoji-picker/index.stories.tsx @@ -0,0 +1,91 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import EmojiPicker from '.' + +const meta = { + title: 'Base/Data Entry/EmojiPicker', + component: EmojiPicker, + parameters: { + layout: 'fullscreen', + docs: { + description: { + component: 'Modal-based emoji selector that powers the icon picker. Supports search, background swatches, and confirmation callbacks.', + }, + }, + nextjs: { + appDirectory: true, + navigation: { + pathname: '/apps/demo-app/emoji-picker', + params: { appId: 'demo-app' }, + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +const EmojiPickerDemo = () => { + const [open, setOpen] = useState(false) + const [selection, setSelection] = useState<{ emoji: string; background: string } | null>(null) + + return ( +
+ + +
+
Selection preview
+
+          {selection ? JSON.stringify(selection, null, 2) : 'No emoji selected yet.'}
+        
+
+ + {open && ( + { + setSelection({ emoji, background }) + setOpen(false) + }} + onClose={() => setOpen(false)} + /> + )} +
+ ) +} + +export const Playground: Story = { + render: () => , + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +const [open, setOpen] = useState(false) +const [selection, setSelection] = useState<{ emoji: string; background: string } | null>(null) + +return ( + <> + + {open && ( + { + setSelection({ emoji, background }) + setOpen(false) + }} + onClose={() => setOpen(false)} + /> + )} + +) + `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/features/index.stories.tsx b/web/app/components/base/features/index.stories.tsx new file mode 100644 index 0000000000..f1eaf048b8 --- /dev/null +++ b/web/app/components/base/features/index.stories.tsx @@ -0,0 +1,73 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import { FeaturesProvider } from '.' +import NewFeaturePanel from './new-feature-panel' +import type { Features } from './types' + +const DEFAULT_FEATURES: Features = { + moreLikeThis: { enabled: false }, + opening: { enabled: false }, + suggested: { enabled: false }, + text2speech: { enabled: false }, + speech2text: { enabled: false }, + citation: { enabled: false }, + moderation: { enabled: false }, + file: { enabled: false }, + annotationReply: { enabled: false }, +} + +const meta = { + title: 'Base/Other/FeaturesProvider', + component: FeaturesProvider, + parameters: { + layout: 'fullscreen', + docs: { + description: { + component: 'Zustand-backed provider used for feature toggles. Paired with `NewFeaturePanel` for workflow settings.', + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +const FeaturesDemo = () => { + const [show, setShow] = useState(true) + const [features, setFeatures] = useState(DEFAULT_FEATURES) + + return ( + +
+
+
Feature toggles preview
+
+ +
+
+
+ + setFeatures(prev => ({ ...prev, ...next }))} + onClose={() => setShow(false)} + /> +
+ ) +} + +export const Playground: Story = { + render: () => , + args: { + children: null, + }, +} diff --git a/web/app/components/base/features/new-feature-panel/conversation-opener/modal.tsx b/web/app/components/base/features/new-feature-panel/conversation-opener/modal.tsx index f0af893f0d..8ab007e66b 100644 --- a/web/app/components/base/features/new-feature-panel/conversation-opener/modal.tsx +++ b/web/app/components/base/features/new-feature-panel/conversation-opener/modal.tsx @@ -1,4 +1,4 @@ -import React, { useCallback, useEffect, useState } from 'react' +import React, { useCallback, useEffect, useMemo, useState } from 'react' import { useTranslation } from 'react-i18next' import { useBoolean } from 'ahooks' import { produce } from 'immer' @@ -45,7 +45,13 @@ const OpeningSettingModal = ({ const [isShowConfirmAddVar, { setTrue: showConfirmAddVar, setFalse: hideConfirmAddVar }] = useBoolean(false) const [notIncludeKeys, setNotIncludeKeys] = useState([]) + const isSaveDisabled = useMemo(() => !tempValue.trim(), [tempValue]) + const handleSave = useCallback((ignoreVariablesCheck?: boolean) => { + // Prevent saving if opening statement is empty + if (isSaveDisabled) + return + if (!ignoreVariablesCheck) { const keys = getInputKeys(tempValue) const promptKeys = promptVariables.map(item => item.key) @@ -75,7 +81,7 @@ const OpeningSettingModal = ({ } }) onSave(newOpening) - }, [data, onSave, promptVariables, workflowVariables, showConfirmAddVar, tempSuggestedQuestions, tempValue]) + }, [data, onSave, promptVariables, workflowVariables, showConfirmAddVar, tempSuggestedQuestions, tempValue, isSaveDisabled]) const cancelAutoAddVar = useCallback(() => { hideConfirmAddVar() @@ -217,6 +223,7 @@ const OpeningSettingModal = ({ diff --git a/web/app/components/base/features/new-feature-panel/file-upload/setting-modal.tsx b/web/app/components/base/features/new-feature-panel/file-upload/setting-modal.tsx index 92f93b8819..6ebbc05ae5 100644 --- a/web/app/components/base/features/new-feature-panel/file-upload/setting-modal.tsx +++ b/web/app/components/base/features/new-feature-panel/file-upload/setting-modal.tsx @@ -37,7 +37,7 @@ const FileUploadSettings = ({ {children} -
+
onOpen(false)} diff --git a/web/app/components/base/features/new-feature-panel/moderation/moderation-setting-modal.tsx b/web/app/components/base/features/new-feature-panel/moderation/moderation-setting-modal.tsx index 095137203b..ff45a7ea4c 100644 --- a/web/app/components/base/features/new-feature-panel/moderation/moderation-setting-modal.tsx +++ b/web/app/components/base/features/new-feature-panel/moderation/moderation-setting-modal.tsx @@ -26,6 +26,7 @@ import { CustomConfigurationStatusEnum } from '@/app/components/header/account-s import cn from '@/utils/classnames' import { noop } from 'lodash-es' import { useDocLink } from '@/context/i18n' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' const systemTypes = ['openai_moderation', 'keywords', 'api'] @@ -55,7 +56,7 @@ const ModerationSettingModal: FC = ({ const { setShowAccountSettingModal } = useModalContext() const handleOpenSettingsModal = () => { setShowAccountSettingModal({ - payload: 'provider', + payload: ACCOUNT_SETTING_TAB.PROVIDER, onCancelCallback: () => { mutate() }, diff --git a/web/app/components/base/file-icon/index.stories.tsx b/web/app/components/base/file-icon/index.stories.tsx new file mode 100644 index 0000000000..dbd3e13fea --- /dev/null +++ b/web/app/components/base/file-icon/index.stories.tsx @@ -0,0 +1,79 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import FileIcon from '.' + +const meta = { + title: 'Base/General/FileIcon', + component: FileIcon, + parameters: { + docs: { + description: { + component: 'Maps a file extension to the appropriate SVG icon used across upload and attachment surfaces.', + }, + }, + }, + tags: ['autodocs'], + argTypes: { + type: { + control: 'text', + description: 'File extension or identifier used to resolve the icon.', + }, + className: { + control: 'text', + description: 'Custom classes passed to the SVG wrapper.', + }, + }, + args: { + type: 'pdf', + className: 'h-10 w-10', + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Default: Story = { + render: args => ( +
+ + Extension: {args.type} +
+ ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` + + `.trim(), + }, + }, + }, +} + +export const Gallery: Story = { + render: () => { + const examples = ['pdf', 'docx', 'xlsx', 'csv', 'json', 'md', 'txt', 'html', 'notion', 'unknown'] + return ( +
+ {examples.map(type => ( +
+ + {type} +
+ ))} +
+ ) + }, + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +{['pdf','docx','xlsx','csv','json','md','txt','html','notion','unknown'].map(type => ( + +))} + `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/file-uploader/file-image-render.stories.tsx b/web/app/components/base/file-uploader/file-image-render.stories.tsx new file mode 100644 index 0000000000..132c0b61a3 --- /dev/null +++ b/web/app/components/base/file-uploader/file-image-render.stories.tsx @@ -0,0 +1,32 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import FileImageRender from './file-image-render' + +const SAMPLE_IMAGE = 'data:image/svg+xml;utf8,Preview' + +const meta = { + title: 'Base/General/FileImageRender', + component: FileImageRender, + parameters: { + docs: { + description: { + component: 'Renders image previews inside a bordered frame. Often used in upload galleries and logs.', + }, + source: { + language: 'tsx', + code: ` + + `.trim(), + }, + }, + }, + tags: ['autodocs'], + args: { + imageUrl: SAMPLE_IMAGE, + className: 'h-32 w-52', + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/file-uploader/file-list.stories.tsx b/web/app/components/base/file-uploader/file-list.stories.tsx new file mode 100644 index 0000000000..89c0568735 --- /dev/null +++ b/web/app/components/base/file-uploader/file-list.stories.tsx @@ -0,0 +1,96 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import { FileList } from './file-uploader-in-chat-input/file-list' +import type { FileEntity } from './types' +import { SupportUploadFileTypes } from '@/app/components/workflow/types' +import { TransferMethod } from '@/types/app' + +const SAMPLE_IMAGE = 'data:image/svg+xml;utf8,IMG' + +const filesSample: FileEntity[] = [ + { + id: '1', + name: 'Project Brief.pdf', + size: 256000, + type: 'application/pdf', + progress: 100, + transferMethod: TransferMethod.local_file, + supportFileType: SupportUploadFileTypes.document, + url: '', + }, + { + id: '2', + name: 'Design.png', + size: 128000, + type: 'image/png', + progress: 100, + transferMethod: TransferMethod.local_file, + supportFileType: SupportUploadFileTypes.image, + base64Url: SAMPLE_IMAGE, + }, + { + id: '3', + name: 'Voiceover.mp3', + size: 512000, + type: 'audio/mpeg', + progress: 45, + transferMethod: TransferMethod.remote_url, + supportFileType: SupportUploadFileTypes.audio, + url: '', + }, +] + +const meta = { + title: 'Base/Data Display/FileList', + component: FileList, + parameters: { + docs: { + description: { + component: 'Renders a responsive gallery of uploaded files, handling icons, previews, and progress states.', + }, + }, + }, + tags: ['autodocs'], + args: { + files: filesSample, + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +const FileListPlayground = (args: React.ComponentProps) => { + const [items, setItems] = useState(args.files || []) + + return ( +
+ setItems(list => list.filter(file => file.id !== fileId))} + /> +
+ ) +} + +export const Playground: Story = { + render: args => , + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +const [files, setFiles] = useState(initialFiles) + + setFiles(list => list.filter(file => file.id !== id))} /> + `.trim(), + }, + }, + }, +} + +export const UploadStates: Story = { + args: { + files: filesSample.map(file => ({ ...file, progress: file.id === '3' ? 45 : 100 })), + }, +} diff --git a/web/app/components/base/file-uploader/file-type-icon.stories.tsx b/web/app/components/base/file-uploader/file-type-icon.stories.tsx new file mode 100644 index 0000000000..c317afab68 --- /dev/null +++ b/web/app/components/base/file-uploader/file-type-icon.stories.tsx @@ -0,0 +1,38 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import FileTypeIcon from './file-type-icon' +import { FileAppearanceTypeEnum } from './types' + +const meta = { + title: 'Base/General/FileTypeIcon', + component: FileTypeIcon, + parameters: { + docs: { + description: { + component: 'Displays the appropriate icon and accent colour for a file appearance type. Useful in lists and attachments.', + }, + }, + }, + tags: ['autodocs'], + args: { + type: FileAppearanceTypeEnum.document, + size: 'md', + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} + +export const Gallery: Story = { + render: () => ( +
+ {Object.values(FileAppearanceTypeEnum).map(type => ( +
+ + {type} +
+ ))} +
+ ), +} diff --git a/web/app/components/base/file-uploader/file-uploader-in-attachment/index.stories.tsx b/web/app/components/base/file-uploader/file-uploader-in-attachment/index.stories.tsx new file mode 100644 index 0000000000..dabb8b6615 --- /dev/null +++ b/web/app/components/base/file-uploader/file-uploader-in-attachment/index.stories.tsx @@ -0,0 +1,110 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { fn } from 'storybook/test' +import { useState } from 'react' +import FileUploaderInAttachmentWrapper from './index' +import type { FileEntity } from '../types' +import type { FileUpload } from '@/app/components/base/features/types' +import { PreviewMode } from '@/app/components/base/features/types' +import { TransferMethod } from '@/types/app' +import { ToastProvider } from '@/app/components/base/toast' +import { SupportUploadFileTypes } from '@/app/components/workflow/types' + +const SAMPLE_IMAGE = 'data:image/svg+xml;utf8,IMG' + +const mockFiles: FileEntity[] = [ + { + id: 'file-1', + name: 'Requirements.pdf', + size: 256000, + type: 'application/pdf', + progress: 100, + transferMethod: TransferMethod.local_file, + supportFileType: SupportUploadFileTypes.document, + url: '', + }, + { + id: 'file-2', + name: 'Interface.png', + size: 128000, + type: 'image/png', + progress: 100, + transferMethod: TransferMethod.local_file, + supportFileType: SupportUploadFileTypes.image, + base64Url: SAMPLE_IMAGE, + }, + { + id: 'file-3', + name: 'Voiceover.mp3', + size: 512000, + type: 'audio/mpeg', + progress: 35, + transferMethod: TransferMethod.remote_url, + supportFileType: SupportUploadFileTypes.audio, + url: '', + }, +] + +const fileConfig: FileUpload = { + enabled: true, + allowed_file_upload_methods: [TransferMethod.local_file, TransferMethod.remote_url], + allowed_file_types: ['document', 'image', 'audio'], + number_limits: 5, + preview_config: { mode: PreviewMode.NewPage, file_type_list: ['pdf', 'png'] }, +} + +const meta = { + title: 'Base/Data Entry/FileUploaderInAttachment', + component: FileUploaderInAttachmentWrapper, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Attachment-style uploader that supports local files and remote links. Demonstrates upload progress, re-upload, and preview actions.', + }, + }, + nextjs: { + appDirectory: true, + navigation: { + pathname: '/apps/demo-app/uploads', + params: { appId: 'demo-app' }, + }, + }, + }, + tags: ['autodocs'], + args: { + fileConfig, + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +const AttachmentDemo = (props: React.ComponentProps) => { + const [files, setFiles] = useState(mockFiles) + + return ( + +
+ +
+
+ ) +} + +export const Playground: Story = { + render: args => , + args: { + onChange: fn(), + }, +} + +export const Disabled: Story = { + render: args => , + args: { + onChange: fn(), + }, +} diff --git a/web/app/components/base/file-uploader/file-uploader-in-chat-input/index.stories.tsx b/web/app/components/base/file-uploader/file-uploader-in-chat-input/index.stories.tsx new file mode 100644 index 0000000000..f4165f64cb --- /dev/null +++ b/web/app/components/base/file-uploader/file-uploader-in-chat-input/index.stories.tsx @@ -0,0 +1,95 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import FileUploaderInChatInput from '.' +import { FileContextProvider } from '../store' +import type { FileEntity } from '../types' +import type { FileUpload } from '@/app/components/base/features/types' +import { SupportUploadFileTypes } from '@/app/components/workflow/types' +import { TransferMethod } from '@/types/app' +import { FileList } from '../file-uploader-in-chat-input/file-list' +import { ToastProvider } from '@/app/components/base/toast' + +const mockFiles: FileEntity[] = [ + { + id: '1', + name: 'Dataset.csv', + size: 64000, + type: 'text/csv', + progress: 100, + transferMethod: TransferMethod.local_file, + supportFileType: SupportUploadFileTypes.document, + }, +] + +const chatUploadConfig: FileUpload = { + enabled: true, + allowed_file_upload_methods: [TransferMethod.local_file, TransferMethod.remote_url], + allowed_file_types: ['image', 'document'], + number_limits: 3, +} + +type ChatInputDemoProps = React.ComponentProps & { + initialFiles?: FileEntity[] +} + +const ChatInputDemo = ({ initialFiles = mockFiles, ...props }: ChatInputDemoProps) => { + const [files, setFiles] = useState(initialFiles) + + return ( + + +
+
Simulated chat input
+
+ +
Type a message...
+
+
+ +
+
+
+
+ ) +} + +const meta = { + title: 'Base/Data Entry/FileUploaderInChatInput', + component: ChatInputDemo, + parameters: { + docs: { + description: { + component: 'Attachment trigger suited for chat inputs. Demonstrates integration with the shared file store and preview list.', + }, + }, + nextjs: { + appDirectory: true, + navigation: { + pathname: '/chats/demo', + params: { appId: 'demo-app' }, + }, + }, + }, + tags: ['autodocs'], + args: { + fileConfig: chatUploadConfig, + initialFiles: mockFiles, + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = { + render: args => , +} + +export const RemoteOnly: Story = { + args: { + fileConfig: { + ...chatUploadConfig, + allowed_file_upload_methods: [TransferMethod.remote_url], + }, + initialFiles: [], + }, +} diff --git a/web/app/components/base/file-uploader/hooks.ts b/web/app/components/base/file-uploader/hooks.ts index 3f4d4a6b06..521ecdbafd 100644 --- a/web/app/components/base/file-uploader/hooks.ts +++ b/web/app/components/base/file-uploader/hooks.ts @@ -11,6 +11,7 @@ import type { FileEntity } from './types' import { useFileStore } from './store' import { fileUpload, + getFileUploadErrorMessage, getSupportFileType, isAllowedFileExtension, } from './utils' @@ -172,8 +173,9 @@ export const useFile = (fileConfig: FileUpload) => { onSuccessCallback: (res) => { handleUpdateFile({ ...uploadingFile, uploadedId: res.id, progress: 100 }) }, - onErrorCallback: () => { - notify({ type: 'error', message: t('common.fileUploader.uploadFromComputerUploadError') }) + onErrorCallback: (error?: any) => { + const errorMessage = getFileUploadErrorMessage(error, t('common.fileUploader.uploadFromComputerUploadError'), t) + notify({ type: 'error', message: errorMessage }) handleUpdateFile({ ...uploadingFile, progress: -1 }) }, }, !!params.token) @@ -279,8 +281,9 @@ export const useFile = (fileConfig: FileUpload) => { onSuccessCallback: (res) => { handleUpdateFile({ ...uploadingFile, uploadedId: res.id, progress: 100 }) }, - onErrorCallback: () => { - notify({ type: 'error', message: t('common.fileUploader.uploadFromComputerUploadError') }) + onErrorCallback: (error?: any) => { + const errorMessage = getFileUploadErrorMessage(error, t('common.fileUploader.uploadFromComputerUploadError'), t) + notify({ type: 'error', message: errorMessage }) handleUpdateFile({ ...uploadingFile, progress: -1 }) }, }, !!params.token) @@ -302,9 +305,23 @@ export const useFile = (fileConfig: FileUpload) => { const text = e.clipboardData?.getData('text/plain') if (file && !text) { e.preventDefault() + + const allowedFileTypes = fileConfig.allowed_file_types || [] + const fileType = getSupportFileType(file.name, file.type, allowedFileTypes?.includes(SupportUploadFileTypes.custom)) + const isFileTypeAllowed = allowedFileTypes.includes(fileType) + + // Check if file type is in allowed list + if (!isFileTypeAllowed || !fileConfig.enabled) { + notify({ + type: 'error', + message: t('common.fileUploader.fileExtensionNotSupport'), + }) + return + } + handleLocalFileUpload(file) } - }, [handleLocalFileUpload]) + }, [handleLocalFileUpload, fileConfig, notify, t]) const [isDragActive, setIsDragActive] = useState(false) const handleDragFileEnter = useCallback((e: React.DragEvent) => { diff --git a/web/app/components/base/file-uploader/utils.ts b/web/app/components/base/file-uploader/utils.ts index 9c217646ca..e0a1a0250f 100644 --- a/web/app/components/base/file-uploader/utils.ts +++ b/web/app/components/base/file-uploader/utils.ts @@ -7,11 +7,30 @@ import { SupportUploadFileTypes } from '@/app/components/workflow/types' import type { FileResponse } from '@/types/workflow' import { TransferMethod } from '@/types/app' +/** + * Get appropriate error message for file upload errors + * @param error - The error object from upload failure + * @param defaultMessage - Default error message to use if no specific error is matched + * @param t - Translation function + * @returns Localized error message + */ +export const getFileUploadErrorMessage = (error: any, defaultMessage: string, t: (key: string) => string): string => { + const errorCode = error?.response?.code + + if (errorCode === 'forbidden') + return error?.response?.message + + if (errorCode === 'file_extension_blocked') + return t('common.fileUploader.fileExtensionBlocked') + + return defaultMessage +} + type FileUploadParams = { file: File onProgressCallback: (progress: number) => void onSuccessCallback: (res: { id: string }) => void - onErrorCallback: () => void + onErrorCallback: (error?: any) => void } type FileUpload = (v: FileUploadParams, isPublic?: boolean, url?: string) => void export const fileUpload: FileUpload = ({ @@ -37,8 +56,8 @@ export const fileUpload: FileUpload = ({ .then((res: { id: string }) => { onSuccessCallback(res) }) - .catch(() => { - onErrorCallback() + .catch((error) => { + onErrorCallback(error) }) } diff --git a/web/app/components/base/float-right-container/index.stories.tsx b/web/app/components/base/float-right-container/index.stories.tsx new file mode 100644 index 0000000000..18173f086d --- /dev/null +++ b/web/app/components/base/float-right-container/index.stories.tsx @@ -0,0 +1,74 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { fn } from 'storybook/test' +import { useState } from 'react' +import FloatRightContainer from '.' + +const meta = { + title: 'Base/Feedback/FloatRightContainer', + component: FloatRightContainer, + parameters: { + layout: 'fullscreen', + docs: { + description: { + component: 'Wrapper that renders content in a drawer on mobile and inline on desktop. Useful for responsive settings panels.', + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +const ContainerDemo = () => { + const [open, setOpen] = useState(false) + const [isMobile, setIsMobile] = useState(false) + + return ( +
+
+ + +
+ + setOpen(false)} + title="Responsive panel" + description="Switch the toggle to see drawer vs inline behaviour." + mask + > +
+

Panel Content

+

+ On desktop, this block renders inline when `isOpen` is true. On mobile it appears inside the drawer wrapper. +

+
+
+
+ ) +} + +export const Playground: Story = { + render: () => , + args: { + isMobile: false, + isOpen: false, + onClose: fn(), + children: null, + }, +} diff --git a/web/app/components/base/form/components/base/base-field.tsx b/web/app/components/base/form/components/base/base-field.tsx index be74d701ba..db57059b82 100644 --- a/web/app/components/base/form/components/base/base-field.tsx +++ b/web/app/components/base/form/components/base/base-field.tsx @@ -4,7 +4,6 @@ import { FormItemValidateStatusEnum, FormTypeEnum } from '@/app/components/base/ import Input from '@/app/components/base/input' import Radio from '@/app/components/base/radio' import RadioE from '@/app/components/base/radio/ui' -import { PortalSelect } from '@/app/components/base/select' import PureSelect from '@/app/components/base/select/pure' import Tooltip from '@/app/components/base/tooltip' import { useRenderI18nObject } from '@/hooks/use-i18n' @@ -161,7 +160,7 @@ const BaseField = ({ const value = useStore(field.form.store, s => s.values[field.name]) - const { data: dynamicOptionsData, isLoading: isDynamicOptionsLoading } = useTriggerPluginDynamicOptions( + const { data: dynamicOptionsData, isLoading: isDynamicOptionsLoading, error: dynamicOptionsError } = useTriggerPluginDynamicOptions( dynamicSelectParams || { plugin_id: '', provider: '', @@ -176,7 +175,7 @@ const BaseField = ({ if (!dynamicOptionsData?.options) return [] return dynamicOptionsData.options.map(option => ({ - name: getTranslatedContent({ content: option.label, render: renderI18nObject }), + label: getTranslatedContent({ content: option.label, render: renderI18nObject }), value: option.value, })) }, [dynamicOptionsData, renderI18nObject]) @@ -250,17 +249,20 @@ const BaseField = ({ } { formItemType === FormTypeEnum.dynamicSelect && ( - field.handleChange(item.value)} - readonly={disabled || isDynamicOptionsLoading} + onChange={field.handleChange} + disabled={disabled || isDynamicOptionsLoading} placeholder={ isDynamicOptionsLoading - ? 'Loading options...' - : translatedPlaceholder || 'Select an option' + ? t('common.dynamicSelect.loading') + : translatedPlaceholder } - items={dynamicOptions} - popupClassName="z-[9999]" + {...(dynamicOptionsError ? { popupProps: { title: t('common.dynamicSelect.error'), titleClassName: 'text-text-destructive-secondary' } } + : (!dynamicOptions.length ? { popupProps: { title: t('common.dynamicSelect.noData') } } : {}))} + triggerPopupSameWidth + multiple={multiple} /> ) } diff --git a/web/app/components/base/form/components/field/select.tsx b/web/app/components/base/form/components/field/select.tsx index dee047e2eb..8a36a49510 100644 --- a/web/app/components/base/form/components/field/select.tsx +++ b/web/app/components/base/form/components/field/select.tsx @@ -11,7 +11,9 @@ type SelectFieldProps = { options: Option[] onChange?: (value: string) => void className?: string -} & Omit +} & Omit & { + multiple?: false +} const SelectField = ({ label, diff --git a/web/app/components/base/form/components/field/variable-or-constant-input.tsx b/web/app/components/base/form/components/field/variable-or-constant-input.tsx index a07e356fa2..b8a96c5401 100644 --- a/web/app/components/base/form/components/field/variable-or-constant-input.tsx +++ b/web/app/components/base/form/components/field/variable-or-constant-input.tsx @@ -1,5 +1,5 @@ import type { ChangeEvent } from 'react' -import { useState } from 'react' +import { useCallback, useState } from 'react' import { RiEditLine } from '@remixicon/react' import cn from '@/utils/classnames' import SegmentedControl from '@/app/components/base/segmented-control' @@ -33,9 +33,9 @@ const VariableOrConstantInputField = ({ }, ] - const handleVariableOrConstantChange = (value: string) => { + const handleVariableOrConstantChange = useCallback((value: string) => { setVariableType(value) - } + }, [setVariableType]) const handleVariableValueChange = () => { console.log('Variable value changed') diff --git a/web/app/components/base/form/index.stories.tsx b/web/app/components/base/form/index.stories.tsx new file mode 100644 index 0000000000..f170cb4771 --- /dev/null +++ b/web/app/components/base/form/index.stories.tsx @@ -0,0 +1,559 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useMemo, useState } from 'react' +import { useStore } from '@tanstack/react-form' +import ContactFields from './form-scenarios/demo/contact-fields' +import { demoFormOpts } from './form-scenarios/demo/shared-options' +import { ContactMethods, UserSchema } from './form-scenarios/demo/types' +import BaseForm from './components/base/base-form' +import type { FormSchema } from './types' +import { FormTypeEnum } from './types' +import { type FormStoryRender, FormStoryWrapper } from '../../../../.storybook/utils/form-story-wrapper' +import Button from '../button' +import { TransferMethod } from '@/types/app' +import { PreviewMode } from '@/app/components/base/features/types' + +const FormStoryHost = () => null + +const meta = { + title: 'Base/Data Entry/AppForm', + component: FormStoryHost, + parameters: { + layout: 'fullscreen', + docs: { + description: { + component: 'Helper utilities built on top of `@tanstack/react-form` that power form rendering across Dify. These stories demonstrate the `useAppForm` hook, field primitives, conditional visibility, and custom actions.', + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +type AppFormInstance = Parameters[0] +type ContactFieldsProps = React.ComponentProps +type ContactFieldsFormApi = ContactFieldsProps['form'] + +type PlaygroundFormFieldsProps = { + form: AppFormInstance + status: string +} + +const PlaygroundFormFields = ({ form, status }: PlaygroundFormFieldsProps) => { + type PlaygroundFormValues = typeof demoFormOpts.defaultValues + const name = useStore(form.store, state => (state.values as PlaygroundFormValues).name) + const contactFormApi = form as ContactFieldsFormApi + + return ( +
{ + event.preventDefault() + event.stopPropagation() + form.handleSubmit() + }} + > + ( + + )} + /> + ( + + )} + /> + ( + + )} + /> + + {!!name && } + + + + + +

{status}

+ + ) +} + +const FormPlayground = () => { + const [status, setStatus] = useState('Fill in the form and submit to see results.') + + return ( + { + const result = UserSchema.safeParse(formValue as typeof demoFormOpts.defaultValues) + if (!result.success) + return result.error.issues[0].message + return undefined + }, + }, + onSubmit: () => { + setStatus('Successfully saved profile.') + }, + }} + > + {form => } + + ) +} + +const mockFileUploadConfig = { + enabled: true, + allowed_file_extensions: ['pdf', 'png'], + allowed_file_upload_methods: [TransferMethod.local_file, TransferMethod.remote_url], + number_limits: 3, + preview_config: { + mode: PreviewMode.CurrentPage, + file_type_list: ['pdf', 'png'], + }, +} + +const mockFieldDefaults = { + headline: 'Dify App', + description: 'Streamline your AI workflows with configurable building blocks.', + category: 'workbench', + allowNotifications: true, + dailyLimit: 40, + attachment: [], +} + +const FieldGallery = () => { + const selectOptions = useMemo(() => [ + { value: 'workbench', label: 'Workbench' }, + { value: 'playground', label: 'Playground' }, + { value: 'production', label: 'Production' }, + ], []) + + return ( + + {form => ( +
{ + event.preventDefault() + event.stopPropagation() + form.handleSubmit() + }} + > + ( + + )} + /> + ( + + )} + /> + ( + + )} + /> + ( + + )} + /> + ( + + )} + /> + ( + + )} + /> +
+ + + +
+ + )} +
+ ) +} + +const conditionalSchemas: FormSchema[] = [ + { + type: FormTypeEnum.select, + name: 'channel', + label: 'Preferred channel', + required: true, + default: 'email', + options: ContactMethods, + }, + { + type: FormTypeEnum.textInput, + name: 'contactEmail', + label: 'Email address', + required: true, + placeholder: 'user@example.com', + show_on: [{ variable: 'channel', value: 'email' }], + }, + { + type: FormTypeEnum.textInput, + name: 'contactPhone', + label: 'Phone number', + required: true, + placeholder: '+1 555 123 4567', + show_on: [{ variable: 'channel', value: 'phone' }], + }, + { + type: FormTypeEnum.boolean, + name: 'optIn', + label: 'Opt in to marketing messages', + required: false, + }, +] + +const ConditionalFieldsStory = () => { + const [values, setValues] = useState>({ + channel: 'email', + optIn: false, + }) + + return ( +
+
+ { + setValues(prev => ({ + ...prev, + [field]: value, + })) + }} + /> +
+ +
+ ) +} + +const CustomActionsStory = () => { + return ( + { + const nextValues = value as { datasetName?: string } + if (!nextValues.datasetName || nextValues.datasetName.length < 3) + return 'Dataset name must contain at least 3 characters.' + return undefined + }, + }, + }} + > + {form => ( +
{ + event.preventDefault() + event.stopPropagation() + form.handleSubmit() + }} + > + ( + + )} + /> + ( + + )} + /> + + ( +
+ + + +
+ )} + /> +
+ + )} +
+ ) +} + +export const Playground: Story = { + render: () => , + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +const form = useAppForm({ + ...demoFormOpts, + validators: { + onSubmit: ({ value }) => UserSchema.safeParse(value).success ? undefined : 'Validation failed', + }, + onSubmit: ({ value }) => { + setStatus(\`Successfully saved profile for \${value.name}\`) + }, +}) + +return ( +
+ + {field => } + + + {field => } + + + {field => } + + {!!form.store.state.values.name && } + + + + +) + `.trim(), + }, + }, + }, +} + +export const FieldExplorer: Story = { + render: () => , + parameters: { + nextjs: { + appDirectory: true, + navigation: { + pathname: '/apps/demo-app/form', + params: { appId: 'demo-app' }, + }, + }, + docs: { + source: { + language: 'tsx', + code: ` +const form = useAppForm({ + defaultValues: { + headline: 'Dify App', + description: 'Streamline your AI workflows', + category: 'workbench', + allowNotifications: true, + dailyLimit: 40, + attachment: [], + }, +}) + +return ( +
+ + {field => } + + + {field => } + + + {field => } + + + {field => } + + + {field => } + + + {field => } + + + + + +) + `.trim(), + }, + }, + }, +} + +export const ConditionalVisibility: Story = { + render: () => , + parameters: { + docs: { + description: { + story: 'Demonstrates schema-driven visibility using `show_on` conditions rendered through the reusable `BaseForm` component.', + }, + source: { + language: 'tsx', + code: ` +const conditionalSchemas: FormSchema[] = [ + { type: FormTypeEnum.select, name: 'channel', label: 'Preferred channel', options: ContactMethods }, + { type: FormTypeEnum.textInput, name: 'contactEmail', label: 'Email', show_on: [{ variable: 'channel', value: 'email' }] }, + { type: FormTypeEnum.textInput, name: 'contactPhone', label: 'Phone', show_on: [{ variable: 'channel', value: 'phone' }] }, + { type: FormTypeEnum.boolean, name: 'optIn', label: 'Opt in to marketing messages' }, +] + +return ( + setValues(prev => ({ ...prev, [field]: value }))} + /> +) + `.trim(), + }, + }, + }, +} + +export const CustomActions: Story = { + render: () => , + parameters: { + docs: { + description: { + story: 'Shows how to replace the default submit button with a fully custom footer leveraging contextual form state.', + }, + source: { + language: 'tsx', + code: ` +const form = useAppForm({ + defaultValues: { + datasetName: 'Support FAQ', + datasetDescription: 'Knowledge base snippets sourced from Zendesk exports.', + }, + validators: { + onChange: ({ value }) => value.datasetName?.length >= 3 ? undefined : 'Dataset name must contain at least 3 characters.', + }, +}) + +return ( +
+ + {field => } + + + {field => } + + + ( +
+ + + +
+ )} + /> +
+ +) + `.trim(), + }, + }, + }, +} diff --git a/web/app/components/base/form/types.ts b/web/app/components/base/form/types.ts index c9c5b873d7..268f9db89a 100644 --- a/web/app/components/base/form/types.ts +++ b/web/app/components/base/form/types.ts @@ -43,7 +43,7 @@ export type FormOption = { icon?: string } -export type AnyValidators = FieldValidators +export type AnyValidators = FieldValidators export enum FormItemValidateStatusEnum { Success = 'success', diff --git a/web/app/components/base/fullscreen-modal/index.stories.tsx b/web/app/components/base/fullscreen-modal/index.stories.tsx new file mode 100644 index 0000000000..72fd28df66 --- /dev/null +++ b/web/app/components/base/fullscreen-modal/index.stories.tsx @@ -0,0 +1,59 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import FullScreenModal from '.' + +const meta = { + title: 'Base/Feedback/FullScreenModal', + component: FullScreenModal, + parameters: { + layout: 'fullscreen', + docs: { + description: { + component: 'Backdrop-blurred fullscreen modal. Supports close button, custom content, and optional overflow visibility.', + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +const ModalDemo = (props: React.ComponentProps) => { + const [open, setOpen] = useState(false) + + return ( +
+ + + setOpen(false)} + closable + > +
+
+ Full-screen experience +
+
+ Place dashboards, flow builders, or immersive previews here. +
+
+
+
+ ) +} + +export const Playground: Story = { + render: args => , + args: { + open: false, + }, +} diff --git a/web/app/components/base/ga/index.tsx b/web/app/components/base/ga/index.tsx index 81d84a85d3..7688e0de50 100644 --- a/web/app/components/base/ga/index.tsx +++ b/web/app/components/base/ga/index.tsx @@ -1,7 +1,7 @@ import type { FC } from 'react' import React from 'react' import Script from 'next/script' -import { type UnsafeUnwrappedHeaders, headers } from 'next/headers' +import { headers } from 'next/headers' import { IS_CE_EDITION } from '@/config' export enum GaType { @@ -18,13 +18,13 @@ export type IGAProps = { gaType: GaType } -const GA: FC = ({ +const GA: FC = async ({ gaType, }) => { if (IS_CE_EDITION) return null - const nonce = process.env.NODE_ENV === 'production' ? (headers() as unknown as UnsafeUnwrappedHeaders).get('x-nonce') ?? '' : '' + const nonce = process.env.NODE_ENV === 'production' ? (await headers()).get('x-nonce') ?? '' : '' return ( <> diff --git a/web/app/components/base/grid-mask/index.stories.tsx b/web/app/components/base/grid-mask/index.stories.tsx new file mode 100644 index 0000000000..1b67a1510d --- /dev/null +++ b/web/app/components/base/grid-mask/index.stories.tsx @@ -0,0 +1,51 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import GridMask from '.' + +const meta = { + title: 'Base/Layout/GridMask', + component: GridMask, + parameters: { + layout: 'fullscreen', + docs: { + description: { + component: 'Displays a soft grid overlay with gradient mask, useful for framing hero sections or marketing callouts.', + }, + }, + }, + args: { + wrapperClassName: 'rounded-2xl p-10', + canvasClassName: '', + gradientClassName: '', + children: ( +
+ Grid Mask Demo + Beautiful backgrounds for feature highlights +

+ Place any content inside the mask. On dark backgrounds the grid and soft gradient add depth without distracting from the main message. +

+
+ ), + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} + +export const CustomBackground: Story = { + args: { + wrapperClassName: 'rounded-3xl p-10 bg-[#0A0A1A]', + gradientClassName: 'bg-gradient-to-r from-[#0A0A1A]/90 via-[#101030]/60 to-[#05050A]/90', + children: ( +
+ Custom gradient + Use your own colors +

+ Override gradient and canvas classes to match brand palettes while keeping the grid texture. +

+
+ ), + }, +} diff --git a/web/app/components/base/icons/assets/public/tracing/tencent-icon-big.svg b/web/app/components/base/icons/assets/public/tracing/tencent-icon-big.svg new file mode 100644 index 0000000000..b38316f3b6 --- /dev/null +++ b/web/app/components/base/icons/assets/public/tracing/tencent-icon-big.svg @@ -0,0 +1,23 @@ + + + logo + + + + diff --git a/web/app/components/base/icons/assets/public/tracing/tencent-icon.svg b/web/app/components/base/icons/assets/public/tracing/tencent-icon.svg new file mode 100644 index 0000000000..53347bf23c --- /dev/null +++ b/web/app/components/base/icons/assets/public/tracing/tencent-icon.svg @@ -0,0 +1,23 @@ + + + logo + + + + \ No newline at end of file diff --git a/web/app/components/base/icons/assets/vender/line/others/apps-02.svg b/web/app/components/base/icons/assets/vender/line/others/apps-02.svg deleted file mode 100644 index 8e1fec9ecc..0000000000 --- a/web/app/components/base/icons/assets/vender/line/others/apps-02.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/web/app/components/base/icons/assets/vender/line/others/exchange-02.svg b/web/app/components/base/icons/assets/vender/line/others/exchange-02.svg deleted file mode 100644 index 45d2770277..0000000000 --- a/web/app/components/base/icons/assets/vender/line/others/exchange-02.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/web/app/components/base/icons/assets/vender/line/others/file-code.svg b/web/app/components/base/icons/assets/vender/line/others/file-code.svg deleted file mode 100644 index eb77033a0a..0000000000 --- a/web/app/components/base/icons/assets/vender/line/others/file-code.svg +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/web/app/components/base/icons/assets/vender/other/hourglass-shape.svg b/web/app/components/base/icons/assets/vender/other/hourglass-shape.svg new file mode 100644 index 0000000000..150630f460 --- /dev/null +++ b/web/app/components/base/icons/assets/vender/other/hourglass-shape.svg @@ -0,0 +1,3 @@ + + + diff --git a/web/app/components/base/icons/icon-gallery.stories.tsx b/web/app/components/base/icons/icon-gallery.stories.tsx new file mode 100644 index 0000000000..7da71b3b0b --- /dev/null +++ b/web/app/components/base/icons/icon-gallery.stories.tsx @@ -0,0 +1,258 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import React from 'react' + +declare const require: any + +type IconComponent = React.ComponentType> + +type IconEntry = { + name: string + category: string + path: string + Component: IconComponent +} + +const iconContext = require.context('./src', true, /\.tsx$/) + +const iconEntries: IconEntry[] = iconContext + .keys() + .filter((key: string) => !key.endsWith('.stories.tsx') && !key.endsWith('.spec.tsx')) + .map((key: string) => { + const mod = iconContext(key) + const Component = mod.default as IconComponent | undefined + if (!Component) + return null + + const relativePath = key.replace(/^\.\//, '') + const path = `app/components/base/icons/src/${relativePath}` + const parts = relativePath.split('/') + const fileName = parts.pop() || '' + const category = parts.length ? parts.join('/') : '(root)' + const name = Component.displayName || fileName.replace(/\.tsx$/, '') + + return { + name, + category, + path, + Component, + } + }) + .filter(Boolean) as IconEntry[] + +const sortedEntries = [...iconEntries].sort((a, b) => { + if (a.category === b.category) + return a.name.localeCompare(b.name) + return a.category.localeCompare(b.category) +}) + +const filterEntries = (entries: IconEntry[], query: string) => { + const normalized = query.trim().toLowerCase() + if (!normalized) + return entries + + return entries.filter(entry => + entry.name.toLowerCase().includes(normalized) + || entry.path.toLowerCase().includes(normalized) + || entry.category.toLowerCase().includes(normalized), + ) +} + +const groupByCategory = (entries: IconEntry[]) => entries.reduce((acc, entry) => { + if (!acc[entry.category]) + acc[entry.category] = [] + + acc[entry.category].push(entry) + return acc +}, {} as Record) + +const containerStyle: React.CSSProperties = { + padding: 24, + display: 'flex', + flexDirection: 'column', + gap: 24, +} + +const headerStyle: React.CSSProperties = { + display: 'flex', + flexDirection: 'column', + gap: 8, +} + +const controlsStyle: React.CSSProperties = { + display: 'flex', + alignItems: 'center', + gap: 12, + flexWrap: 'wrap', +} + +const searchInputStyle: React.CSSProperties = { + padding: '8px 12px', + minWidth: 280, + borderRadius: 6, + border: '1px solid #d0d0d5', +} + +const toggleButtonStyle: React.CSSProperties = { + padding: '8px 12px', + borderRadius: 6, + border: '1px solid #d0d0d5', + background: '#fff', + cursor: 'pointer', +} + +const emptyTextStyle: React.CSSProperties = { color: '#5f5f66' } + +const sectionStyle: React.CSSProperties = { + display: 'flex', + flexDirection: 'column', + gap: 12, +} + +const gridStyle: React.CSSProperties = { + display: 'grid', + gap: 12, + gridTemplateColumns: 'repeat(auto-fill, minmax(200px, 1fr))', +} + +const cardStyle: React.CSSProperties = { + border: '1px solid #e1e1e8', + borderRadius: 8, + padding: 12, + display: 'flex', + flexDirection: 'column', + gap: 8, + minHeight: 140, +} + +const previewBaseStyle: React.CSSProperties = { + display: 'flex', + justifyContent: 'center', + alignItems: 'center', + minHeight: 48, + borderRadius: 6, +} + +const nameButtonBaseStyle: React.CSSProperties = { + display: 'inline-flex', + padding: 0, + border: 'none', + background: 'transparent', + font: 'inherit', + cursor: 'pointer', + textAlign: 'left', + fontWeight: 600, +} + +const PREVIEW_SIZE = 40 + +const IconGalleryStory = () => { + const [query, setQuery] = React.useState('') + const [copiedPath, setCopiedPath] = React.useState(null) + const [previewTheme, setPreviewTheme] = React.useState<'light' | 'dark'>('light') + + const filtered = React.useMemo(() => filterEntries(sortedEntries, query), [query]) + + const grouped = React.useMemo(() => groupByCategory(filtered), [filtered]) + + const categoryOrder = React.useMemo( + () => Object.keys(grouped).sort((a, b) => a.localeCompare(b)), + [grouped], + ) + + React.useEffect(() => { + if (!copiedPath) + return undefined + + const timerId = window.setTimeout(() => { + setCopiedPath(null) + }, 1200) + + return () => window.clearTimeout(timerId) + }, [copiedPath]) + + const handleCopy = React.useCallback((text: string) => { + navigator.clipboard?.writeText(text) + .then(() => { + setCopiedPath(text) + }) + .catch((err) => { + console.error('Failed to copy icon path:', err) + }) + }, []) + + return ( +
+
+

Icon Gallery

+

+ Browse all icon components sourced from app/components/base/icons/src. Use the search bar + to filter by name or path. +

+
+ setQuery(event.target.value)} + /> + {filtered.length} icons + +
+
+ {categoryOrder.length === 0 && ( +

No icons match the current filter.

+ )} + {categoryOrder.map(category => ( +
+

{category}

+
+ {grouped[category].map(entry => ( +
+
+ +
+ +
+ ))} +
+
+ ))} +
+ ) +} + +const meta: Meta = { + title: 'Base/Icons/Icon Gallery', + component: IconGalleryStory, + parameters: { + layout: 'fullscreen', + }, +} + +export default meta + +type Story = StoryObj + +export const All: Story = { + render: () => , +} diff --git a/web/app/components/base/icons/src/vender/line/others/Apps02.json b/web/app/components/base/icons/src/vender/line/others/Apps02.json deleted file mode 100644 index 31378e175d..0000000000 --- a/web/app/components/base/icons/src/vender/line/others/Apps02.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "icon": { - "type": "element", - "isRootNode": true, - "name": "svg", - "attributes": { - "width": "16", - "height": "16", - "viewBox": "0 0 16 16", - "fill": "none", - "xmlns": "http://www.w3.org/2000/svg" - }, - "children": [ - { - "type": "element", - "name": "g", - "attributes": { - "id": "apps-2-line" - }, - "children": [ - { - "type": "element", - "name": "path", - "attributes": { - "id": "Vector", - "d": "M4.66602 7.6665C3.00916 7.6665 1.66602 6.32336 1.66602 4.6665C1.66602 3.00965 3.00916 1.6665 4.66602 1.6665C6.32287 1.6665 7.66602 3.00965 7.66602 4.6665C7.66602 6.32336 6.32287 7.6665 4.66602 7.6665ZM4.66602 14.3332C3.00916 14.3332 1.66602 12.99 1.66602 11.3332C1.66602 9.6763 3.00916 8.33317 4.66602 8.33317C6.32287 8.33317 7.66602 9.6763 7.66602 11.3332C7.66602 12.99 6.32287 14.3332 4.66602 14.3332ZM11.3327 7.6665C9.67582 7.6665 8.33268 6.32336 8.33268 4.6665C8.33268 3.00965 9.67582 1.6665 11.3327 1.6665C12.9895 1.6665 14.3327 3.00965 14.3327 4.6665C14.3327 6.32336 12.9895 7.6665 11.3327 7.6665ZM11.3327 14.3332C9.67582 14.3332 8.33268 12.99 8.33268 11.3332C8.33268 9.6763 9.67582 8.33317 11.3327 8.33317C12.9895 8.33317 14.3327 9.6763 14.3327 11.3332C14.3327 12.99 12.9895 14.3332 11.3327 14.3332ZM4.66602 6.33317C5.58649 6.33317 6.33268 5.58698 6.33268 4.6665C6.33268 3.74603 5.58649 2.99984 4.66602 2.99984C3.74554 2.99984 2.99935 3.74603 2.99935 4.6665C2.99935 5.58698 3.74554 6.33317 4.66602 6.33317ZM4.66602 12.9998C5.58649 12.9998 6.33268 12.2536 6.33268 11.3332C6.33268 10.4127 5.58649 9.6665 4.66602 9.6665C3.74554 9.6665 2.99935 10.4127 2.99935 11.3332C2.99935 12.2536 3.74554 12.9998 4.66602 12.9998ZM11.3327 6.33317C12.2531 6.33317 12.9993 5.58698 12.9993 4.6665C12.9993 3.74603 12.2531 2.99984 11.3327 2.99984C10.4122 2.99984 9.66602 3.74603 9.66602 4.6665C9.66602 5.58698 10.4122 6.33317 11.3327 6.33317ZM11.3327 12.9998C12.2531 12.9998 12.9993 12.2536 12.9993 11.3332C12.9993 10.4127 12.2531 9.6665 11.3327 9.6665C10.4122 9.6665 9.66602 10.4127 9.66602 11.3332C9.66602 12.2536 10.4122 12.9998 11.3327 12.9998Z", - "fill": "currentColor" - }, - "children": [] - } - ] - } - ] - }, - "name": "Apps02" -} diff --git a/web/app/components/base/icons/src/vender/line/others/Apps02.tsx b/web/app/components/base/icons/src/vender/line/others/Apps02.tsx deleted file mode 100644 index 3236059d8d..0000000000 --- a/web/app/components/base/icons/src/vender/line/others/Apps02.tsx +++ /dev/null @@ -1,20 +0,0 @@ -// GENERATE BY script -// DON NOT EDIT IT MANUALLY - -import * as React from 'react' -import data from './Apps02.json' -import IconBase from '@/app/components/base/icons/IconBase' -import type { IconData } from '@/app/components/base/icons/IconBase' - -const Icon = ( - { - ref, - ...props - }: React.SVGProps & { - ref?: React.RefObject>; - }, -) => - -Icon.displayName = 'Apps02' - -export default Icon diff --git a/web/app/components/base/icons/src/vender/line/others/Exchange02.json b/web/app/components/base/icons/src/vender/line/others/Exchange02.json deleted file mode 100644 index 3672d8b88b..0000000000 --- a/web/app/components/base/icons/src/vender/line/others/Exchange02.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "icon": { - "type": "element", - "isRootNode": true, - "name": "svg", - "attributes": { - "width": "16", - "height": "16", - "viewBox": "0 0 16 16", - "fill": "none", - "xmlns": "http://www.w3.org/2000/svg" - }, - "children": [ - { - "type": "element", - "name": "path", - "attributes": { - "d": "M4.66602 14.3334C3.00916 14.3334 1.66602 12.9903 1.66602 11.3334C1.66602 9.67655 3.00916 8.33342 4.66602 8.33342C6.32287 8.33342 7.66602 9.67655 7.66602 11.3334C7.66602 12.9903 6.32287 14.3334 4.66602 14.3334ZM11.3327 7.66675C9.67582 7.66675 8.33268 6.3236 8.33268 4.66675C8.33268 3.00989 9.67582 1.66675 11.3327 1.66675C12.9895 1.66675 14.3327 3.00989 14.3327 4.66675C14.3327 6.3236 12.9895 7.66675 11.3327 7.66675ZM4.66602 13.0001C5.58649 13.0001 6.33268 12.2539 6.33268 11.3334C6.33268 10.4129 5.58649 9.66675 4.66602 9.66675C3.74554 9.66675 2.99935 10.4129 2.99935 11.3334C2.99935 12.2539 3.74554 13.0001 4.66602 13.0001ZM11.3327 6.33342C12.2531 6.33342 12.9993 5.58722 12.9993 4.66675C12.9993 3.74627 12.2531 3.00008 11.3327 3.00008C10.4122 3.00008 9.66602 3.74627 9.66602 4.66675C9.66602 5.58722 10.4122 6.33342 11.3327 6.33342ZM1.99935 5.33341C1.99935 3.49247 3.49174 2.00008 5.33268 2.00008H7.33268V3.33341H5.33268C4.22812 3.33341 3.33268 4.22885 3.33268 5.33341V7.33342H1.99935V5.33341ZM13.9993 8.66675H12.666V10.6667C12.666 11.7713 11.7706 12.6667 10.666 12.6667H8.66602V14.0001H10.666C12.5069 14.0001 13.9993 12.5077 13.9993 10.6667V8.66675Z", - "fill": "currentColor" - }, - "children": [] - } - ] - }, - "name": "Exchange02" -} diff --git a/web/app/components/base/icons/src/vender/line/others/Exchange02.tsx b/web/app/components/base/icons/src/vender/line/others/Exchange02.tsx deleted file mode 100644 index 4f58de3619..0000000000 --- a/web/app/components/base/icons/src/vender/line/others/Exchange02.tsx +++ /dev/null @@ -1,20 +0,0 @@ -// GENERATE BY script -// DON NOT EDIT IT MANUALLY - -import * as React from 'react' -import data from './Exchange02.json' -import IconBase from '@/app/components/base/icons/IconBase' -import type { IconData } from '@/app/components/base/icons/IconBase' - -const Icon = ( - { - ref, - ...props - }: React.SVGProps & { - ref?: React.RefObject>; - }, -) => - -Icon.displayName = 'Exchange02' - -export default Icon diff --git a/web/app/components/base/icons/src/vender/line/others/FileCode.json b/web/app/components/base/icons/src/vender/line/others/FileCode.json deleted file mode 100644 index d61af3fdb3..0000000000 --- a/web/app/components/base/icons/src/vender/line/others/FileCode.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "icon": { - "type": "element", - "isRootNode": true, - "name": "svg", - "attributes": { - "width": "16", - "height": "16", - "viewBox": "0 0 16 16", - "fill": "none", - "xmlns": "http://www.w3.org/2000/svg" - }, - "children": [ - { - "type": "element", - "name": "path", - "attributes": { - "d": "M10 2.66659H3.33333V13.3333H12.6667V5.33325H10V2.66659ZM2 1.99445C2 1.62929 2.29833 1.33325 2.66567 1.33325H10.6667L13.9998 4.66658L14 13.9949C14 14.3659 13.7034 14.6666 13.3377 14.6666H2.66227C2.29651 14.6666 2 14.3631 2 14.0054V1.99445ZM11.7713 7.99992L9.4142 10.3569L8.4714 9.41412L9.8856 7.99992L8.4714 6.58571L9.4142 5.6429L11.7713 7.99992ZM4.22877 7.99992L6.58579 5.6429L7.5286 6.58571L6.11438 7.99992L7.5286 9.41412L6.58579 10.3569L4.22877 7.99992Z", - "fill": "currentColor" - }, - "children": [] - } - ] - }, - "name": "FileCode" -} diff --git a/web/app/components/base/icons/src/vender/line/others/index.ts b/web/app/components/base/icons/src/vender/line/others/index.ts index 2322e9d9f1..99db66b397 100644 --- a/web/app/components/base/icons/src/vender/line/others/index.ts +++ b/web/app/components/base/icons/src/vender/line/others/index.ts @@ -1,10 +1,7 @@ -export { default as Apps02 } from './Apps02' export { default as BubbleX } from './BubbleX' export { default as Colors } from './Colors' export { default as DragHandle } from './DragHandle' export { default as Env } from './Env' -export { default as Exchange02 } from './Exchange02' -export { default as FileCode } from './FileCode' export { default as GlobalVariable } from './GlobalVariable' export { default as Icon3Dots } from './Icon3Dots' export { default as LongArrowLeft } from './LongArrowLeft' diff --git a/web/app/components/base/icons/src/vender/other/HourglassShape.json b/web/app/components/base/icons/src/vender/other/HourglassShape.json new file mode 100644 index 0000000000..27f31bbe28 --- /dev/null +++ b/web/app/components/base/icons/src/vender/other/HourglassShape.json @@ -0,0 +1,27 @@ +{ + "icon": { + "type": "element", + "isRootNode": true, + "name": "svg", + "attributes": { + "width": "8", + "height": "14", + "viewBox": "0 0 8 14", + "fill": "none", + "xmlns": "http://www.w3.org/2000/svg" + }, + "children": [ + { + "type": "element", + "name": "path", + "attributes": { + "d": "M8 14C8 11.7909 6.20914 10 4 10C1.79086 10 0 11.7909 0 14V0C8.05332e-08 2.20914 1.79086 4 4 4C6.20914 4 8 2.20914 8 0V14Z", + "fill": "currentColor", + "fill-opacity": "1" + }, + "children": [] + } + ] + }, + "name": "HourglassShape" +} diff --git a/web/app/components/base/icons/src/vender/line/others/FileCode.tsx b/web/app/components/base/icons/src/vender/other/HourglassShape.tsx similarity index 85% rename from web/app/components/base/icons/src/vender/line/others/FileCode.tsx rename to web/app/components/base/icons/src/vender/other/HourglassShape.tsx index 3660aad794..a1ef8c8d5f 100644 --- a/web/app/components/base/icons/src/vender/line/others/FileCode.tsx +++ b/web/app/components/base/icons/src/vender/other/HourglassShape.tsx @@ -2,7 +2,7 @@ // DON NOT EDIT IT MANUALLY import * as React from 'react' -import data from './FileCode.json' +import data from './HourglassShape.json' import IconBase from '@/app/components/base/icons/IconBase' import type { IconData } from '@/app/components/base/icons/IconBase' @@ -15,6 +15,6 @@ const Icon = ( }, ) => -Icon.displayName = 'FileCode' +Icon.displayName = 'HourglassShape' export default Icon diff --git a/web/app/components/base/icons/src/vender/other/index.ts b/web/app/components/base/icons/src/vender/other/index.ts index 8a7bb7ae28..89cbe9033d 100644 --- a/web/app/components/base/icons/src/vender/other/index.ts +++ b/web/app/components/base/icons/src/vender/other/index.ts @@ -1,6 +1,7 @@ export { default as AnthropicText } from './AnthropicText' export { default as Generator } from './Generator' export { default as Group } from './Group' +export { default as HourglassShape } from './HourglassShape' export { default as Mcp } from './Mcp' export { default as NoToolPlaceholder } from './NoToolPlaceholder' export { default as Openai } from './Openai' diff --git a/web/app/components/base/image-gallery/index.stories.tsx b/web/app/components/base/image-gallery/index.stories.tsx new file mode 100644 index 0000000000..c1b463170c --- /dev/null +++ b/web/app/components/base/image-gallery/index.stories.tsx @@ -0,0 +1,39 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import ImageGallery from '.' + +const IMAGE_SOURCES = [ + 'data:image/svg+xml;utf8,Dataset', + 'data:image/svg+xml;utf8,Playground', + 'data:image/svg+xml;utf8,Workflow', + 'data:image/svg+xml;utf8,Prompts', +] + +const meta = { + title: 'Base/Data Display/ImageGallery', + component: ImageGallery, + parameters: { + docs: { + description: { + component: 'Responsive thumbnail grid with lightbox preview for larger imagery.', + }, + source: { + language: 'tsx', + code: ` +', + 'data:image/svg+xml;utf8,', +]} /> + `.trim(), + }, + }, + }, + tags: ['autodocs'], + args: { + srcs: IMAGE_SOURCES, + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Default: Story = {} diff --git a/web/app/components/base/image-uploader/hooks.ts b/web/app/components/base/image-uploader/hooks.ts index 41074000a2..524e86cc1b 100644 --- a/web/app/components/base/image-uploader/hooks.ts +++ b/web/app/components/base/image-uploader/hooks.ts @@ -2,7 +2,7 @@ import { useCallback, useMemo, useRef, useState } from 'react' import type { ClipboardEvent } from 'react' import { useParams } from 'next/navigation' import { useTranslation } from 'react-i18next' -import { imageUpload } from './utils' +import { getImageUploadErrorMessage, imageUpload } from './utils' import { useToastContext } from '@/app/components/base/toast' import { ALLOW_FILE_EXTENSIONS, TransferMethod } from '@/types/app' import type { ImageFile, VisionSettings } from '@/types/app' @@ -81,8 +81,9 @@ export const useImageFiles = () => { filesRef.current = newFiles setFiles(newFiles) }, - onErrorCallback: () => { - notify({ type: 'error', message: t('common.imageUploader.uploadFromComputerUploadError') }) + onErrorCallback: (error?: any) => { + const errorMessage = getImageUploadErrorMessage(error, t('common.imageUploader.uploadFromComputerUploadError'), t) + notify({ type: 'error', message: errorMessage }) const newFiles = [...files.slice(0, index), { ...currentImageFile, progress: -1 }, ...files.slice(index + 1)] filesRef.current = newFiles setFiles(newFiles) @@ -158,8 +159,9 @@ export const useLocalFileUploader = ({ limit, disabled = false, onUpload }: useL onSuccessCallback: (res) => { onUpload({ ...imageFile, fileId: res.id, progress: 100 }) }, - onErrorCallback: () => { - notify({ type: 'error', message: t('common.imageUploader.uploadFromComputerUploadError') }) + onErrorCallback: (error?: any) => { + const errorMessage = getImageUploadErrorMessage(error, t('common.imageUploader.uploadFromComputerUploadError'), t) + notify({ type: 'error', message: errorMessage }) onUpload({ ...imageFile, progress: -1 }) }, }, !!params.token) diff --git a/web/app/components/base/image-uploader/image-list.stories.tsx b/web/app/components/base/image-uploader/image-list.stories.tsx new file mode 100644 index 0000000000..530ef69556 --- /dev/null +++ b/web/app/components/base/image-uploader/image-list.stories.tsx @@ -0,0 +1,182 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useMemo, useState } from 'react' +import ImageList from './image-list' +import ImageLinkInput from './image-link-input' +import type { ImageFile } from '@/types/app' +import { TransferMethod } from '@/types/app' + +const SAMPLE_BASE64 + = '' + +const createRemoteImage = ( + id: string, + progress: number, + url: string, +): ImageFile => ({ + type: TransferMethod.remote_url, + _id: id, + fileId: `remote-${id}`, + progress, + url, +}) + +const createLocalImage = (id: string, progress: number): ImageFile => ({ + type: TransferMethod.local_file, + _id: id, + fileId: `local-${id}`, + progress, + url: SAMPLE_BASE64, + base64Url: SAMPLE_BASE64, +}) + +const initialImages: ImageFile[] = [ + createLocalImage('local-initial', 100), + createRemoteImage( + 'remote-loading', + 40, + 'https://images.unsplash.com/photo-1500530855697-b586d89ba3ee?auto=format&fit=crop&w=300&q=80', + ), + { + ...createRemoteImage( + 'remote-error', + -1, + 'https://example.com/not-an-image.jpg', + ), + url: 'https://example.com/not-an-image.jpg', + }, +] + +const meta = { + title: 'Base/Data Entry/ImageList', + component: ImageList, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Renders thumbnails for uploaded images and manages their states like uploading, error, and deletion.', + }, + }, + }, + argTypes: { + list: { control: false }, + onRemove: { control: false }, + onReUpload: { control: false }, + onImageLinkLoadError: { control: false }, + onImageLinkLoadSuccess: { control: false }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +const ImageUploaderPlayground = ({ readonly }: Story['args']) => { + const [images, setImages] = useState(() => initialImages) + + const activeImages = useMemo(() => images.filter(item => !item.deleted), [images]) + + const handleRemove = (id: string) => { + setImages(prev => prev.map(item => (item._id === id ? { ...item, deleted: true } : item))) + } + + const handleReUpload = (id: string) => { + setImages(prev => prev.map((item) => { + if (item._id !== id) + return item + + return { + ...item, + progress: 60, + } + })) + + setTimeout(() => { + setImages(prev => prev.map((item) => { + if (item._id !== id) + return item + + return { + ...item, + progress: 100, + } + })) + }, 1200) + } + + const handleImageLinkLoadSuccess = (id: string) => { + setImages(prev => prev.map(item => (item._id === id ? { ...item, progress: 100 } : item))) + } + + const handleImageLinkLoadError = (id: string) => { + setImages(prev => prev.map(item => (item._id === id ? { ...item, progress: -1 } : item))) + } + + const handleUploadFromLink = (imageFile: ImageFile) => { + setImages(prev => [ + ...prev, + { + ...imageFile, + fileId: `remote-${imageFile._id}`, + }, + ]) + } + + const handleAddLocalImage = () => { + const id = `local-${Date.now()}` + setImages(prev => [ + ...prev, + createLocalImage(id, 100), + ]) + } + + return ( +
+
+ Add images +
+ + +
+
+ + + +
+ + Files state + +
+          {JSON.stringify(activeImages, null, 2)}
+        
+
+
+ ) +} + +export const Playground: Story = { + render: args => , + args: { + list: [], + }, +} + +export const ReadonlyList: Story = { + render: args => , + args: { + list: [], + }, +} diff --git a/web/app/components/base/image-uploader/utils.ts b/web/app/components/base/image-uploader/utils.ts index 0c1ada747d..3579d0541e 100644 --- a/web/app/components/base/image-uploader/utils.ts +++ b/web/app/components/base/image-uploader/utils.ts @@ -1,10 +1,29 @@ import { upload } from '@/service/base' +/** + * Get appropriate error message for image upload errors + * @param error - The error object from upload failure + * @param defaultMessage - Default error message to use if no specific error is matched + * @param t - Translation function + * @returns Localized error message + */ +export const getImageUploadErrorMessage = (error: any, defaultMessage: string, t: (key: string) => string): string => { + const errorCode = error?.response?.code + + if (errorCode === 'forbidden') + return error?.response?.message + + if (errorCode === 'file_extension_blocked') + return t('common.fileUploader.fileExtensionBlocked') + + return defaultMessage +} + type ImageUploadParams = { file: File onProgressCallback: (progress: number) => void onSuccessCallback: (res: { id: string }) => void - onErrorCallback: () => void + onErrorCallback: (error?: any) => void } type ImageUpload = (v: ImageUploadParams, isPublic?: boolean, url?: string) => void export const imageUpload: ImageUpload = ({ @@ -30,7 +49,7 @@ export const imageUpload: ImageUpload = ({ .then((res: { id: string }) => { onSuccessCallback(res) }) - .catch(() => { - onErrorCallback() + .catch((error) => { + onErrorCallback(error) }) } diff --git a/web/app/components/base/inline-delete-confirm/index.stories.tsx b/web/app/components/base/inline-delete-confirm/index.stories.tsx new file mode 100644 index 0000000000..e0b0757718 --- /dev/null +++ b/web/app/components/base/inline-delete-confirm/index.stories.tsx @@ -0,0 +1,87 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { fn } from 'storybook/test' +import { useState } from 'react' +import InlineDeleteConfirm from '.' + +const meta = { + title: 'Base/Feedback/InlineDeleteConfirm', + component: InlineDeleteConfirm, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Compact confirmation prompt that appears inline, commonly used near delete buttons or destructive controls.', + }, + }, + }, + argTypes: { + variant: { + control: 'select', + options: ['delete', 'warning', 'info'], + }, + }, + args: { + title: 'Delete this item?', + confirmText: 'Delete', + cancelText: 'Cancel', + onConfirm: fn(), + onCancel: fn(), + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +const InlineDeleteConfirmDemo = (args: Story['args']) => { + const [visible, setVisible] = useState(true) + + return ( +
+ + {visible && ( + { + console.log('✅ Confirm clicked') + setVisible(false) + }} + onCancel={() => { + console.log('❎ Cancel clicked') + setVisible(false) + }} + /> + )} +
+ ) +} + +export const Playground: Story = { + render: args => , +} + +export const WarningVariant: Story = { + render: args => , + args: { + variant: 'warning', + title: 'Archive conversation?', + confirmText: 'Archive', + cancelText: 'Keep', + }, +} + +export const InfoVariant: Story = { + render: args => , + args: { + variant: 'info', + title: 'Remove collaborator?', + confirmText: 'Remove', + cancelText: 'Keep', + }, +} diff --git a/web/app/components/base/input-number/index.stories.tsx b/web/app/components/base/input-number/index.stories.tsx index aa075b0ff1..88999af9e0 100644 --- a/web/app/components/base/input-number/index.stories.tsx +++ b/web/app/components/base/input-number/index.stories.tsx @@ -3,7 +3,7 @@ import { useState } from 'react' import { InputNumber } from '.' const meta = { - title: 'Base/Input/InputNumber', + title: 'Base/Data Entry/InputNumber', component: InputNumber, parameters: { layout: 'centered', diff --git a/web/app/components/base/input-with-copy/index.tsx b/web/app/components/base/input-with-copy/index.tsx index 0d10714b86..87b7de5005 100644 --- a/web/app/components/base/input-with-copy/index.tsx +++ b/web/app/components/base/input-with-copy/index.tsx @@ -67,7 +67,7 @@ const InputWithCopy = React.forwardRef(( inputProps.className, )} value={value} - {...(({ size, ...rest }) => rest)(inputProps)} + {...(({ size: _size, ...rest }) => rest)(inputProps)} /> {showCopyButton && (
} & Omit, 'size'> & VariantProps const removeLeadingZeros = (value: string) => value.replace(/^(-?)0+(?=\d)/, '$1') -const Input = ({ +const Input = React.forwardRef(({ size, disabled, destructive, @@ -53,9 +52,8 @@ const Input = ({ onChange = noop, onBlur = noop, unit, - ref, ...props -}: InputProps) => { +}, ref) => { const { t } = useTranslation() const handleNumberChange: ChangeEventHandler = (e) => { if (value === 0) { @@ -135,7 +133,7 @@ const Input = ({ }
) -} +}) Input.displayName = 'Input' diff --git a/web/app/components/base/linked-apps-panel/index.stories.tsx b/web/app/components/base/linked-apps-panel/index.stories.tsx new file mode 100644 index 0000000000..da8abb0677 --- /dev/null +++ b/web/app/components/base/linked-apps-panel/index.stories.tsx @@ -0,0 +1,73 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import LinkedAppsPanel from '.' +import type { RelatedApp } from '@/models/datasets' +import { AppModeEnum } from '@/types/app' + +const mockRelatedApps: RelatedApp[] = [ + { + id: 'app-cx', + name: 'Customer Support Assistant', + mode: AppModeEnum.CHAT, + icon_type: 'emoji', + icon: '\u{1F4AC}', + icon_background: '#EEF2FF', + icon_url: '', + }, + { + id: 'app-ops', + name: 'Ops Workflow Orchestrator', + mode: AppModeEnum.WORKFLOW, + icon_type: 'emoji', + icon: '\u{1F6E0}\u{FE0F}', + icon_background: '#ECFDF3', + icon_url: '', + }, + { + id: 'app-research', + name: 'Research Synthesizer', + mode: AppModeEnum.ADVANCED_CHAT, + icon_type: 'emoji', + icon: '\u{1F9E0}', + icon_background: '#FDF2FA', + icon_url: '', + }, +] + +const meta = { + title: 'Base/Feedback/LinkedAppsPanel', + component: LinkedAppsPanel, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Shows a curated list of related applications, pairing each app icon with quick navigation links.', + }, + }, + }, + args: { + relatedApps: mockRelatedApps, + isMobile: false, + }, + argTypes: { + isMobile: { + control: 'boolean', + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Desktop: Story = {} + +export const Mobile: Story = { + args: { + isMobile: true, + }, + parameters: { + viewport: { + defaultViewport: 'mobile2', + }, + }, +} diff --git a/web/app/components/base/list-empty/index.stories.tsx b/web/app/components/base/list-empty/index.stories.tsx new file mode 100644 index 0000000000..36c0e3c7a7 --- /dev/null +++ b/web/app/components/base/list-empty/index.stories.tsx @@ -0,0 +1,49 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import ListEmpty from '.' + +const meta = { + title: 'Base/Data Display/ListEmpty', + component: ListEmpty, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Large empty state card used in panels and drawers to hint at the next action for the user.', + }, + }, + }, + args: { + title: 'No items yet', + description: ( +

+ Add your first entry to see it appear here. Empty states help users discover what happens next. +

+ ), + }, + argTypes: { + description: { control: false }, + icon: { control: false }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Default: Story = {} + +export const WithCustomIcon: Story = { + args: { + title: 'Connect a data source', + description: ( +

+ Choose a database, knowledge base, or upload documents to get started with retrieval. +

+ ), + icon: ( +
+ {'\u{26A1}\u{FE0F}'} +
+ ), + }, +} diff --git a/web/app/components/base/loading/index.stories.tsx b/web/app/components/base/loading/index.stories.tsx new file mode 100644 index 0000000000..f22f87516c --- /dev/null +++ b/web/app/components/base/loading/index.stories.tsx @@ -0,0 +1,52 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import Loading from '.' + +const meta = { + title: 'Base/Feedback/Loading', + component: Loading, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Spinner used while fetching data (`area`) or bootstrapping the full application shell (`app`).', + }, + }, + }, + argTypes: { + type: { + control: 'radio', + options: ['area', 'app'], + }, + }, + args: { + type: 'area', + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +const LoadingPreview = ({ type }: { type: 'area' | 'app' }) => { + const containerHeight = type === 'app' ? 'h-48' : 'h-20' + const title = type === 'app' ? 'App loading state' : 'Inline loading state' + + return ( +
+ {title} +
+ +
+
+ ) +} + +export const AreaSpinner: Story = { + render: () => , +} + +export const AppSpinner: Story = { + render: () => , +} diff --git a/web/app/components/base/logo/index.stories.tsx b/web/app/components/base/logo/index.stories.tsx new file mode 100644 index 0000000000..01464b8c13 --- /dev/null +++ b/web/app/components/base/logo/index.stories.tsx @@ -0,0 +1,82 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { ThemeProvider } from 'next-themes' +import type { ReactNode } from 'react' +import DifyLogo from './dify-logo' +import LogoSite from './logo-site' +import LogoEmbeddedChatHeader from './logo-embedded-chat-header' +import LogoEmbeddedChatAvatar from './logo-embedded-chat-avatar' + +const meta = { + title: 'Base/General/Logo', + component: DifyLogo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Brand assets rendered in different contexts. DifyLogo adapts to the active theme while other variants target specific surfaces.', + }, + }, + }, + args: { + size: 'medium', + style: 'default', + }, + argTypes: { + size: { + control: 'radio', + options: ['large', 'medium', 'small'], + }, + style: { + control: 'radio', + options: ['default', 'monochromeWhite'], + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +const ThemePreview = ({ theme, children }: { theme: 'light' | 'dark'; children: ReactNode }) => { + return ( + +
+ {children} +
+
+ ) +} + +export const Playground: Story = { + render: ({ size, style }) => { + return ( + +
+
+ Primary logo +
+ + {`size="${size}" | style="${style}"`} +
+
+
+
+ Site favicon + +
+
+ Embedded header + +
+
+ Embedded avatar + +
+
+
+
+ ) + }, +} diff --git a/web/app/components/base/markdown-blocks/code-block.stories.tsx b/web/app/components/base/markdown-blocks/code-block.stories.tsx new file mode 100644 index 0000000000..98473bdf57 --- /dev/null +++ b/web/app/components/base/markdown-blocks/code-block.stories.tsx @@ -0,0 +1,70 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import CodeBlock from './code-block' + +const SAMPLE_CODE = `const greet = (name: string) => { + return \`Hello, \${name}\` +} + +console.log(greet('Dify'))` + +const CodeBlockDemo = ({ + language = 'typescript', +}: { + language?: string +}) => { + return ( +
+
Code block
+ + {SAMPLE_CODE} + +
+ ) +} + +const meta = { + title: 'Base/Data Display/CodeBlock', + component: CodeBlockDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Syntax highlighted code block with copy button and SVG toggle support.', + }, + }, + }, + argTypes: { + language: { + control: 'radio', + options: ['typescript', 'json', 'mermaid'], + }, + }, + args: { + language: 'typescript', + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} + +export const Mermaid: Story = { + args: { + language: 'mermaid', + }, + render: ({ language }) => ( +
+ + {`graph TD + Start --> Decision{User message?} + Decision -->|Tool| ToolCall[Call web search] + Decision -->|Respond| Answer[Compose draft] +`} + +
+ ), +} diff --git a/web/app/components/base/markdown-blocks/img.tsx b/web/app/components/base/markdown-blocks/img.tsx index fe20bad6b1..33fce13f0b 100644 --- a/web/app/components/base/markdown-blocks/img.tsx +++ b/web/app/components/base/markdown-blocks/img.tsx @@ -3,48 +3,11 @@ * Extracted from the main markdown renderer for modularity. * Uses the ImageGallery component to display images. */ -import React, { useEffect, useMemo, useState } from 'react' +import React from 'react' import ImageGallery from '@/app/components/base/image-gallery' -import { getMarkdownImageURL } from './utils' -import { usePluginReadmeAsset } from '@/service/use-plugins' -import type { SimplePluginInfo } from '../markdown/react-markdown-wrapper' -type ImgProps = { - src: string - pluginInfo?: SimplePluginInfo -} - -const Img: React.FC = ({ src, pluginInfo }) => { - const { plugin_unique_identifier, plugin_id } = pluginInfo || {} - const { data: assetData } = usePluginReadmeAsset({ plugin_unique_identifier, file_name: src }) - const [blobUrl, setBlobUrl] = useState() - - useEffect(() => { - if (!assetData) { - setBlobUrl(undefined) - return - } - - const objectUrl = URL.createObjectURL(assetData) - setBlobUrl(objectUrl) - - return () => { - URL.revokeObjectURL(objectUrl) - } - }, [assetData]) - - const imageUrl = useMemo(() => { - if (blobUrl) - return blobUrl - - return getMarkdownImageURL(src, plugin_id) - }, [blobUrl, plugin_id, src]) - - return ( -
- -
- ) +const Img = ({ src }: any) => { + return
} export default Img diff --git a/web/app/components/base/markdown-blocks/index.ts b/web/app/components/base/markdown-blocks/index.ts index ba68b4e8b1..ab6be2e9e7 100644 --- a/web/app/components/base/markdown-blocks/index.ts +++ b/web/app/components/base/markdown-blocks/index.ts @@ -5,9 +5,11 @@ export { default as AudioBlock } from './audio-block' export { default as CodeBlock } from './code-block' +export * from './plugin-img' +export * from './plugin-paragraph' export { default as Img } from './img' -export { default as Link } from './link' export { default as Paragraph } from './paragraph' +export { default as Link } from './link' export { default as PreCode } from './pre-code' export { default as ScriptBlock } from './script-block' export { default as VideoBlock } from './video-block' diff --git a/web/app/components/base/markdown-blocks/paragraph.tsx b/web/app/components/base/markdown-blocks/paragraph.tsx index cb654118fd..fb1612477a 100644 --- a/web/app/components/base/markdown-blocks/paragraph.tsx +++ b/web/app/components/base/markdown-blocks/paragraph.tsx @@ -3,69 +3,25 @@ * Extracted from the main markdown renderer for modularity. * Handles special rendering for paragraphs that directly contain an image. */ -import React, { useEffect, useMemo, useState } from 'react' +import React from 'react' import ImageGallery from '@/app/components/base/image-gallery' -import { getMarkdownImageURL } from './utils' -import { usePluginReadmeAsset } from '@/service/use-plugins' -import type { SimplePluginInfo } from '../markdown/react-markdown-wrapper' - -type ParagraphProps = { - pluginInfo?: SimplePluginInfo - node?: any - children?: React.ReactNode -} - -const Paragraph: React.FC = ({ pluginInfo, node, children }) => { - const { plugin_unique_identifier, plugin_id } = pluginInfo || {} - const childrenNode = node?.children as Array | undefined - const firstChild = childrenNode?.[0] - const isImageParagraph = firstChild?.tagName === 'img' - const imageSrc = isImageParagraph ? firstChild?.properties?.src : undefined - - const { data: assetData } = usePluginReadmeAsset({ - plugin_unique_identifier, - file_name: isImageParagraph && imageSrc ? imageSrc : '', - }) - - const [blobUrl, setBlobUrl] = useState() - - useEffect(() => { - if (!assetData) { - setBlobUrl(undefined) - return - } - - const objectUrl = URL.createObjectURL(assetData) - setBlobUrl(objectUrl) - - return () => { - URL.revokeObjectURL(objectUrl) - } - }, [assetData]) - - const imageUrl = useMemo(() => { - if (blobUrl) - return blobUrl - - if (isImageParagraph && imageSrc) - return getMarkdownImageURL(imageSrc, plugin_id) - - return '' - }, [blobUrl, imageSrc, isImageParagraph, plugin_id]) - - if (isImageParagraph) { - const remainingChildren = Array.isArray(children) && children.length > 1 ? children.slice(1) : undefined +const Paragraph = (paragraph: any) => { + const { node }: any = paragraph + const children_node = node.children + if (children_node && children_node[0] && 'tagName' in children_node[0] && children_node[0].tagName === 'img') { return (
- - {remainingChildren && ( -
{remainingChildren}
- )} + + { + Array.isArray(paragraph.children) && paragraph.children.length > 1 && ( +
{paragraph.children.slice(1)}
+ ) + }
) } - return

{children}

+ return

{paragraph.children}

} export default Paragraph diff --git a/web/app/components/base/markdown-blocks/plugin-img.tsx b/web/app/components/base/markdown-blocks/plugin-img.tsx new file mode 100644 index 0000000000..ed1ee8fa0b --- /dev/null +++ b/web/app/components/base/markdown-blocks/plugin-img.tsx @@ -0,0 +1,48 @@ +/** + * @fileoverview Img component for rendering tags in Markdown. + * Extracted from the main markdown renderer for modularity. + * Uses the ImageGallery component to display images. + */ +import React, { useEffect, useMemo, useState } from 'react' +import ImageGallery from '@/app/components/base/image-gallery' +import { getMarkdownImageURL } from './utils' +import { usePluginReadmeAsset } from '@/service/use-plugins' +import type { SimplePluginInfo } from '../markdown/react-markdown-wrapper' + +type ImgProps = { + src: string + pluginInfo?: SimplePluginInfo +} + +export const PluginImg: React.FC = ({ src, pluginInfo }) => { + const { pluginUniqueIdentifier, pluginId } = pluginInfo || {} + const { data: assetData } = usePluginReadmeAsset({ plugin_unique_identifier: pluginUniqueIdentifier, file_name: src }) + const [blobUrl, setBlobUrl] = useState() + + useEffect(() => { + if (!assetData) { + setBlobUrl(undefined) + return + } + + const objectUrl = URL.createObjectURL(assetData) + setBlobUrl(objectUrl) + + return () => { + URL.revokeObjectURL(objectUrl) + } + }, [assetData]) + + const imageUrl = useMemo(() => { + if (blobUrl) + return blobUrl + + return getMarkdownImageURL(src, pluginId) + }, [blobUrl, pluginId, src]) + + return ( +
+ +
+ ) +} diff --git a/web/app/components/base/markdown-blocks/plugin-paragraph.tsx b/web/app/components/base/markdown-blocks/plugin-paragraph.tsx new file mode 100644 index 0000000000..ae1e2d7101 --- /dev/null +++ b/web/app/components/base/markdown-blocks/plugin-paragraph.tsx @@ -0,0 +1,69 @@ +/** + * @fileoverview Paragraph component for rendering

tags in Markdown. + * Extracted from the main markdown renderer for modularity. + * Handles special rendering for paragraphs that directly contain an image. + */ +import ImageGallery from '@/app/components/base/image-gallery' +import { usePluginReadmeAsset } from '@/service/use-plugins' +import React, { useEffect, useMemo, useState } from 'react' +import type { SimplePluginInfo } from '../markdown/react-markdown-wrapper' +import { getMarkdownImageURL } from './utils' + +type PluginParagraphProps = { + pluginInfo?: SimplePluginInfo + node?: any + children?: React.ReactNode +} + +export const PluginParagraph: React.FC = ({ pluginInfo, node, children }) => { + const { pluginUniqueIdentifier, pluginId } = pluginInfo || {} + const childrenNode = node?.children as Array | undefined + const firstChild = childrenNode?.[0] + const isImageParagraph = firstChild?.tagName === 'img' + const imageSrc = isImageParagraph ? firstChild?.properties?.src : undefined + + const { data: assetData } = usePluginReadmeAsset({ + plugin_unique_identifier: pluginUniqueIdentifier, + file_name: isImageParagraph && imageSrc ? imageSrc : '', + }) + + const [blobUrl, setBlobUrl] = useState() + + useEffect(() => { + if (!assetData) { + setBlobUrl(undefined) + return + } + + const objectUrl = URL.createObjectURL(assetData) + setBlobUrl(objectUrl) + + return () => { + URL.revokeObjectURL(objectUrl) + } + }, [assetData]) + + const imageUrl = useMemo(() => { + if (blobUrl) + return blobUrl + + if (isImageParagraph && imageSrc) + return getMarkdownImageURL(imageSrc, pluginId) + + return '' + }, [blobUrl, imageSrc, isImageParagraph, pluginId]) + + if (isImageParagraph) { + const remainingChildren = Array.isArray(children) && children.length > 1 ? children.slice(1) : undefined + + return ( +

+ + {remainingChildren && ( +
{remainingChildren}
+ )} +
+ ) + } + return

{children}

+} diff --git a/web/app/components/base/markdown-blocks/think-block.stories.tsx b/web/app/components/base/markdown-blocks/think-block.stories.tsx new file mode 100644 index 0000000000..571959259a --- /dev/null +++ b/web/app/components/base/markdown-blocks/think-block.stories.tsx @@ -0,0 +1,78 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import ThinkBlock from './think-block' +import { ChatContextProvider } from '@/app/components/base/chat/chat/context' + +const THOUGHT_TEXT = ` +Gather docs from knowledge base. +Score snippets against query. +[ENDTHINKFLAG] +` + +const ThinkBlockDemo = ({ + responding = false, +}: { + responding?: boolean +}) => { + const [isResponding, setIsResponding] = useState(responding) + + return ( + +
+
+ Think block + +
+ +
+            {THOUGHT_TEXT}
+          
+
+
+
+ ) +} + +const meta = { + title: 'Base/Data Display/ThinkBlock', + component: ThinkBlockDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Expandable chain-of-thought block used in chat responses. Toggles between “thinking” and completed states.', + }, + }, + }, + argTypes: { + responding: { control: 'boolean' }, + }, + args: { + responding: false, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/markdown-blocks/think-block.tsx b/web/app/components/base/markdown-blocks/think-block.tsx index a3b0561677..9c43578e4c 100644 --- a/web/app/components/base/markdown-blocks/think-block.tsx +++ b/web/app/components/base/markdown-blocks/think-block.tsx @@ -1,6 +1,7 @@ import React, { useEffect, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import { useChatContext } from '../chat/chat/context' +import cn from '@/utils/classnames' const hasEndThink = (children: any): boolean => { if (typeof children === 'string') @@ -40,7 +41,7 @@ const useThinkTimer = (children: any) => { const [startTime] = useState(() => Date.now()) const [elapsedTime, setElapsedTime] = useState(0) const [isComplete, setIsComplete] = useState(false) - const timerRef = useRef() + const timerRef = useRef(null) useEffect(() => { if (isComplete) return @@ -63,16 +64,26 @@ const useThinkTimer = (children: any) => { return { elapsedTime, isComplete } } -const ThinkBlock = ({ children, ...props }: React.ComponentProps<'details'>) => { +type ThinkBlockProps = React.ComponentProps<'details'> & { + 'data-think'?: boolean +} + +const ThinkBlock = ({ children, ...props }: ThinkBlockProps) => { const { elapsedTime, isComplete } = useThinkTimer(children) const displayContent = removeEndThink(children) const { t } = useTranslation() + const { 'data-think': isThink = false, className, open, ...rest } = props - if (!(props['data-think'] ?? false)) + if (!isThink) return (
{children}
) return ( -
+
+Thinking aloud + +Check cached metrics first. +If missing, fetch raw warehouse data. +[ENDTHINKFLAG] + +
+ +## Mermaid Diagram +\`\`\`mermaid +graph TD + Start[User Message] --> Parse{Detect Intent?} + Parse -->|Tool| ToolCall[Call search tool] + Parse -->|Answer| Respond[Stream response] + ToolCall --> Respond +\`\`\` + +## Code Example +\`\`\`typescript +const reply = await client.chat({ + message: 'Summarise weekly metrics.', + tags: ['analytics'], +}) +\`\`\` +` + +const MarkdownDemo = ({ + compact = false, +}: { + compact?: boolean +}) => { + const [content] = useState(SAMPLE_MD.trim()) + + return ( +
+
Markdown renderer
+ +
+ ) +} + +const meta = { + title: 'Base/Data Display/Markdown', + component: MarkdownDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Markdown wrapper with GitHub-flavored markdown, Mermaid diagrams, math, and custom blocks (details, audio, etc.).', + }, + }, + }, + argTypes: { + compact: { control: 'boolean' }, + }, + args: { + compact: false, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} + +export const Compact: Story = { + args: { + compact: true, + }, +} diff --git a/web/app/components/base/markdown/markdown-utils.ts b/web/app/components/base/markdown/markdown-utils.ts index 0089bef0ac..d02f98b51b 100644 --- a/web/app/components/base/markdown/markdown-utils.ts +++ b/web/app/components/base/markdown/markdown-utils.ts @@ -32,8 +32,8 @@ export const preprocessLaTeX = (content: string) => { } export const preprocessThinkTag = (content: string) => { - const thinkOpenTagRegex = /(\n)+/g - const thinkCloseTagRegex = /\n<\/think>/g + const thinkOpenTagRegex = /(\s*)+/g + const thinkCloseTagRegex = /(\s*<\/think>)+/g return flow([ (str: string) => str.replace(thinkOpenTagRegex, '
\n'), (str: string) => str.replace(thinkCloseTagRegex, '\n[ENDTHINKFLAG]
'), diff --git a/web/app/components/base/markdown/react-markdown-wrapper.tsx b/web/app/components/base/markdown/react-markdown-wrapper.tsx index 83b76d97cc..22964ec04f 100644 --- a/web/app/components/base/markdown/react-markdown-wrapper.tsx +++ b/web/app/components/base/markdown/react-markdown-wrapper.tsx @@ -1,30 +1,20 @@ -import ReactMarkdown from 'react-markdown' -import RemarkMath from 'remark-math' -import RemarkBreaks from 'remark-breaks' -import RehypeKatex from 'rehype-katex' -import RemarkGfm from 'remark-gfm' -import RehypeRaw from 'rehype-raw' +import { AudioBlock, Img, Link, MarkdownButton, MarkdownForm, Paragraph, PluginImg, PluginParagraph, ScriptBlock, ThinkBlock, VideoBlock } from '@/app/components/base/markdown-blocks' import { ENABLE_SINGLE_DOLLAR_LATEX } from '@/config' -import AudioBlock from '@/app/components/base/markdown-blocks/audio-block' -import Img from '@/app/components/base/markdown-blocks/img' -import Link from '@/app/components/base/markdown-blocks/link' -import MarkdownButton from '@/app/components/base/markdown-blocks/button' -import MarkdownForm from '@/app/components/base/markdown-blocks/form' -import Paragraph from '@/app/components/base/markdown-blocks/paragraph' -import ScriptBlock from '@/app/components/base/markdown-blocks/script-block' -import ThinkBlock from '@/app/components/base/markdown-blocks/think-block' -import VideoBlock from '@/app/components/base/markdown-blocks/video-block' -import { customUrlTransform } from './markdown-utils' - -import type { FC } from 'react' - import dynamic from 'next/dynamic' +import type { FC } from 'react' +import ReactMarkdown from 'react-markdown' +import RehypeKatex from 'rehype-katex' +import RehypeRaw from 'rehype-raw' +import RemarkBreaks from 'remark-breaks' +import RemarkGfm from 'remark-gfm' +import RemarkMath from 'remark-math' +import { customUrlTransform } from './markdown-utils' const CodeBlock = dynamic(() => import('@/app/components/base/markdown-blocks/code-block'), { ssr: false }) export type SimplePluginInfo = { pluginUniqueIdentifier: string - plugin_id: string + pluginId: string } export type ReactMarkdownWrapperProps = { @@ -70,11 +60,11 @@ export const ReactMarkdownWrapper: FC = (props) => { disallowedElements={['iframe', 'head', 'html', 'meta', 'link', 'style', 'body', ...(props.customDisallowedElements || [])]} components={{ code: CodeBlock, - img: (props: any) => , + img: (props: any) => pluginInfo ? : , video: VideoBlock, audio: AudioBlock, a: Link, - p: (props: any) => , + p: (props: any) => pluginInfo ? : , button: MarkdownButton, form: MarkdownForm, script: ScriptBlock as any, diff --git a/web/app/components/base/mermaid/index.stories.tsx b/web/app/components/base/mermaid/index.stories.tsx new file mode 100644 index 0000000000..73030d7905 --- /dev/null +++ b/web/app/components/base/mermaid/index.stories.tsx @@ -0,0 +1,64 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import Flowchart from '.' + +const SAMPLE = ` +flowchart LR + A[User Message] --> B{Agent decides} + B -->|Needs tool| C[Search Tool] + C --> D[Combine result] + B -->|Direct answer| D + D --> E[Send response] +` + +const MermaidDemo = ({ + theme = 'light', +}: { + theme?: 'light' | 'dark' +}) => { + const [currentTheme, setCurrentTheme] = useState<'light' | 'dark'>(theme) + + return ( +
+
+ Mermaid diagram + +
+ +
+ ) +} + +const meta = { + title: 'Base/Data Display/Mermaid', + component: MermaidDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Mermaid renderer with custom theme toggle and caching. Useful for visualizing agent flows.', + }, + }, + }, + argTypes: { + theme: { + control: 'inline-radio', + options: ['light', 'dark'], + }, + }, + args: { + theme: 'light', + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/message-log-modal/index.stories.tsx b/web/app/components/base/message-log-modal/index.stories.tsx new file mode 100644 index 0000000000..4173a85ebc --- /dev/null +++ b/web/app/components/base/message-log-modal/index.stories.tsx @@ -0,0 +1,188 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useEffect } from 'react' +import MessageLogModal from '.' +import type { IChatItem } from '@/app/components/base/chat/chat/type' +import { useStore } from '@/app/components/app/store' +import type { WorkflowRunDetailResponse } from '@/models/log' +import type { NodeTracing, NodeTracingListResponse } from '@/types/workflow' +import { BlockEnum } from '@/app/components/workflow/types' +import { WorkflowContextProvider } from '@/app/components/workflow/context' + +const SAMPLE_APP_DETAIL = { + id: 'app-demo-1', + name: 'Support Assistant', + mode: 'chat', +} as any + +const mockRunDetail: WorkflowRunDetailResponse = { + id: 'run-demo-1', + version: 'v1.0.0', + graph: { + nodes: [], + edges: [], + }, + inputs: JSON.stringify({ question: 'How do I reset my password?' }, null, 2), + inputs_truncated: false, + status: 'succeeded', + outputs: JSON.stringify({ answer: 'Follow the reset link we just emailed you.' }, null, 2), + outputs_truncated: false, + total_steps: 3, + created_by_role: 'account', + created_by_account: { + id: 'account-1', + name: 'Demo Admin', + email: 'demo@example.com', + }, + created_at: 1700000000, + finished_at: 1700000006, + elapsed_time: 5.2, + total_tokens: 864, +} + +const buildNode = (override: Partial): NodeTracing => ({ + id: 'node-start', + index: 0, + predecessor_node_id: '', + node_id: 'node-start', + node_type: BlockEnum.Start, + title: 'Start', + inputs: {}, + inputs_truncated: false, + process_data: {}, + process_data_truncated: false, + outputs: {}, + outputs_truncated: false, + status: 'succeeded', + metadata: { + iterator_length: 1, + iterator_index: 0, + loop_length: 1, + loop_index: 0, + }, + created_at: 1700000000, + created_by: { + id: 'account-1', + name: 'Demo Admin', + email: 'demo@example.com', + }, + finished_at: 1700000001, + elapsed_time: 1.1, + extras: {}, + ...override, +}) + +const mockTracingList: NodeTracingListResponse = { + data: [ + buildNode({}), + buildNode({ + id: 'node-answer', + node_id: 'node-answer', + node_type: BlockEnum.Answer, + title: 'Answer', + inputs: { prompt: 'How do I reset my password?' }, + outputs: { output: 'Follow the reset link we just emailed you.' }, + finished_at: 1700000005, + elapsed_time: 2.6, + }), + ], +} + +const mockCurrentLogItem: IChatItem = { + id: 'message-1', + content: 'Follow the reset link we just emailed you.', + isAnswer: true, + workflow_run_id: 'run-demo-1', +} + +const useMessageLogMocks = () => { + useEffect(() => { + const store = useStore.getState() + store.setAppDetail(SAMPLE_APP_DETAIL) + + const originalFetch = globalThis.fetch?.bind(globalThis) ?? null + + const handle = async (input: RequestInfo | URL, init?: RequestInit) => { + const url = typeof input === 'string' + ? input + : input instanceof URL + ? input.toString() + : input.url + + if (url.includes('/workflow-runs/run-demo-1/') && url.endsWith('/node-executions')) { + return new Response( + JSON.stringify(mockTracingList), + { headers: { 'Content-Type': 'application/json' }, status: 200 }, + ) + } + + if (url.endsWith('/workflow-runs/run-demo-1')) { + return new Response( + JSON.stringify(mockRunDetail), + { headers: { 'Content-Type': 'application/json' }, status: 200 }, + ) + } + + if (originalFetch) + return originalFetch(input, init) + + throw new Error(`Unmocked fetch call for ${url}`) + } + + globalThis.fetch = handle as typeof globalThis.fetch + + return () => { + globalThis.fetch = originalFetch || globalThis.fetch + useStore.getState().setAppDetail(undefined) + } + }, []) +} + +type MessageLogModalProps = React.ComponentProps + +const MessageLogPreview = (props: MessageLogModalProps) => { + useMessageLogMocks() + + return ( +
+ + + +
+ ) +} + +const meta = { + title: 'Base/Feedback/MessageLogModal', + component: MessageLogPreview, + parameters: { + layout: 'fullscreen', + docs: { + description: { + component: 'Workflow run inspector presented alongside chat transcripts. This Storybook mock provides canned run details and tracing metadata.', + }, + }, + }, + args: { + defaultTab: 'DETAIL', + width: 960, + fixedWidth: true, + onCancel: () => { + console.log('Modal closed') + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const FixedPanel: Story = {} + +export const FloatingPanel: Story = { + args: { + fixedWidth: false, + }, +} diff --git a/web/app/components/base/modal-like-wrap/index.stories.tsx b/web/app/components/base/modal-like-wrap/index.stories.tsx index bf027e0db4..c7d66b8e6a 100644 --- a/web/app/components/base/modal-like-wrap/index.stories.tsx +++ b/web/app/components/base/modal-like-wrap/index.stories.tsx @@ -2,7 +2,7 @@ import type { Meta, StoryObj } from '@storybook/nextjs' import ModalLikeWrap from '.' const meta = { - title: 'Base/Dialog/ModalLikeWrap', + title: 'Base/Feedback/ModalLikeWrap', component: ModalLikeWrap, parameters: { layout: 'centered', @@ -45,6 +45,7 @@ const meta = { hideCloseBtn: false, onClose: () => console.log('close'), onConfirm: () => console.log('confirm'), + children: null, }, } satisfies Meta @@ -68,6 +69,9 @@ export const Default: Story = { ), + args: { + children: null, + }, } export const WithBackLink: Story = { @@ -90,6 +94,7 @@ export const WithBackLink: Story = { ), args: { title: 'Select metadata type', + children: null, }, parameters: { docs: { @@ -114,6 +119,7 @@ export const CustomWidth: Story = { ), args: { title: 'Advanced configuration', + children: null, }, parameters: { docs: { diff --git a/web/app/components/base/modal/index.stories.tsx b/web/app/components/base/modal/index.stories.tsx index e561acebbb..c0ea31eb42 100644 --- a/web/app/components/base/modal/index.stories.tsx +++ b/web/app/components/base/modal/index.stories.tsx @@ -3,7 +3,7 @@ import { useEffect, useState } from 'react' import Modal from '.' const meta = { - title: 'Base/Dialog/Modal', + title: 'Base/Feedback/Modal', component: Modal, parameters: { layout: 'fullscreen', diff --git a/web/app/components/base/modal/modal.stories.tsx b/web/app/components/base/modal/modal.stories.tsx index 3e5be78a5b..adb80aebe6 100644 --- a/web/app/components/base/modal/modal.stories.tsx +++ b/web/app/components/base/modal/modal.stories.tsx @@ -3,7 +3,7 @@ import { useEffect, useState } from 'react' import Modal from './modal' const meta = { - title: 'Base/Dialog/RichModal', + title: 'Base/Feedback/RichModal', component: Modal, parameters: { layout: 'fullscreen', diff --git a/web/app/components/base/new-audio-button/index.stories.tsx b/web/app/components/base/new-audio-button/index.stories.tsx index d2f9b8b4d5..c672392562 100644 --- a/web/app/components/base/new-audio-button/index.stories.tsx +++ b/web/app/components/base/new-audio-button/index.stories.tsx @@ -20,7 +20,7 @@ const StoryWrapper = (props: ComponentProps) => { } const meta = { - title: 'Base/Button/NewAudioButton', + title: 'Base/General/NewAudioButton', component: AudioBtn, tags: ['autodocs'], parameters: { diff --git a/web/app/components/base/notion-connector/index.stories.tsx b/web/app/components/base/notion-connector/index.stories.tsx new file mode 100644 index 0000000000..eb8b17df3f --- /dev/null +++ b/web/app/components/base/notion-connector/index.stories.tsx @@ -0,0 +1,26 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import NotionConnector from '.' + +const meta = { + title: 'Base/Other/NotionConnector', + component: NotionConnector, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Call-to-action card inviting users to connect a Notion workspace. Shows the product icon, copy, and primary button.', + }, + }, + }, + args: { + onSetting: () => { + console.log('Open Notion settings') + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/notion-icon/index.stories.tsx b/web/app/components/base/notion-icon/index.stories.tsx new file mode 100644 index 0000000000..5389a6f935 --- /dev/null +++ b/web/app/components/base/notion-icon/index.stories.tsx @@ -0,0 +1,129 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import NotionIcon from '.' + +const meta = { + title: 'Base/General/NotionIcon', + component: NotionIcon, + parameters: { + docs: { + description: { + component: 'Renders workspace and page icons returned from Notion APIs, falling back to text initials or the default document glyph.', + }, + }, + }, + tags: ['autodocs'], + args: { + type: 'workspace', + name: 'Knowledge Base', + src: 'https://cloud.dify.ai/logo/logo.svg', + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +export const WorkspaceIcon: Story = { + render: args => ( +
+ + Workspace icon pulled from a remote URL. +
+ ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +` + .trim(), + }, + }, + }, +} + +export const WorkspaceInitials: Story = { + render: args => ( +
+ + Fallback initial rendered when no icon URL is available. +
+ ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +` + .trim(), + }, + }, + }, +} + +export const PageEmoji: Story = { + render: args => ( +
+ + Page-level emoji icon returned by the API. +
+ ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +` + .trim(), + }, + }, + }, +} + +export const PageImage: Story = { + render: args => ( +
+ + Page icon resolved from an image URL. +
+ ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +` + .trim(), + }, + }, + }, +} + +export const DefaultIcon: Story = { + render: args => ( +
+ + When neither emoji nor URL is provided, the generic document icon is shown. +
+ ), + parameters: { + docs: { + source: { + language: 'tsx', + code: ` +` + .trim(), + }, + }, + }, +} diff --git a/web/app/components/base/notion-page-selector/base.tsx b/web/app/components/base/notion-page-selector/base.tsx index adf044c406..1f9ddeaebd 100644 --- a/web/app/components/base/notion-page-selector/base.tsx +++ b/web/app/components/base/notion-page-selector/base.tsx @@ -10,6 +10,7 @@ import { useInvalidPreImportNotionPages, usePreImportNotionPages } from '@/servi import Header from '../../datasets/create/website/base/header' import type { DataSourceCredential } from '../../header/account-setting/data-source-page-new/types' import Loading from '../loading' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' type NotionPageSelectorProps = { value?: string[] @@ -124,7 +125,7 @@ const NotionPageSelector = ({ }, [pagesMapAndSelectedPagesId, onPreview]) const handleConfigureNotion = useCallback(() => { - setShowAccountSettingModal({ payload: 'data-source' }) + setShowAccountSettingModal({ payload: ACCOUNT_SETTING_TAB.DATA_SOURCE }) }, [setShowAccountSettingModal]) if (isFetchingNotionPagesError) { diff --git a/web/app/components/base/notion-page-selector/index.stories.tsx b/web/app/components/base/notion-page-selector/index.stories.tsx new file mode 100644 index 0000000000..6fdee03adb --- /dev/null +++ b/web/app/components/base/notion-page-selector/index.stories.tsx @@ -0,0 +1,200 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useEffect, useMemo, useState } from 'react' +import { CredentialTypeEnum } from '@/app/components/plugins/plugin-auth/types' +import { NotionPageSelector } from '.' +import type { DataSourceCredential } from '@/app/components/header/account-setting/data-source-page-new/types' +import type { NotionPage } from '@/models/common' + +const DATASET_ID = 'dataset-demo' +const CREDENTIALS: DataSourceCredential[] = [ + { + id: 'cred-1', + name: 'Marketing Workspace', + type: CredentialTypeEnum.OAUTH2, + is_default: true, + avatar_url: '', + credential: { + workspace_name: 'Marketing Workspace', + workspace_icon: null, + workspace_id: 'workspace-1', + }, + }, + { + id: 'cred-2', + name: 'Product Workspace', + type: CredentialTypeEnum.OAUTH2, + is_default: false, + avatar_url: '', + credential: { + workspace_name: 'Product Workspace', + workspace_icon: null, + workspace_id: 'workspace-2', + }, + }, +] + +const marketingPages = { + notion_info: [ + { + workspace_name: 'Marketing Workspace', + workspace_id: 'workspace-1', + workspace_icon: null, + pages: [ + { + page_icon: { type: 'emoji', emoji: '\u{1F4CB}', url: null }, + page_id: 'briefs', + page_name: 'Campaign Briefs', + parent_id: 'root', + type: 'page', + is_bound: false, + }, + { + page_icon: { type: 'emoji', emoji: '\u{1F4DD}', url: null }, + page_id: 'notes', + page_name: 'Meeting Notes', + parent_id: 'root', + type: 'page', + is_bound: true, + }, + { + page_icon: { type: 'emoji', emoji: '\u{1F30D}', url: null }, + page_id: 'localizations', + page_name: 'Localization Pipeline', + parent_id: 'briefs', + type: 'page', + is_bound: false, + }, + ], + }, + ], +} + +const productPages = { + notion_info: [ + { + workspace_name: 'Product Workspace', + workspace_id: 'workspace-2', + workspace_icon: null, + pages: [ + { + page_icon: { type: 'emoji', emoji: '\u{1F4A1}', url: null }, + page_id: 'ideas', + page_name: 'Idea Backlog', + parent_id: 'root', + type: 'page', + is_bound: false, + }, + { + page_icon: { type: 'emoji', emoji: '\u{1F9EA}', url: null }, + page_id: 'experiments', + page_name: 'Experiments', + parent_id: 'ideas', + type: 'page', + is_bound: false, + }, + ], + }, + ], +} + +type NotionApiResponse = typeof marketingPages +const emptyNotionResponse: NotionApiResponse = { notion_info: [] } + +const useMockNotionApi = () => { + const responseMap = useMemo(() => ({ + [`${DATASET_ID}:cred-1`]: marketingPages, + [`${DATASET_ID}:cred-2`]: productPages, + }) satisfies Record<`${typeof DATASET_ID}:${typeof CREDENTIALS[number]['id']}`, NotionApiResponse>, []) + + useEffect(() => { + const originalFetch = globalThis.fetch?.bind(globalThis) + + const handler = async (input: RequestInfo | URL, init?: RequestInit) => { + const url = typeof input === 'string' + ? input + : input instanceof URL + ? input.toString() + : input.url + + if (url.includes('/notion/pre-import/pages')) { + const parsed = new URL(url, globalThis.location.origin) + const datasetId = parsed.searchParams.get('dataset_id') || '' + const credentialId = parsed.searchParams.get('credential_id') || '' + let payload: NotionApiResponse = emptyNotionResponse + + if (datasetId === DATASET_ID) { + const credential = CREDENTIALS.find(item => item.id === credentialId) + if (credential) { + const mapKey = `${DATASET_ID}:${credential.id}` as keyof typeof responseMap + payload = responseMap[mapKey] + } + } + + return new Response( + JSON.stringify(payload), + { headers: { 'Content-Type': 'application/json' }, status: 200 }, + ) + } + + if (originalFetch) + return originalFetch(input, init) + + throw new Error(`Unmocked fetch call for ${url}`) + } + + globalThis.fetch = handler as typeof globalThis.fetch + + return () => { + if (originalFetch) + globalThis.fetch = originalFetch + } + }, [responseMap]) +} + +const NotionSelectorPreview = () => { + const [selectedPages, setSelectedPages] = useState([]) + const [credentialId, setCredentialId] = useState() + + useMockNotionApi() + + return ( +
+ page.page_id)} + onSelect={setSelectedPages} + onSelectCredential={setCredentialId} + canPreview + /> +
+
+ Debug state +
+

Active credential: {credentialId || 'None'}

+
+          {JSON.stringify(selectedPages, null, 2)}
+        
+
+
+ ) +} + +const meta = { + title: 'Base/Other/NotionPageSelector', + component: NotionSelectorPreview, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Credential-aware selector that fetches Notion pages and lets users choose which ones to sync.', + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/pagination/index.stories.tsx b/web/app/components/base/pagination/index.stories.tsx new file mode 100644 index 0000000000..4ad5488b96 --- /dev/null +++ b/web/app/components/base/pagination/index.stories.tsx @@ -0,0 +1,81 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useMemo, useState } from 'react' +import Pagination from '.' + +const TOTAL_ITEMS = 120 + +const PaginationDemo = ({ + initialPage = 0, + initialLimit = 10, +}: { + initialPage?: number + initialLimit?: number +}) => { + const [current, setCurrent] = useState(initialPage) + const [limit, setLimit] = useState(initialLimit) + + const pageSummary = useMemo(() => { + const start = current * limit + 1 + const end = Math.min((current + 1) * limit, TOTAL_ITEMS) + return `${start}-${end} of ${TOTAL_ITEMS}` + }, [current, limit]) + + return ( +
+
+ Log pagination + + {pageSummary} + +
+ { + setCurrent(0) + setLimit(nextLimit) + }} + /> +
+ ) +} + +const meta = { + title: 'Base/Navigation/Pagination', + component: PaginationDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Paginate long lists with optional per-page selector. Demonstrates the inline page jump input and quick limit toggles.', + }, + }, + }, + args: { + initialPage: 0, + initialLimit: 10, + }, + argTypes: { + initialPage: { + control: { type: 'number', min: 0, max: 9, step: 1 }, + }, + initialLimit: { + control: { type: 'radio' }, + options: [10, 25, 50], + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} + +export const StartAtMiddle: Story = { + args: { + initialPage: 4, + }, +} diff --git a/web/app/components/base/param-item/index.stories.tsx b/web/app/components/base/param-item/index.stories.tsx new file mode 100644 index 0000000000..a256b56dbf --- /dev/null +++ b/web/app/components/base/param-item/index.stories.tsx @@ -0,0 +1,121 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import ParamItem from '.' + +type ParamConfig = { + id: string + name: string + tip: string + value: number + min: number + max: number + step: number + allowToggle?: boolean +} + +const PARAMS: ParamConfig[] = [ + { + id: 'temperature', + name: 'Temperature', + tip: 'Controls randomness. Lower values make the model more deterministic, higher values encourage creativity.', + value: 0.7, + min: 0, + max: 2, + step: 0.1, + allowToggle: true, + }, + { + id: 'top_p', + name: 'Top P', + tip: 'Nucleus sampling keeps only the most probable tokens whose cumulative probability exceeds this threshold.', + value: 0.9, + min: 0, + max: 1, + step: 0.05, + }, + { + id: 'frequency_penalty', + name: 'Frequency Penalty', + tip: 'Discourages repeating tokens. Increase to reduce repetition.', + value: 0.2, + min: 0, + max: 1, + step: 0.05, + }, +] + +const ParamItemPlayground = () => { + const [state, setState] = useState>(() => { + return PARAMS.reduce((acc, item) => { + acc[item.id] = { value: item.value, enabled: true } + return acc + }, {} as Record) + }) + + const handleChange = (id: string, value: number) => { + setState(prev => ({ + ...prev, + [id]: { + ...prev[id], + value: Number.parseFloat(value.toFixed(3)), + }, + })) + } + + const handleToggle = (id: string, enabled: boolean) => { + setState(prev => ({ + ...prev, + [id]: { + ...prev[id], + enabled, + }, + })) + } + + return ( +
+
+ Generation parameters + + {JSON.stringify(state, null, 0)} + +
+ {PARAMS.map(param => ( + + ))} +
+ ) +} + +const meta = { + title: 'Base/Data Entry/ParamItem', + component: ParamItemPlayground, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Slider + numeric input pairing used for model parameter tuning. Supports optional enable toggles per parameter.', + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/popover/index.stories.tsx b/web/app/components/base/popover/index.stories.tsx new file mode 100644 index 0000000000..1977c89116 --- /dev/null +++ b/web/app/components/base/popover/index.stories.tsx @@ -0,0 +1,120 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import CustomPopover from '.' + +type PopoverContentProps = { + open?: boolean + onClose?: () => void + onClick?: () => void + title: string + description: string +} + +const PopoverContent = ({ title, description, onClose }: PopoverContentProps) => { + return ( +
+
+ {title} +
+

{description}

+ +
+ ) +} + +const Template = ({ + trigger = 'hover', + position = 'bottom', + manualClose, + disabled, +}: { + trigger?: 'click' | 'hover' + position?: 'bottom' | 'bl' | 'br' + manualClose?: boolean + disabled?: boolean +}) => { + const [hoverHint] = useState( + trigger === 'hover' + ? 'Hover over the badge to reveal quick tips.' + : 'Click the badge to open the contextual menu.', + ) + + return ( +
+

{hoverHint}

+
+ Popover trigger} + htmlContent={ + + } + /> +
+
+ ) +} + +const meta = { + title: 'Base/Feedback/Popover', + component: Template, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Headless UI popover wrapper supporting hover and click triggers. These examples highlight alignment controls and manual closing.', + }, + }, + }, + argTypes: { + trigger: { + control: 'radio', + options: ['hover', 'click'], + }, + position: { + control: 'radio', + options: ['bottom', 'bl', 'br'], + }, + manualClose: { control: 'boolean' }, + disabled: { control: 'boolean' }, + }, + args: { + trigger: 'hover', + position: 'bottom', + manualClose: false, + disabled: false, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const HoverPopover: Story = {} + +export const ClickPopover: Story = { + args: { + trigger: 'click', + position: 'br', + }, +} + +export const DisabledState: Story = { + args: { + disabled: true, + }, +} diff --git a/web/app/components/base/popover/index.tsx b/web/app/components/base/popover/index.tsx index 41df06f43a..2387737d02 100644 --- a/web/app/components/base/popover/index.tsx +++ b/web/app/components/base/popover/index.tsx @@ -1,5 +1,5 @@ import { Popover, PopoverButton, PopoverPanel, Transition } from '@headlessui/react' -import { Fragment, cloneElement, useRef } from 'react' +import { Fragment, cloneElement, isValidElement, useRef } from 'react' import cn from '@/utils/classnames' export type HtmlContentProps = { @@ -103,15 +103,17 @@ export default function CustomPopover({ }) } > - {cloneElement(htmlContent as React.ReactElement, { - open, - onClose: close, - ...(manualClose - ? { - onClick: close, - } - : {}), - })} + {isValidElement(htmlContent) + ? cloneElement(htmlContent as React.ReactElement, { + open, + onClose: close, + ...(manualClose + ? { + onClick: close, + } + : {}), + }) + : htmlContent}
)} diff --git a/web/app/components/base/portal-to-follow-elem/index.stories.tsx b/web/app/components/base/portal-to-follow-elem/index.stories.tsx new file mode 100644 index 0000000000..44c8e964ce --- /dev/null +++ b/web/app/components/base/portal-to-follow-elem/index.stories.tsx @@ -0,0 +1,103 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import { + PortalToFollowElem, + PortalToFollowElemContent, + PortalToFollowElemTrigger, +} from '.' + +const TooltipCard = ({ title, description }: { title: string; description: string }) => ( +
+
+ {title} +
+

{description}

+
+) + +const PortalDemo = ({ + placement = 'bottom', + triggerPopupSameWidth = false, +}: { + placement?: Parameters[0]['placement'] + triggerPopupSameWidth?: boolean +}) => { + const [controlledOpen, setControlledOpen] = useState(false) + + return ( +
+
+ + + Hover me + + + + + + + + + + + + + + +
+
+ ) +} + +const meta = { + title: 'Base/Feedback/PortalToFollowElem', + component: PortalDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Floating UI based portal that tracks trigger positioning. Demonstrates both hover-driven and controlled usage.', + }, + }, + }, + argTypes: { + placement: { + control: 'select', + options: ['top', 'top-start', 'top-end', 'bottom', 'bottom-start', 'bottom-end'], + }, + triggerPopupSameWidth: { control: 'boolean' }, + }, + args: { + placement: 'bottom', + triggerPopupSameWidth: false, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} + +export const SameWidthPanel: Story = { + args: { + triggerPopupSameWidth: true, + }, +} diff --git a/web/app/components/base/portal-to-follow-elem/index.tsx b/web/app/components/base/portal-to-follow-elem/index.tsx index 71ee251edd..e1192fe73b 100644 --- a/web/app/components/base/portal-to-follow-elem/index.tsx +++ b/web/app/components/base/portal-to-follow-elem/index.tsx @@ -125,7 +125,7 @@ export const PortalToFollowElemTrigger = ( children, asChild = false, ...props - }: React.HTMLProps & { ref?: React.RefObject, asChild?: boolean }, + }: React.HTMLProps & { ref?: React.RefObject, asChild?: boolean }, ) => { const context = usePortalToFollowElemContext() const childrenRef = (children as any).props?.ref @@ -133,12 +133,13 @@ export const PortalToFollowElemTrigger = ( // `asChild` allows the user to pass any element as the anchor if (asChild && React.isValidElement(children)) { + const childProps = (children.props ?? {}) as Record return React.cloneElement( children, context.getReferenceProps({ ref, ...props, - ...children.props, + ...childProps, 'data-state': context.open ? 'open' : 'closed', } as React.HTMLProps), ) @@ -164,7 +165,7 @@ export const PortalToFollowElemContent = ( style, ...props }: React.HTMLProps & { - ref?: React.RefObject; + ref?: React.RefObject; }, ) => { const context = usePortalToFollowElemContext() diff --git a/web/app/components/base/premium-badge/index.stories.tsx b/web/app/components/base/premium-badge/index.stories.tsx new file mode 100644 index 0000000000..c1f6ede869 --- /dev/null +++ b/web/app/components/base/premium-badge/index.stories.tsx @@ -0,0 +1,64 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import PremiumBadge from '.' + +const colors: Array['color']>> = ['blue', 'indigo', 'gray', 'orange'] + +const PremiumBadgeGallery = ({ + size = 'm', + allowHover = false, +}: { + size?: 's' | 'm' + allowHover?: boolean +}) => { + return ( +
+

Brand badge variants

+
+ {colors.map(color => ( +
+ + Premium + + {color} +
+ ))} +
+
+ ) +} + +const meta = { + title: 'Base/General/PremiumBadge', + component: PremiumBadgeGallery, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Gradient badge used for premium features and upsell prompts. Hover animations can be toggled per instance.', + }, + }, + }, + argTypes: { + size: { + control: 'radio', + options: ['s', 'm'], + }, + allowHover: { control: 'boolean' }, + }, + args: { + size: 'm', + allowHover: false, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} + +export const HoverEnabled: Story = { + args: { + allowHover: true, + }, +} diff --git a/web/app/components/base/progress-bar/progress-circle.stories.tsx b/web/app/components/base/progress-bar/progress-circle.stories.tsx new file mode 100644 index 0000000000..a6a21d2695 --- /dev/null +++ b/web/app/components/base/progress-bar/progress-circle.stories.tsx @@ -0,0 +1,89 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import ProgressCircle from './progress-circle' + +const ProgressCircleDemo = ({ + initialPercentage = 42, + size = 24, +}: { + initialPercentage?: number + size?: number +}) => { + const [percentage, setPercentage] = useState(initialPercentage) + + return ( +
+
+ Upload progress + + {percentage}% + +
+
+ + setPercentage(Number.parseInt(event.target.value, 10))} + className="h-2 w-full cursor-pointer appearance-none rounded-full bg-divider-subtle accent-primary-600" + /> +
+
+ +
+
+ ProgressCircle renders a deterministic SVG slice. Advance the slider to preview how the arc grows for upload indicators. +
+
+ ) +} + +const meta = { + title: 'Base/Feedback/ProgressCircle', + component: ProgressCircleDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Compact radial progress indicator wired to upload flows. The story provides a slider to scrub through percentages.', + }, + }, + }, + argTypes: { + initialPercentage: { + control: { type: 'range', min: 0, max: 100, step: 1 }, + }, + size: { + control: { type: 'number', min: 12, max: 48, step: 2 }, + }, + }, + args: { + initialPercentage: 42, + size: 24, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} + +export const NearComplete: Story = { + args: { + initialPercentage: 92, + }, +} diff --git a/web/app/components/base/prompt-editor/hooks.ts b/web/app/components/base/prompt-editor/hooks.ts index 87119f8b49..b3d2b22236 100644 --- a/web/app/components/base/prompt-editor/hooks.ts +++ b/web/app/components/base/prompt-editor/hooks.ts @@ -35,7 +35,7 @@ import { DELETE_QUERY_BLOCK_COMMAND } from './plugins/query-block' import type { CustomTextNode } from './plugins/custom-text/node' import { registerLexicalTextEntity } from './utils' -export type UseSelectOrDeleteHandler = (nodeKey: string, command?: LexicalCommand) => [RefObject, boolean] +export type UseSelectOrDeleteHandler = (nodeKey: string, command?: LexicalCommand) => [RefObject, boolean] export const useSelectOrDelete: UseSelectOrDeleteHandler = (nodeKey: string, command?: LexicalCommand) => { const ref = useRef(null) const [editor] = useLexicalComposerContext() @@ -110,7 +110,7 @@ export const useSelectOrDelete: UseSelectOrDeleteHandler = (nodeKey: string, com return [ref, isSelected] } -export type UseTriggerHandler = () => [RefObject, boolean, Dispatch>] +export type UseTriggerHandler = () => [RefObject, boolean, Dispatch>] export const useTrigger: UseTriggerHandler = () => { const triggerRef = useRef(null) const [open, setOpen] = useState(false) diff --git a/web/app/components/base/prompt-editor/index.stories.tsx b/web/app/components/base/prompt-editor/index.stories.tsx index e0d0777306..35058ac37d 100644 --- a/web/app/components/base/prompt-editor/index.stories.tsx +++ b/web/app/components/base/prompt-editor/index.stories.tsx @@ -25,7 +25,7 @@ const PromptEditorMock = ({ value, onChange, placeholder, editable, compact, cla } const meta = { - title: 'Base/Input/PromptEditor', + title: 'Base/Data Entry/PromptEditor', component: PromptEditorMock, parameters: { layout: 'centered', diff --git a/web/app/components/base/prompt-editor/plugins/placeholder.tsx b/web/app/components/base/prompt-editor/plugins/placeholder.tsx index c2c2623992..187b574cea 100644 --- a/web/app/components/base/prompt-editor/plugins/placeholder.tsx +++ b/web/app/components/base/prompt-editor/plugins/placeholder.tsx @@ -1,4 +1,5 @@ import { memo } from 'react' +import type { ReactNode } from 'react' import { useTranslation } from 'react-i18next' import cn from '@/utils/classnames' @@ -8,7 +9,7 @@ const Placeholder = ({ className, }: { compact?: boolean - value?: string | JSX.Element + value?: ReactNode className?: string }) => { const { t } = useTranslation() diff --git a/web/app/components/base/prompt-log-modal/index.stories.tsx b/web/app/components/base/prompt-log-modal/index.stories.tsx new file mode 100644 index 0000000000..55389874cd --- /dev/null +++ b/web/app/components/base/prompt-log-modal/index.stories.tsx @@ -0,0 +1,74 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useEffect } from 'react' +import PromptLogModal from '.' +import { useStore } from '@/app/components/app/store' +import type { IChatItem } from '@/app/components/base/chat/chat/type' + +type PromptLogModalProps = React.ComponentProps + +const mockLogItem: IChatItem = { + id: 'message-1', + isAnswer: true, + content: 'Summarize our meeting notes about launch blockers.', + log: [ + { + role: 'system', + text: 'You are an assistant that extracts key launch blockers from the dialogue.', + }, + { + role: 'user', + text: 'Team discussed QA, marketing assets, and infra readiness. Highlight risks.', + }, + { + role: 'assistant', + text: 'Blocking items:\n1. QA needs staging data by Friday.\n2. Marketing awaiting final visuals.\n3. Infra rollout still missing approval.', + }, + ], +} + +const usePromptLogMocks = () => { + useEffect(() => { + useStore.getState().setCurrentLogItem(mockLogItem) + return () => { + useStore.getState().setCurrentLogItem(undefined) + } + }, []) +} + +const PromptLogPreview = (props: PromptLogModalProps) => { + usePromptLogMocks() + + return ( +
+ +
+ ) +} + +const meta = { + title: 'Base/Feedback/PromptLogModal', + component: PromptLogPreview, + parameters: { + layout: 'fullscreen', + docs: { + description: { + component: 'Shows the prompt and message transcript used for a chat completion, with copy-to-clipboard support for single prompts.', + }, + }, + }, + args: { + width: 960, + onCancel: () => { + console.log('Prompt log closed') + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/qrcode/index.stories.tsx b/web/app/components/base/qrcode/index.stories.tsx new file mode 100644 index 0000000000..312dc6a5a8 --- /dev/null +++ b/web/app/components/base/qrcode/index.stories.tsx @@ -0,0 +1,52 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import ShareQRCode from '.' + +const QRDemo = ({ + content = 'https://dify.ai', +}: { + content?: string +}) => { + return ( +
+

Share QR

+
+ Generated URL: + {content} +
+ +
+ ) +} + +const meta = { + title: 'Base/Data Display/QRCode', + component: QRDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Toggleable QR code generator for sharing app URLs. Clicking the trigger reveals the code with a download CTA.', + }, + }, + }, + argTypes: { + content: { + control: 'text', + }, + }, + args: { + content: 'https://dify.ai', + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} + +export const DemoLink: Story = { + args: { + content: 'https://dify.ai/docs', + }, +} diff --git a/web/app/components/base/radio-card/index.stories.tsx b/web/app/components/base/radio-card/index.stories.tsx index bb45db622c..63dd1ad1ec 100644 --- a/web/app/components/base/radio-card/index.stories.tsx +++ b/web/app/components/base/radio-card/index.stories.tsx @@ -4,7 +4,7 @@ import { RiCloudLine, RiCpuLine, RiDatabase2Line, RiLightbulbLine, RiRocketLine, import RadioCard from '.' const meta = { - title: 'Base/Input/RadioCard', + title: 'Base/Data Entry/RadioCard', component: RadioCard, parameters: { layout: 'centered', diff --git a/web/app/components/base/radio/index.stories.tsx b/web/app/components/base/radio/index.stories.tsx index 0f917320bb..699372097f 100644 --- a/web/app/components/base/radio/index.stories.tsx +++ b/web/app/components/base/radio/index.stories.tsx @@ -3,7 +3,7 @@ import { useState } from 'react' import Radio from '.' const meta = { - title: 'Base/Input/Radio', + title: 'Base/Data Entry/Radio', component: Radio, parameters: { layout: 'centered', diff --git a/web/app/components/base/search-input/index.stories.tsx b/web/app/components/base/search-input/index.stories.tsx index eb051f892f..6b2326322b 100644 --- a/web/app/components/base/search-input/index.stories.tsx +++ b/web/app/components/base/search-input/index.stories.tsx @@ -3,7 +3,7 @@ import { useState } from 'react' import SearchInput from '.' const meta = { - title: 'Base/Input/SearchInput', + title: 'Base/Data Entry/SearchInput', component: SearchInput, parameters: { layout: 'centered', diff --git a/web/app/components/base/segmented-control/index.stories.tsx b/web/app/components/base/segmented-control/index.stories.tsx new file mode 100644 index 0000000000..c83112bd54 --- /dev/null +++ b/web/app/components/base/segmented-control/index.stories.tsx @@ -0,0 +1,92 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { RiLineChartLine, RiListCheck2, RiRobot2Line } from '@remixicon/react' +import { useState } from 'react' +import { SegmentedControl } from '.' + +const SEGMENTS = [ + { value: 'overview', text: 'Overview', Icon: RiLineChartLine }, + { value: 'tasks', text: 'Tasks', Icon: RiListCheck2, count: 8 }, + { value: 'agents', text: 'Agents', Icon: RiRobot2Line }, +] + +const SegmentedControlDemo = ({ + initialValue = 'overview', + size = 'regular', + padding = 'with', + activeState = 'default', +}: { + initialValue?: string + size?: 'regular' | 'small' | 'large' + padding?: 'none' | 'with' + activeState?: 'default' | 'accent' | 'accentLight' +}) => { + const [value, setValue] = useState(initialValue) + + return ( +
+
+ Segmented control + + value="{value}" + +
+ +
+ ) +} + +const meta = { + title: 'Base/Data Entry/SegmentedControl', + component: SegmentedControlDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Multi-tab segmented control with optional icons and badge counts. Adjust sizing and accent states via controls.', + }, + }, + }, + argTypes: { + initialValue: { + control: 'radio', + options: SEGMENTS.map(segment => segment.value), + }, + size: { + control: 'inline-radio', + options: ['small', 'regular', 'large'], + }, + padding: { + control: 'inline-radio', + options: ['none', 'with'], + }, + activeState: { + control: 'inline-radio', + options: ['default', 'accent', 'accentLight'], + }, + }, + args: { + initialValue: 'overview', + size: 'regular', + padding: 'with', + activeState: 'default', + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} + +export const AccentState: Story = { + args: { + activeState: 'accent', + }, +} diff --git a/web/app/components/base/select/index.stories.tsx b/web/app/components/base/select/index.stories.tsx index 48a715498b..f1b46f2d55 100644 --- a/web/app/components/base/select/index.stories.tsx +++ b/web/app/components/base/select/index.stories.tsx @@ -4,7 +4,7 @@ import Select, { PortalSelect, SimpleSelect } from '.' import type { Item } from '.' const meta = { - title: 'Base/Input/Select', + title: 'Base/Data Entry/Select', component: SimpleSelect, parameters: { layout: 'centered', diff --git a/web/app/components/base/select/index.tsx b/web/app/components/base/select/index.tsx index 2da3a0abe8..1a096d7f93 100644 --- a/web/app/components/base/select/index.tsx +++ b/web/app/components/base/select/index.tsx @@ -34,7 +34,7 @@ export type Item = { export type ISelectProps = { className?: string wrapperClassName?: string - renderTrigger?: (value: Item | null) => React.JSX.Element | null + renderTrigger?: (value: Item | null, isOpen: boolean) => React.JSX.Element | null items?: Item[] defaultValue?: number | string disabled?: boolean @@ -222,7 +222,7 @@ const SimpleSelect: FC = ({ > {({ open }) => (
- {renderTrigger && {renderTrigger(selectedItem)}} + {renderTrigger && {renderTrigger(selectedItem, open)}} {!renderTrigger && ( { onOpenChange?.(open) diff --git a/web/app/components/base/select/pure.tsx b/web/app/components/base/select/pure.tsx index cede31d2ba..3de8245025 100644 --- a/web/app/components/base/select/pure.tsx +++ b/web/app/components/base/select/pure.tsx @@ -1,5 +1,6 @@ import { useCallback, + useMemo, useState, } from 'react' import { useTranslation } from 'react-i18next' @@ -22,10 +23,8 @@ export type Option = { value: string } -export type PureSelectProps = { +type SharedPureSelectProps = { options: Option[] - value?: string - onChange?: (value: string) => void containerProps?: PortalToFollowElemOptions & { open?: boolean onOpenChange?: (open: boolean) => void @@ -38,22 +37,39 @@ export type PureSelectProps = { className?: string itemClassName?: string title?: string + titleClassName?: string }, placeholder?: string disabled?: boolean triggerPopupSameWidth?: boolean } -const PureSelect = ({ - options, - value, - onChange, - containerProps, - triggerProps, - popupProps, - placeholder, - disabled, - triggerPopupSameWidth, -}: PureSelectProps) => { + +type SingleSelectProps = { + multiple?: false + value?: string + onChange?: (value: string) => void +} + +type MultiSelectProps = { + multiple: true + value?: string[] + onChange?: (value: string[]) => void +} + +export type PureSelectProps = SharedPureSelectProps & (SingleSelectProps | MultiSelectProps) +const PureSelect = (props: PureSelectProps) => { + const { + options, + containerProps, + triggerProps, + popupProps, + placeholder, + disabled, + triggerPopupSameWidth, + multiple, + value, + onChange, + } = props const { t } = useTranslation() const { open, @@ -69,6 +85,7 @@ const PureSelect = ({ className: popupClassName, itemClassName: popupItemClassName, title: popupTitle, + titleClassName: popupTitleClassName, } = popupProps || {} const [localOpen, setLocalOpen] = useState(false) @@ -79,8 +96,13 @@ const PureSelect = ({ setLocalOpen(openValue) }, [onOpenChange]) - const selectedOption = options.find(option => option.value === value) - const triggerText = selectedOption?.label || placeholder || t('common.placeholder.select') + const triggerText = useMemo(() => { + const placeholderText = placeholder || t('common.placeholder.select') + if (multiple) + return value?.length ? t('common.dynamicSelect.selected', { count: value.length }) : placeholderText + + return options.find(option => option.value === value)?.label || placeholderText + }, [multiple, value, options, placeholder]) return (
{ popupTitle && ( -
+
{popupTitle}
) @@ -144,6 +169,14 @@ const PureSelect = ({ title={option.label} onClick={() => { if (disabled) return + if (multiple) { + const currentValues = value ?? [] + const nextValues = currentValues.includes(option.value) + ? currentValues.filter(valueItem => valueItem !== option.value) + : [...currentValues, option.value] + onChange?.(nextValues) + return + } onChange?.(option.value) handleOpenChange(false) }} @@ -152,7 +185,11 @@ const PureSelect = ({ {option.label}
{ - value === option.value && + ( + multiple + ? (value ?? []).includes(option.value) + : value === option.value + ) && }
)) diff --git a/web/app/components/base/simple-pie-chart/index.stories.tsx b/web/app/components/base/simple-pie-chart/index.stories.tsx new file mode 100644 index 0000000000..d08c8fa0ce --- /dev/null +++ b/web/app/components/base/simple-pie-chart/index.stories.tsx @@ -0,0 +1,89 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useMemo, useState } from 'react' +import SimplePieChart from '.' + +const PieChartPlayground = ({ + initialPercentage = 65, + fill = '#fdb022', + stroke = '#f79009', +}: { + initialPercentage?: number + fill?: string + stroke?: string +}) => { + const [percentage, setPercentage] = useState(initialPercentage) + + const label = useMemo(() => `${percentage}%`, [percentage]) + + return ( +
+
+ Conversion snapshot + + {label} + +
+
+ +
+ + setPercentage(Number.parseInt(event.target.value, 10))} + className="h-2 w-full cursor-pointer appearance-none rounded-full bg-divider-subtle accent-primary-600" + /> +
+
+
+ ) +} + +const meta = { + title: 'Base/Data Display/SimplePieChart', + component: PieChartPlayground, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Thin radial indicator built with ECharts. Use it for quick percentage snapshots inside cards.', + }, + }, + }, + argTypes: { + initialPercentage: { + control: { type: 'range', min: 0, max: 100, step: 1 }, + }, + fill: { control: 'color' }, + stroke: { control: 'color' }, + }, + args: { + initialPercentage: 65, + fill: '#fdb022', + stroke: '#f79009', + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} + +export const BrandAccent: Story = { + args: { + fill: '#155EEF', + stroke: '#0040C1', + initialPercentage: 82, + }, +} diff --git a/web/app/components/base/skeleton/index.stories.tsx b/web/app/components/base/skeleton/index.stories.tsx new file mode 100644 index 0000000000..b5ea649b34 --- /dev/null +++ b/web/app/components/base/skeleton/index.stories.tsx @@ -0,0 +1,59 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { + SkeletonContainer, + SkeletonPoint, + SkeletonRectangle, + SkeletonRow, +} from '.' + +const SkeletonDemo = () => { + return ( +
+
Loading skeletons
+
+ + + + + + + + + + + + + +
+
+ + + + + + + + +
+
+ ) +} + +const meta = { + title: 'Base/Feedback/Skeleton', + component: SkeletonDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Composable skeleton primitives (container, row, rectangle, point) to sketch loading states for panels and lists.', + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/slider/index.stories.tsx b/web/app/components/base/slider/index.stories.tsx index 691c75d7ad..4d06381d16 100644 --- a/web/app/components/base/slider/index.stories.tsx +++ b/web/app/components/base/slider/index.stories.tsx @@ -3,7 +3,7 @@ import { useState } from 'react' import Slider from '.' const meta = { - title: 'Base/Input/Slider', + title: 'Base/Data Entry/Slider', component: Slider, parameters: { layout: 'centered', diff --git a/web/app/components/base/sort/index.stories.tsx b/web/app/components/base/sort/index.stories.tsx new file mode 100644 index 0000000000..fea21e8edc --- /dev/null +++ b/web/app/components/base/sort/index.stories.tsx @@ -0,0 +1,59 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useMemo, useState } from 'react' +import Sort from '.' + +const SORT_ITEMS = [ + { value: 'created_at', name: 'Created time' }, + { value: 'updated_at', name: 'Updated time' }, + { value: 'latency', name: 'Latency' }, +] + +const SortPlayground = () => { + const [sortBy, setSortBy] = useState('-created_at') + + const { order, value } = useMemo(() => { + const isDesc = sortBy.startsWith('-') + return { + order: isDesc ? '-' : '', + value: sortBy.replace('-', '') || 'created_at', + } + }, [sortBy]) + + return ( +
+
+ Sort control + + sort_by="{sortBy}" + +
+ { + setSortBy(next as string) + }} + /> +
+ ) +} + +const meta = { + title: 'Base/Data Display/Sort', + component: SortPlayground, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Sorting trigger used in log tables. Includes dropdown selection and quick toggle between ascending and descending.', + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/sort/index.tsx b/web/app/components/base/sort/index.tsx index af90233575..3823b13d1a 100644 --- a/web/app/components/base/sort/index.tsx +++ b/web/app/components/base/sort/index.tsx @@ -47,10 +47,10 @@ const Sort: FC = ({ className='block' >
-
+
{t('appLog.filter.sortBy')}
{triggerContent} diff --git a/web/app/components/base/spinner/index.stories.tsx b/web/app/components/base/spinner/index.stories.tsx new file mode 100644 index 0000000000..9792b9b2fc --- /dev/null +++ b/web/app/components/base/spinner/index.stories.tsx @@ -0,0 +1,50 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import Spinner from '.' + +const SpinnerPlayground = ({ + loading = true, +}: { + loading?: boolean +}) => { + const [isLoading, setIsLoading] = useState(loading) + + return ( +
+

Spinner

+ + +
+ ) +} + +const meta = { + title: 'Base/Feedback/Spinner', + component: SpinnerPlayground, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Minimal spinner powered by Tailwind utilities. Toggle the state to inspect motion-reduced behaviour.', + }, + }, + }, + argTypes: { + loading: { control: 'boolean' }, + }, + args: { + loading: true, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/svg-gallery/index.stories.tsx b/web/app/components/base/svg-gallery/index.stories.tsx new file mode 100644 index 0000000000..65da97d243 --- /dev/null +++ b/web/app/components/base/svg-gallery/index.stories.tsx @@ -0,0 +1,51 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import SVGRenderer from '.' + +const SAMPLE_SVG = ` + + + + + + + + + + SVG Preview + Click to open high-resolution preview + + + + + Inline SVG asset + +`.trim() + +const meta = { + title: 'Base/Data Display/SVGRenderer', + component: SVGRenderer, + parameters: { + docs: { + description: { + component: 'Renders sanitized SVG markup with zoom-to-preview capability.', + }, + source: { + language: 'tsx', + code: ` +... +\`} /> + `.trim(), + }, + }, + }, + tags: ['autodocs'], + args: { + content: SAMPLE_SVG, + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Default: Story = {} diff --git a/web/app/components/base/svg/index.stories.tsx b/web/app/components/base/svg/index.stories.tsx new file mode 100644 index 0000000000..0b7d8d23c9 --- /dev/null +++ b/web/app/components/base/svg/index.stories.tsx @@ -0,0 +1,36 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import SVGBtn from '.' + +const SvgToggleDemo = () => { + const [isSVG, setIsSVG] = useState(false) + + return ( +
+

SVG toggle

+ + + Mode: {isSVG ? 'SVG' : 'PNG'} + +
+ ) +} + +const meta = { + title: 'Base/General/SVGBtn', + component: SvgToggleDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Small toggle used in icon pickers to switch between SVG and bitmap assets.', + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/switch/index.stories.tsx b/web/app/components/base/switch/index.stories.tsx index 2753a6a309..5b2b6e59c4 100644 --- a/web/app/components/base/switch/index.stories.tsx +++ b/web/app/components/base/switch/index.stories.tsx @@ -3,7 +3,7 @@ import { useState } from 'react' import Switch from '.' const meta = { - title: 'Base/Input/Switch', + title: 'Base/Data Entry/Switch', component: Switch, parameters: { layout: 'centered', diff --git a/web/app/components/base/tab-header/index.stories.tsx b/web/app/components/base/tab-header/index.stories.tsx new file mode 100644 index 0000000000..cb383947d9 --- /dev/null +++ b/web/app/components/base/tab-header/index.stories.tsx @@ -0,0 +1,64 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import TabHeader from '.' +import type { ITabHeaderProps } from '.' + +const items: ITabHeaderProps['items'] = [ + { id: 'overview', name: 'Overview' }, + { id: 'playground', name: 'Playground' }, + { id: 'changelog', name: 'Changelog', extra: New }, + { id: 'docs', name: 'Docs', isRight: true }, + { id: 'settings', name: 'Settings', isRight: true, disabled: true }, +] + +const TabHeaderDemo = ({ + initialTab = 'overview', +}: { + initialTab?: string +}) => { + const [activeTab, setActiveTab] = useState(initialTab) + + return ( +
+
+ Tabs + + active="{activeTab}" + +
+ +
+ ) +} + +const meta = { + title: 'Base/Navigation/TabHeader', + component: TabHeaderDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Two-sided header tabs with optional right-aligned actions. Disabled items illustrate read-only states.', + }, + }, + }, + argTypes: { + initialTab: { + control: 'radio', + options: items.map(item => item.id), + }, + }, + args: { + initialTab: 'overview', + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/tab-slider-new/index.stories.tsx b/web/app/components/base/tab-slider-new/index.stories.tsx new file mode 100644 index 0000000000..669ec9eed9 --- /dev/null +++ b/web/app/components/base/tab-slider-new/index.stories.tsx @@ -0,0 +1,52 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import { RiSparklingFill, RiTerminalBoxLine } from '@remixicon/react' +import TabSliderNew from '.' + +const OPTIONS = [ + { value: 'visual', text: 'Visual builder', icon: }, + { value: 'code', text: 'Code', icon: }, +] + +const TabSliderNewDemo = ({ + initialValue = 'visual', +}: { + initialValue?: string +}) => { + const [value, setValue] = useState(initialValue) + + return ( +
+
Pill tabs
+ +
+ ) +} + +const meta = { + title: 'Base/Navigation/TabSliderNew', + component: TabSliderNewDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Rounded pill tabs suited for switching between editors. Icons illustrate mixed text/icon options.', + }, + }, + }, + argTypes: { + initialValue: { + control: 'radio', + options: OPTIONS.map(option => option.value), + }, + }, + args: { + initialValue: 'visual', + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/tab-slider-plain/index.stories.tsx b/web/app/components/base/tab-slider-plain/index.stories.tsx new file mode 100644 index 0000000000..dd8c7e0d30 --- /dev/null +++ b/web/app/components/base/tab-slider-plain/index.stories.tsx @@ -0,0 +1,56 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useState } from 'react' +import TabSliderPlain from '.' + +const OPTIONS = [ + { value: 'analytics', text: 'Analytics' }, + { value: 'activity', text: 'Recent activity' }, + { value: 'alerts', text: 'Alerts' }, +] + +const TabSliderPlainDemo = ({ + initialValue = 'analytics', +}: { + initialValue?: string +}) => { + const [value, setValue] = useState(initialValue) + + return ( +
+
Underline tabs
+ +
+ ) +} + +const meta = { + title: 'Base/Navigation/TabSliderPlain', + component: TabSliderPlainDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Underline-style navigation commonly used in dashboards. Toggle between three sections.', + }, + }, + }, + argTypes: { + initialValue: { + control: 'radio', + options: OPTIONS.map(option => option.value), + }, + }, + args: { + initialValue: 'analytics', + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/tab-slider/index.stories.tsx b/web/app/components/base/tab-slider/index.stories.tsx new file mode 100644 index 0000000000..703116fe19 --- /dev/null +++ b/web/app/components/base/tab-slider/index.stories.tsx @@ -0,0 +1,93 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useEffect, useState } from 'react' +import TabSlider from '.' + +const OPTIONS = [ + { value: 'models', text: 'Models' }, + { value: 'datasets', text: 'Datasets' }, + { value: 'plugins', text: 'Plugins' }, +] + +const TabSliderDemo = ({ + initialValue = 'models', +}: { + initialValue?: string +}) => { + const [value, setValue] = useState(initialValue) + + useEffect(() => { + const originalFetch = globalThis.fetch?.bind(globalThis) + + const handler = async (input: RequestInfo | URL, init?: RequestInit) => { + const url = typeof input === 'string' + ? input + : input instanceof URL + ? input.toString() + : input.url + + if (url.includes('/workspaces/current/plugin/list')) { + return new Response( + JSON.stringify({ + total: 6, + plugins: [], + }), + { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }, + ) + } + + if (originalFetch) + return originalFetch(input, init) + + throw new Error(`Unhandled request for ${url}`) + } + + globalThis.fetch = handler as typeof globalThis.fetch + + return () => { + if (originalFetch) + globalThis.fetch = originalFetch + } + }, []) + + return ( +
+
Segmented tabs
+ +
+ ) +} + +const meta = { + title: 'Base/Navigation/TabSlider', + component: TabSliderDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Animated segmented control with sliding highlight. A badge appears when plugins are installed (mocked in Storybook).', + }, + }, + }, + argTypes: { + initialValue: { + control: 'radio', + options: OPTIONS.map(option => option.value), + }, + }, + args: { + initialValue: 'models', + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/tab-slider/index.tsx b/web/app/components/base/tab-slider/index.tsx index 55c44d5ea8..7c9364baf9 100644 --- a/web/app/components/base/tab-slider/index.tsx +++ b/web/app/components/base/tab-slider/index.tsx @@ -11,12 +11,14 @@ type Option = { type TabSliderProps = { className?: string value: string + itemClassName?: string | ((active: boolean) => string) onChange: (v: string) => void options: Option[] } const TabSlider: FC = ({ className, + itemClassName, value, onChange, options, @@ -58,6 +60,7 @@ const TabSlider: FC = ({ index === activeIndex ? 'text-text-primary' : 'text-text-tertiary', + typeof itemClassName === 'function' ? itemClassName(index === activeIndex) : itemClassName, )} onClick={() => { if (index !== activeIndex) { diff --git a/web/app/components/base/tag-input/index.stories.tsx b/web/app/components/base/tag-input/index.stories.tsx index bbb314cf3a..7aae9f2773 100644 --- a/web/app/components/base/tag-input/index.stories.tsx +++ b/web/app/components/base/tag-input/index.stories.tsx @@ -3,7 +3,7 @@ import { useState } from 'react' import TagInput from '.' const meta = { - title: 'Base/Input/TagInput', + title: 'Base/Data Entry/TagInput', component: TagInput, parameters: { layout: 'centered', diff --git a/web/app/components/base/tag-management/index.stories.tsx b/web/app/components/base/tag-management/index.stories.tsx new file mode 100644 index 0000000000..51f4233461 --- /dev/null +++ b/web/app/components/base/tag-management/index.stories.tsx @@ -0,0 +1,131 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useEffect, useRef } from 'react' +import TagManagementModal from '.' +import { ToastProvider } from '@/app/components/base/toast' +import { useStore as useTagStore } from './store' +import type { Tag } from './constant' + +const INITIAL_TAGS: Tag[] = [ + { id: 'tag-product', name: 'Product', type: 'app', binding_count: 12 }, + { id: 'tag-growth', name: 'Growth', type: 'app', binding_count: 4 }, + { id: 'tag-beta', name: 'Beta User', type: 'app', binding_count: 2 }, + { id: 'tag-rag', name: 'RAG', type: 'knowledge', binding_count: 3 }, + { id: 'tag-updates', name: 'Release Notes', type: 'knowledge', binding_count: 6 }, +] + +const TagManagementPlayground = ({ + type = 'app', +}: { + type?: 'app' | 'knowledge' +}) => { + const originalFetchRef = useRef(null) + const tagsRef = useRef(INITIAL_TAGS) + const setTagList = useTagStore(s => s.setTagList) + const showModal = useTagStore(s => s.showTagManagementModal) + const setShowModal = useTagStore(s => s.setShowTagManagementModal) + + useEffect(() => { + setTagList(tagsRef.current) + setShowModal(true) + }, [setTagList, setShowModal]) + + useEffect(() => { + originalFetchRef.current = globalThis.fetch?.bind(globalThis) + + const handler = async (input: RequestInfo | URL, init?: RequestInit) => { + const request = input instanceof Request ? input : new Request(input, init) + const url = request.url + const method = request.method.toUpperCase() + const parsedUrl = new URL(url, window.location.origin) + + if (parsedUrl.pathname.endsWith('/tags')) { + if (method === 'GET') { + const tagType = parsedUrl.searchParams.get('type') || 'app' + const payload = tagsRef.current.filter(tag => tag.type === tagType) + return new Response(JSON.stringify(payload), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }) + } + if (method === 'POST') { + const body = await request.clone().json() as { name: string; type: string } + const newTag: Tag = { + id: `tag-${Date.now()}`, + name: body.name, + type: body.type, + binding_count: 0, + } + tagsRef.current = [newTag, ...tagsRef.current] + setTagList(tagsRef.current) + return new Response(JSON.stringify(newTag), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }) + } + } + + if (parsedUrl.pathname.endsWith('/tag-bindings/create') || parsedUrl.pathname.endsWith('/tag-bindings/remove')) { + return new Response(JSON.stringify({ ok: true }), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }) + } + + if (originalFetchRef.current) + return originalFetchRef.current(request) + + throw new Error(`Unhandled request in mock fetch: ${url}`) + } + + globalThis.fetch = handler as typeof globalThis.fetch + + return () => { + if (originalFetchRef.current) + globalThis.fetch = originalFetchRef.current + } + }, [setTagList]) + + return ( + +
+ +

Mocked tag management flows with create and bind actions.

+
+ +
+ ) +} + +const meta = { + title: 'Base/Data Display/TagManagementModal', + component: TagManagementPlayground, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Complete tag management modal with mocked service calls for browsing and creating tags.', + }, + }, + }, + argTypes: { + type: { + control: 'radio', + options: ['app', 'knowledge'], + }, + }, + args: { + type: 'app', + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/tag/index.stories.tsx b/web/app/components/base/tag/index.stories.tsx new file mode 100644 index 0000000000..8ca15c0c8b --- /dev/null +++ b/web/app/components/base/tag/index.stories.tsx @@ -0,0 +1,62 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import Tag from '.' + +const COLORS: Array['color']>> = ['green', 'yellow', 'red', 'gray'] + +const TagGallery = ({ + bordered = false, + hideBg = false, +}: { + bordered?: boolean + hideBg?: boolean +}) => { + return ( +
+
Tag variants
+
+ {COLORS.map(color => ( +
+ + {color.charAt(0).toUpperCase() + color.slice(1)} + + {color} +
+ ))} +
+
+ ) +} + +const meta = { + title: 'Base/Data Display/Tag', + component: TagGallery, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Color-coded label component. Toggle borders or remove background to fit dark/light surfaces.', + }, + }, + }, + argTypes: { + bordered: { control: 'boolean' }, + hideBg: { control: 'boolean' }, + }, + args: { + bordered: false, + hideBg: false, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} + +export const Outlined: Story = { + args: { + bordered: true, + hideBg: true, + }, +} diff --git a/web/app/components/base/textarea/index.stories.tsx b/web/app/components/base/textarea/index.stories.tsx index ec27aac22b..41d8bda458 100644 --- a/web/app/components/base/textarea/index.stories.tsx +++ b/web/app/components/base/textarea/index.stories.tsx @@ -3,7 +3,7 @@ import { useState } from 'react' import Textarea from '.' const meta = { - title: 'Base/Input/Textarea', + title: 'Base/Data Entry/Textarea', component: Textarea, parameters: { layout: 'centered', diff --git a/web/app/components/base/toast/index.stories.tsx b/web/app/components/base/toast/index.stories.tsx new file mode 100644 index 0000000000..6ef65475cb --- /dev/null +++ b/web/app/components/base/toast/index.stories.tsx @@ -0,0 +1,104 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import { useCallback } from 'react' +import Toast, { ToastProvider, useToastContext } from '.' + +const ToastControls = () => { + const { notify } = useToastContext() + + const trigger = useCallback((type: 'success' | 'error' | 'warning' | 'info') => { + notify({ + type, + message: `This is a ${type} toast`, + children: type === 'info' ? 'Additional details can live here.' : undefined, + }) + }, [notify]) + + return ( +
+ + + + +
+ ) +} + +const ToastProviderDemo = () => { + return ( + +
+
Toast provider
+ +
+
+ ) +} + +const StaticToastDemo = () => { + return ( +
+
Static API
+ +
+ ) +} + +const meta = { + title: 'Base/Feedback/Toast', + component: ToastProviderDemo, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'ToastProvider based notifications and the static Toast.notify helper. Buttons showcase each toast variant.', + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Provider: Story = {} + +export const StaticApi: Story = { + render: () => , +} diff --git a/web/app/components/base/tooltip/index.stories.tsx b/web/app/components/base/tooltip/index.stories.tsx new file mode 100644 index 0000000000..aeca69464f --- /dev/null +++ b/web/app/components/base/tooltip/index.stories.tsx @@ -0,0 +1,60 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import Tooltip from '.' + +const TooltipGrid = () => { + return ( +
+
Hover tooltips
+
+ + + + + + Right tooltip + + +
+
Click tooltips
+
+ + + + + + Plain content + + +
+
+ ) +} + +const meta = { + title: 'Base/Feedback/Tooltip', + component: TooltipGrid, + parameters: { + layout: 'centered', + docs: { + description: { + component: 'Portal-based tooltip component supporting hover and click triggers, custom placements, and decorated content.', + }, + }, + }, + tags: ['autodocs'], +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Playground: Story = {} diff --git a/web/app/components/base/video-gallery/index.stories.tsx b/web/app/components/base/video-gallery/index.stories.tsx new file mode 100644 index 0000000000..7e17ee208c --- /dev/null +++ b/web/app/components/base/video-gallery/index.stories.tsx @@ -0,0 +1,40 @@ +import type { Meta, StoryObj } from '@storybook/nextjs' +import VideoGallery from '.' + +const VIDEO_SOURCES = [ + 'https://interactive-examples.mdn.mozilla.net/media/cc0-videos/flower.mp4', + 'https://interactive-examples.mdn.mozilla.net/media/cc0-videos/forest.mp4', +] + +const meta = { + title: 'Base/Data Display/VideoGallery', + component: VideoGallery, + parameters: { + layout: 'fullscreen', + docs: { + description: { + component: 'Stacked list of video players with custom controls for progress, volume, and fullscreen.', + }, + source: { + language: 'tsx', + code: ` + + `.trim(), + }, + }, + }, + tags: ['autodocs'], + args: { + srcs: VIDEO_SOURCES, + }, +} satisfies Meta + +export default meta +type Story = StoryObj + +export const Default: Story = {} diff --git a/web/app/components/base/voice-input/index.stories.tsx b/web/app/components/base/voice-input/index.stories.tsx index 0a7980e9ac..de6a675ab3 100644 --- a/web/app/components/base/voice-input/index.stories.tsx +++ b/web/app/components/base/voice-input/index.stories.tsx @@ -29,7 +29,7 @@ const VoiceInputMock = ({ onConverted, onCancel }: any) => {
{/* Waveform visualization placeholder */}
- {new Array(40).fill(0).map((_, i) => ( + {Array.from({ length: 40 }).map((_, i) => (
{ } const meta = { - title: 'Base/Input/VoiceInput', + title: 'Base/Data Entry/VoiceInput', component: VoiceInputMock, parameters: { layout: 'centered', diff --git a/web/app/components/base/voice-input/utils.ts b/web/app/components/base/voice-input/utils.ts index 70133f459f..a8ac9eba03 100644 --- a/web/app/components/base/voice-input/utils.ts +++ b/web/app/components/base/voice-input/utils.ts @@ -14,13 +14,19 @@ export const convertToMp3 = (recorder: any) => { const { channels, sampleRate } = wav const mp3enc = new lamejs.Mp3Encoder(channels, sampleRate, 128) const result = recorder.getChannelData() - const buffer = [] + const buffer: BlobPart[] = [] const leftData = result.left && new Int16Array(result.left.buffer, 0, result.left.byteLength / 2) const rightData = result.right && new Int16Array(result.right.buffer, 0, result.right.byteLength / 2) const remaining = leftData.length + (rightData ? rightData.length : 0) const maxSamples = 1152 + const toArrayBuffer = (bytes: Int8Array) => { + const arrayBuffer = new ArrayBuffer(bytes.length) + new Uint8Array(arrayBuffer).set(bytes) + return arrayBuffer + } + for (let i = 0; i < remaining; i += maxSamples) { const left = leftData.subarray(i, i + maxSamples) let right = null @@ -35,13 +41,13 @@ export const convertToMp3 = (recorder: any) => { } if (mp3buf.length > 0) - buffer.push(mp3buf) + buffer.push(toArrayBuffer(mp3buf)) } const enc = mp3enc.flush() if (enc.length > 0) - buffer.push(enc) + buffer.push(toArrayBuffer(enc)) return new Blob(buffer, { type: 'audio/mp3' }) } diff --git a/web/app/components/base/with-input-validation/index.stories.tsx b/web/app/components/base/with-input-validation/index.stories.tsx index 5a7e4bc678..26fa9747d8 100644 --- a/web/app/components/base/with-input-validation/index.stories.tsx +++ b/web/app/components/base/with-input-validation/index.stories.tsx @@ -63,7 +63,7 @@ const ValidatedUserCard = withValidation(UserCard, userSchema) const ValidatedProductCard = withValidation(ProductCard, productSchema) const meta = { - title: 'Base/Input/WithInputValidation', + title: 'Base/Data Entry/WithInputValidation', parameters: { layout: 'centered', docs: { diff --git a/web/app/components/base/zendesk/index.tsx b/web/app/components/base/zendesk/index.tsx index a6971fe1db..031a044c34 100644 --- a/web/app/components/base/zendesk/index.tsx +++ b/web/app/components/base/zendesk/index.tsx @@ -1,13 +1,13 @@ import { memo } from 'react' -import { type UnsafeUnwrappedHeaders, headers } from 'next/headers' +import { headers } from 'next/headers' import Script from 'next/script' import { IS_CE_EDITION, ZENDESK_WIDGET_KEY } from '@/config' -const Zendesk = () => { +const Zendesk = async () => { if (IS_CE_EDITION || !ZENDESK_WIDGET_KEY) return null - const nonce = process.env.NODE_ENV === 'production' ? (headers() as unknown as UnsafeUnwrappedHeaders).get('x-nonce') ?? '' : '' + const nonce = process.env.NODE_ENV === 'production' ? (await headers()).get('x-nonce') ?? '' : '' return ( <> diff --git a/web/app/components/billing/config.ts b/web/app/components/billing/config.ts index 1d5fbc7491..5ab836ad18 100644 --- a/web/app/components/billing/config.ts +++ b/web/app/components/billing/config.ts @@ -3,7 +3,7 @@ import { Plan, type PlanInfo, Priority } from '@/app/components/billing/type' const supportModelProviders = 'OpenAI/Anthropic/Llama2/Azure OpenAI/Hugging Face/Replicate' -export const NUM_INFINITE = 99999999 +export const NUM_INFINITE = -1 export const contractSales = 'contractSales' export const unAvailable = 'unAvailable' @@ -26,6 +26,7 @@ export const ALL_PLANS: Record = { apiRateLimit: 5000, documentProcessingPriority: Priority.standard, messageRequest: 200, + triggerEvents: 3000, annotatedResponse: 10, logHistory: 30, }, @@ -43,6 +44,7 @@ export const ALL_PLANS: Record = { apiRateLimit: NUM_INFINITE, documentProcessingPriority: Priority.priority, messageRequest: 5000, + triggerEvents: 20000, annotatedResponse: 2000, logHistory: NUM_INFINITE, }, @@ -60,6 +62,7 @@ export const ALL_PLANS: Record = { apiRateLimit: NUM_INFINITE, documentProcessingPriority: Priority.topPriority, messageRequest: 10000, + triggerEvents: NUM_INFINITE, annotatedResponse: 5000, logHistory: NUM_INFINITE, }, @@ -74,6 +77,8 @@ export const defaultPlan = { teamMembers: 1, annotatedResponse: 1, documentsUploadQuota: 0, + apiRateLimit: 0, + triggerEvents: 0, }, total: { documents: 50, @@ -82,5 +87,11 @@ export const defaultPlan = { teamMembers: 1, annotatedResponse: 10, documentsUploadQuota: 0, + apiRateLimit: ALL_PLANS.sandbox.apiRateLimit, + triggerEvents: ALL_PLANS.sandbox.triggerEvents, + }, + reset: { + apiRateLimit: null, + triggerEvents: null, }, } diff --git a/web/app/components/billing/partner-stack/index.tsx b/web/app/components/billing/partner-stack/index.tsx new file mode 100644 index 0000000000..84a09e260d --- /dev/null +++ b/web/app/components/billing/partner-stack/index.tsx @@ -0,0 +1,20 @@ +'use client' +import { IS_CLOUD_EDITION } from '@/config' +import type { FC } from 'react' +import React, { useEffect } from 'react' +import usePSInfo from './use-ps-info' + +const PartnerStack: FC = () => { + const { saveOrUpdate, bind } = usePSInfo() + useEffect(() => { + if (!IS_CLOUD_EDITION) + return + // Save PartnerStack info in cookie first. Because if user hasn't logged in, redirecting to login page would cause lose the partnerStack info in URL. + saveOrUpdate() + // bind PartnerStack info after user logged in + bind() + }, []) + + return null +} +export default React.memo(PartnerStack) diff --git a/web/app/components/billing/partner-stack/use-ps-info.ts b/web/app/components/billing/partner-stack/use-ps-info.ts new file mode 100644 index 0000000000..a308f7446e --- /dev/null +++ b/web/app/components/billing/partner-stack/use-ps-info.ts @@ -0,0 +1,70 @@ +import { PARTNER_STACK_CONFIG } from '@/config' +import { useBindPartnerStackInfo } from '@/service/use-billing' +import { useBoolean } from 'ahooks' +import Cookies from 'js-cookie' +import { useSearchParams } from 'next/navigation' +import { useCallback } from 'react' + +const usePSInfo = () => { + const searchParams = useSearchParams() + const psInfoInCookie = (() => { + try { + return JSON.parse(Cookies.get(PARTNER_STACK_CONFIG.cookieName) || '{}') + } + catch (e) { + console.error('Failed to parse partner stack info from cookie:', e) + return {} + } + })() + const psPartnerKey = searchParams.get('ps_partner_key') || psInfoInCookie?.partnerKey + const psClickId = searchParams.get('ps_xid') || psInfoInCookie?.clickId + const isPSChanged = psInfoInCookie?.partnerKey !== psPartnerKey || psInfoInCookie?.clickId !== psClickId + const [hasBind, { + setTrue: setBind, + }] = useBoolean(false) + const { mutateAsync } = useBindPartnerStackInfo() + // Save to top domain. cloud.dify.ai => .dify.ai + const domain = globalThis.location.hostname.replace('cloud', '') + + const saveOrUpdate = useCallback(() => { + if(!psPartnerKey || !psClickId) + return + if(!isPSChanged) + return + Cookies.set(PARTNER_STACK_CONFIG.cookieName, JSON.stringify({ + partnerKey: psPartnerKey, + clickId: psClickId, + }), { + expires: PARTNER_STACK_CONFIG.saveCookieDays, + path: '/', + domain, + }) + }, [psPartnerKey, psClickId, isPSChanged]) + + const bind = useCallback(async () => { + if (psPartnerKey && psClickId && !hasBind) { + let shouldRemoveCookie = false + try { + await mutateAsync({ + partnerKey: psPartnerKey, + clickId: psClickId, + }) + shouldRemoveCookie = true + } + catch (error: unknown) { + if((error as { status: number })?.status === 400) + shouldRemoveCookie = true + } + if (shouldRemoveCookie) + Cookies.remove(PARTNER_STACK_CONFIG.cookieName, { path: '/', domain }) + setBind() + } + }, [psPartnerKey, psClickId, mutateAsync, hasBind, setBind]) + return { + psPartnerKey, + psClickId, + saveOrUpdate, + bind, + } +} +export default usePSInfo diff --git a/web/app/components/billing/plan/index.tsx b/web/app/components/billing/plan/index.tsx index dd3908635b..b695302965 100644 --- a/web/app/components/billing/plan/index.tsx +++ b/web/app/components/billing/plan/index.tsx @@ -10,9 +10,12 @@ import { RiGroupLine, } from '@remixicon/react' import { Plan, SelfHostedPlan } from '../type' +import { NUM_INFINITE } from '../config' +import { getDaysUntilEndOfMonth } from '@/utils/time' import VectorSpaceInfo from '../usage-info/vector-space-info' import AppsInfo from '../usage-info/apps-info' import UpgradeBtn from '../upgrade-btn' +import { ApiAggregate, TriggerAll } from '@/app/components/base/icons/src/vender/workflow' import { useProviderContext } from '@/context/provider-context' import { useAppContext } from '@/context/app-context' import Button from '@/app/components/base/button' @@ -42,7 +45,20 @@ const PlanComp: FC = ({ const { usage, total, + reset, } = plan + const triggerEventsResetInDays = type === Plan.professional && total.triggerEvents !== NUM_INFINITE + ? reset.triggerEvents ?? undefined + : undefined + const apiRateLimitResetInDays = (() => { + if (total.apiRateLimit === NUM_INFINITE) + return undefined + if (typeof reset.apiRateLimit === 'number') + return reset.apiRateLimit + if (type === Plan.sandbox) + return getDaysUntilEndOfMonth() + return undefined + })() const [showModal, setShowModal] = React.useState(false) const { mutateAsync } = useEducationVerify() @@ -75,7 +91,6 @@ const PlanComp: FC = ({
{t(`billing.plans.${type}.name`)}
-
{t('billing.currentPlan')}
{t(`billing.plans.${type}.for`)}
@@ -119,6 +134,22 @@ const PlanComp: FC = ({ usage={usage.annotatedResponse} total={total.annotatedResponse} /> + +
= ({ const [planRange, setPlanRange] = React.useState(PlanRange.monthly) const [currentCategory, setCurrentCategory] = useState(CategoryEnum.CLOUD) const canPay = isCurrentWorkspaceManager - useKeyPress(['esc'], onCancel) const pricingPageLanguage = useGetPricingPageLanguage() diff --git a/web/app/components/billing/pricing/plans/cloud-plan-item/list/index.tsx b/web/app/components/billing/pricing/plans/cloud-plan-item/list/index.tsx index 0420bfc7a3..7674affc15 100644 --- a/web/app/components/billing/pricing/plans/cloud-plan-item/list/index.tsx +++ b/web/app/components/billing/pricing/plans/cloud-plan-item/list/index.tsx @@ -46,15 +46,36 @@ const List = ({ label={t('billing.plansCommon.documentsRequestQuota', { count: planInfo.documentsRequestQuota })} tooltip={t('billing.plansCommon.documentsRequestQuotaTooltip')} /> + + + + { + const currentPlanType: BasicPlan = plan.type === Plan.enterprise ? Plan.team : plan.type return (
@@ -28,21 +29,21 @@ const Plans = ({ currentPlan === 'cloud' && ( <> void + onUpgrade: () => void + usage: number + total: number + resetInDays?: number + planType: Plan +} + +const TriggerEventsLimitModal: FC = ({ + show, + onDismiss, + onUpgrade, + usage, + total, + resetInDays, +}) => { + const { t } = useTranslation() + + return ( + +
+
+
+
+ +
+
+
+ {t('billing.triggerLimitModal.title')} +
+
+ {t('billing.triggerLimitModal.description')} +
+
+ +
+
+ +
+ + +
+ + ) +} + +export default React.memo(TriggerEventsLimitModal) diff --git a/web/app/components/billing/type.ts b/web/app/components/billing/type.ts index 7cc4d19755..53b8b5b352 100644 --- a/web/app/components/billing/type.ts +++ b/web/app/components/billing/type.ts @@ -27,6 +27,7 @@ export type PlanInfo = { documentProcessingPriority: Priority logHistory: number messageRequest: number + triggerEvents: number annotatedResponse: number } @@ -52,7 +53,18 @@ export type SelfHostedPlanInfo = { annotatedResponse: number } -export type UsagePlanInfo = Pick & { vectorSpace: number } +export type UsagePlanInfo = Pick & { vectorSpace: number } + +export type UsageResetInfo = { + apiRateLimit?: number | null + triggerEvents?: number | null +} + +export type BillingQuota = { + usage: number + limit: number + reset_date?: number | null +} export enum DocumentProcessingPriority { standard = 'standard', @@ -87,6 +99,8 @@ export type CurrentPlanInfoBackend = { size: number limit: number // total. 0 means unlimited } + api_rate_limit?: BillingQuota + trigger_event?: BillingQuota docs_processing: DocumentProcessingPriority can_replace_logo: boolean model_load_balancing_enabled: boolean diff --git a/web/app/components/billing/upgrade-btn/index.tsx b/web/app/components/billing/upgrade-btn/index.tsx index f3ae95a10b..d576e07f3e 100644 --- a/web/app/components/billing/upgrade-btn/index.tsx +++ b/web/app/components/billing/upgrade-btn/index.tsx @@ -1,5 +1,5 @@ 'use client' -import type { FC } from 'react' +import type { CSSProperties, FC } from 'react' import React from 'react' import { useTranslation } from 'react-i18next' import PremiumBadge from '../../base/premium-badge' @@ -9,19 +9,24 @@ import { useModalContext } from '@/context/modal-context' type Props = { className?: string + style?: CSSProperties isFull?: boolean size?: 'md' | 'lg' isPlain?: boolean isShort?: boolean onClick?: () => void loc?: string + labelKey?: string } const UpgradeBtn: FC = ({ + className, + style, isPlain = false, isShort = false, onClick: _onClick, loc, + labelKey, }) => { const { t } = useTranslation() const { setShowPricingModal } = useModalContext() @@ -40,10 +45,17 @@ const UpgradeBtn: FC = ({ } } + const defaultBadgeLabel = t(`billing.upgradeBtn.${isShort ? 'encourageShort' : 'encourage'}`) + const label = labelKey ? t(labelKey) : defaultBadgeLabel + if (isPlain) { return ( - ) } @@ -54,11 +66,13 @@ const UpgradeBtn: FC = ({ color='blue' allowHover={true} onClick={onClick} + className={className} + style={style} >
- {t(`billing.upgradeBtn.${isShort ? 'encourageShort' : 'encourage'}`)} + {label}
diff --git a/web/app/components/billing/usage-info/index.tsx b/web/app/components/billing/usage-info/index.tsx index 30b4bca776..668d49d698 100644 --- a/web/app/components/billing/usage-info/index.tsx +++ b/web/app/components/billing/usage-info/index.tsx @@ -15,10 +15,13 @@ type Props = { usage: number total: number unit?: string + unitPosition?: 'inline' | 'suffix' + resetHint?: string + resetInDays?: number + hideIcon?: boolean } -const LOW = 50 -const MIDDLE = 80 +const WARNING_THRESHOLD = 80 const UsageInfo: FC = ({ className, @@ -27,23 +30,41 @@ const UsageInfo: FC = ({ tooltip, usage, total, - unit = '', + unit, + unitPosition = 'suffix', + resetHint, + resetInDays, + hideIcon = false, }) => { const { t } = useTranslation() const percent = usage / total * 100 - const color = (() => { - if (percent < LOW) - return 'bg-components-progress-bar-progress-solid' + const color = percent >= 100 + ? 'bg-components-progress-error-progress' + : (percent >= WARNING_THRESHOLD ? 'bg-components-progress-warning-progress' : 'bg-components-progress-bar-progress-solid') + const isUnlimited = total === NUM_INFINITE + let totalDisplay: string | number = isUnlimited ? t('billing.plansCommon.unlimited') : total + if (!isUnlimited && unit && unitPosition === 'inline') + totalDisplay = `${total}${unit}` + const showUnit = !!unit && !isUnlimited && unitPosition === 'suffix' + const resetText = resetHint ?? (typeof resetInDays === 'number' ? t('billing.usagePage.resetsIn', { count: resetInDays }) : undefined) + const rightInfo = resetText + ? ( +
+ {resetText} +
+ ) + : (showUnit && ( +
+ {unit} +
+ )) - if (percent < MIDDLE) - return 'bg-components-progress-warning-progress' - - return 'bg-components-progress-error-progress' - })() return (
- + {!hideIcon && Icon && ( + + )}
{name}
{tooltip && ( @@ -56,10 +77,13 @@ const UsageInfo: FC = ({ /> )}
-
- {usage} -
/
-
{total === NUM_INFINITE ? t('billing.plansCommon.unlimited') : `${total}${unit}`}
+
+
+ {usage} +
/
+
{totalDisplay}
+
+ {rightInfo}
= ({ usage={usage.vectorSpace} total={total.vectorSpace} unit='MB' + unitPosition='inline' /> ) } diff --git a/web/app/components/billing/utils/index.ts b/web/app/components/billing/utils/index.ts index 111f02e3cf..724b62e8cb 100644 --- a/web/app/components/billing/utils/index.ts +++ b/web/app/components/billing/utils/index.ts @@ -1,5 +1,6 @@ -import type { CurrentPlanInfoBackend } from '../type' -import { NUM_INFINITE } from '@/app/components/billing/config' +import dayjs from 'dayjs' +import type { BillingQuota, CurrentPlanInfoBackend } from '../type' +import { ALL_PLANS, NUM_INFINITE } from '@/app/components/billing/config' const parseLimit = (limit: number) => { if (limit === 0) @@ -8,15 +9,71 @@ const parseLimit = (limit: number) => { return limit } +const parseRateLimit = (limit: number) => { + if (limit === 0 || limit === -1) + return NUM_INFINITE + + return limit +} + +const normalizeResetDate = (resetDate?: number | null) => { + if (typeof resetDate !== 'number' || resetDate <= 0) + return null + + if (resetDate >= 1e12) + return dayjs(resetDate) + + if (resetDate >= 1e9) + return dayjs(resetDate * 1000) + + const digits = resetDate.toString() + if (digits.length === 8) { + const year = digits.slice(0, 4) + const month = digits.slice(4, 6) + const day = digits.slice(6, 8) + const parsed = dayjs(`${year}-${month}-${day}`) + return parsed.isValid() ? parsed : null + } + + return null +} + +const getResetInDaysFromDate = (resetDate?: number | null) => { + const resetDay = normalizeResetDate(resetDate) + if (!resetDay) + return null + + const diff = resetDay.startOf('day').diff(dayjs().startOf('day'), 'day') + if (Number.isNaN(diff) || diff < 0) + return null + + return diff +} + export const parseCurrentPlan = (data: CurrentPlanInfoBackend) => { + const planType = data.billing.subscription.plan + const planPreset = ALL_PLANS[planType] + const resolveRateLimit = (limit?: number, fallback?: number) => { + const value = limit ?? fallback ?? 0 + return parseRateLimit(value) + } + const getQuotaUsage = (quota?: BillingQuota) => quota?.usage ?? 0 + const getQuotaResetInDays = (quota?: BillingQuota) => { + if (!quota) + return null + return getResetInDaysFromDate(quota.reset_date) + } + return { - type: data.billing.subscription.plan, + type: planType, usage: { vectorSpace: data.vector_space.size, buildApps: data.apps?.size || 0, teamMembers: data.members.size, annotatedResponse: data.annotation_quota_limit.size, documentsUploadQuota: data.documents_upload_quota.size, + apiRateLimit: getQuotaUsage(data.api_rate_limit), + triggerEvents: getQuotaUsage(data.trigger_event), }, total: { vectorSpace: parseLimit(data.vector_space.limit), @@ -24,6 +81,12 @@ export const parseCurrentPlan = (data: CurrentPlanInfoBackend) => { teamMembers: parseLimit(data.members.limit), annotatedResponse: parseLimit(data.annotation_quota_limit.limit), documentsUploadQuota: parseLimit(data.documents_upload_quota.limit), + apiRateLimit: resolveRateLimit(data.api_rate_limit?.limit, planPreset?.apiRateLimit ?? NUM_INFINITE), + triggerEvents: resolveRateLimit(data.trigger_event?.limit, planPreset?.triggerEvents), + }, + reset: { + apiRateLimit: getQuotaResetInDays(data.api_rate_limit), + triggerEvents: getQuotaResetInDays(data.trigger_event), }, } } diff --git a/web/app/components/custom/custom-web-app-brand/index.tsx b/web/app/components/custom/custom-web-app-brand/index.tsx index eb06265042..fee0bf75f7 100644 --- a/web/app/components/custom/custom-web-app-brand/index.tsx +++ b/web/app/components/custom/custom-web-app-brand/index.tsx @@ -16,7 +16,7 @@ import Button from '@/app/components/base/button' import Divider from '@/app/components/base/divider' import { useProviderContext } from '@/context/provider-context' import { Plan } from '@/app/components/billing/type' -import { imageUpload } from '@/app/components/base/image-uploader/utils' +import { getImageUploadErrorMessage, imageUpload } from '@/app/components/base/image-uploader/utils' import { useToastContext } from '@/app/components/base/toast' import { BubbleTextMod } from '@/app/components/base/icons/src/vender/solid/communication' import { @@ -67,8 +67,9 @@ const CustomWebAppBrand = () => { setUploadProgress(100) setFileId(res.id) }, - onErrorCallback: () => { - notify({ type: 'error', message: t('common.imageUploader.uploadFromComputerUploadError') }) + onErrorCallback: (error?: any) => { + const errorMessage = getImageUploadErrorMessage(error, t('common.imageUploader.uploadFromComputerUploadError'), t) + notify({ type: 'error', message: errorMessage }) setUploadProgress(-1) }, }, false, '/workspaces/custom-config/webapp-logo/upload') diff --git a/web/app/components/datasets/create-from-pipeline/list/built-in-pipeline-list.tsx b/web/app/components/datasets/create-from-pipeline/list/built-in-pipeline-list.tsx index 6d22f2115a..74e565a494 100644 --- a/web/app/components/datasets/create-from-pipeline/list/built-in-pipeline-list.tsx +++ b/web/app/components/datasets/create-from-pipeline/list/built-in-pipeline-list.tsx @@ -4,6 +4,7 @@ import CreateCard from './create-card' import { useI18N } from '@/context/i18n' import { useMemo } from 'react' import { LanguagesSupported } from '@/i18n-config/language' +import { useGlobalPublicStore } from '@/context/global-public-context' const BuiltInPipelineList = () => { const { locale } = useI18N() @@ -12,7 +13,8 @@ const BuiltInPipelineList = () => { return locale return LanguagesSupported[0] }, [locale]) - const { data: pipelineList, isLoading } = usePipelineTemplateList({ type: 'built-in', language }) + const enableMarketplace = useGlobalPublicStore(s => s.systemFeatures.enable_marketplace) + const { data: pipelineList, isLoading } = usePipelineTemplateList({ type: 'built-in', language }, enableMarketplace) const list = pipelineList?.pipeline_templates || [] return ( diff --git a/web/app/components/datasets/create/embedding-process/index.tsx b/web/app/components/datasets/create/embedding-process/index.tsx index df64f26ab9..7b2eda1dcd 100644 --- a/web/app/components/datasets/create/embedding-process/index.tsx +++ b/web/app/components/datasets/create/embedding-process/index.tsx @@ -18,7 +18,13 @@ import DocumentFileIcon from '../../common/document-file-icon' import cn from '@/utils/classnames' import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata' import Button from '@/app/components/base/button' -import type { FullDocumentDetail, IndexingStatusResponse, ProcessRuleResponse } from '@/models/datasets' +import type { + DataSourceInfo, + FullDocumentDetail, + IndexingStatusResponse, + LegacyDataSourceInfo, + ProcessRuleResponse, +} from '@/models/datasets' import { fetchIndexingStatusBatch as doFetchIndexingStatus, fetchProcessRule } from '@/service/datasets' import { DataSourceType, ProcessMode } from '@/models/datasets' import NotionIcon from '@/app/components/base/notion-icon' @@ -241,10 +247,16 @@ const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], index return doc?.data_source_type as DataSourceType } + const isLegacyDataSourceInfo = (info: DataSourceInfo): info is LegacyDataSourceInfo => { + return info != null && typeof (info as LegacyDataSourceInfo).upload_file === 'object' + } + const getIcon = (id: string) => { const doc = documents.find(document => document.id === id) - - return doc?.data_source_info.notion_page_icon + const info = doc?.data_source_info + if (info && isLegacyDataSourceInfo(info)) + return info.notion_page_icon + return undefined } const isSourceEmbedding = (detail: IndexingStatusResponse) => ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '') diff --git a/web/app/components/datasets/create/file-uploader/index.tsx b/web/app/components/datasets/create/file-uploader/index.tsx index 43d69d1889..4aec0d4082 100644 --- a/web/app/components/datasets/create/file-uploader/index.tsx +++ b/web/app/components/datasets/create/file-uploader/index.tsx @@ -18,8 +18,7 @@ import { LanguagesSupported } from '@/i18n-config/language' import { IS_CE_EDITION } from '@/config' import { Theme } from '@/types/app' import useTheme from '@/hooks/use-theme' - -const FILES_NUMBER_LIMIT = 20 +import { getFileUploadErrorMessage } from '@/app/components/base/file-uploader/utils' type IFileUploaderProps = { fileList: FileItem[] @@ -72,6 +71,7 @@ const FileUploader = ({ const fileUploadConfig = useMemo(() => fileUploadConfigResponse ?? { file_size_limit: 15, batch_count_limit: 5, + file_upload_limit: 5, }, [fileUploadConfigResponse]) const fileListRef = useRef([]) @@ -121,10 +121,10 @@ const FileUploader = ({ data: formData, onprogress: onProgress, }, false, undefined, '?source=datasets') - .then((res: File) => { + .then((res) => { const completeFile = { fileID: fileItem.fileID, - file: res, + file: res as unknown as File, progress: -1, } const index = fileListRef.current.findIndex(item => item.fileID === fileItem.fileID) @@ -133,7 +133,8 @@ const FileUploader = ({ return Promise.resolve({ ...completeFile }) }) .catch((e) => { - notify({ type: 'error', message: e?.response?.code === 'forbidden' ? e?.response?.message : t('datasetCreation.stepOne.uploader.failed') }) + const errorMessage = getFileUploadErrorMessage(e, t('datasetCreation.stepOne.uploader.failed'), t) + notify({ type: 'error', message: errorMessage }) onFileUpdate(fileItem, -2, fileListRef.current) return Promise.resolve({ ...fileItem }) }) @@ -163,11 +164,12 @@ const FileUploader = ({ }, [fileUploadConfig, uploadBatchFiles]) const initialUpload = useCallback((files: File[]) => { + const filesCountLimit = fileUploadConfig.file_upload_limit if (!files.length) return false - if (files.length + fileList.length > FILES_NUMBER_LIMIT && !IS_CE_EDITION) { - notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.filesNumber', { filesNumber: FILES_NUMBER_LIMIT }) }) + if (files.length + fileList.length > filesCountLimit && !IS_CE_EDITION) { + notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.filesNumber', { filesNumber: filesCountLimit }) }) return false } @@ -180,7 +182,7 @@ const FileUploader = ({ prepareFileList(newFiles) fileListRef.current = newFiles uploadMultipleFiles(preparedFiles) - }, [prepareFileList, uploadMultipleFiles, notify, t, fileList]) + }, [prepareFileList, uploadMultipleFiles, notify, t, fileList, fileUploadConfig]) const handleDragEnter = (e: DragEvent) => { e.preventDefault() @@ -255,10 +257,11 @@ const FileUploader = ({ ) let files = nested.flat() if (notSupportBatchUpload) files = files.slice(0, 1) + files = files.slice(0, fileUploadConfig.batch_count_limit) const valid = files.filter(isValid) initialUpload(valid) }, - [initialUpload, isValid, notSupportBatchUpload, traverseFileEntry], + [initialUpload, isValid, notSupportBatchUpload, traverseFileEntry, fileUploadConfig], ) const selectHandle = () => { if (fileUploader.current) @@ -273,9 +276,10 @@ const FileUploader = ({ onFileListUpdate?.([...fileListRef.current]) } const fileChangeHandle = useCallback((e: React.ChangeEvent) => { - const files = [...(e.target.files ?? [])] as File[] + let files = [...(e.target.files ?? [])] as File[] + files = files.slice(0, fileUploadConfig.batch_count_limit) initialUpload(files.filter(isValid)) - }, [isValid, initialUpload]) + }, [isValid, initialUpload, fileUploadConfig]) const { theme } = useTheme() const chartColor = useMemo(() => theme === Theme.dark ? '#5289ff' : '#296dff', [theme]) @@ -325,6 +329,7 @@ const FileUploader = ({ size: fileUploadConfig.file_size_limit, supportTypes: supportTypesShowNames, batchCount: notSupportBatchUpload ? 1 : fileUploadConfig.batch_count_limit, + totalCount: fileUploadConfig.file_upload_limit, })}
{dragging &&
}
diff --git a/web/app/components/datasets/create/index.tsx b/web/app/components/datasets/create/index.tsx index 11def1a8bc..b04bd85530 100644 --- a/web/app/components/datasets/create/index.tsx +++ b/web/app/components/datasets/create/index.tsx @@ -16,6 +16,7 @@ import { useGetDefaultDataSourceListAuth } from '@/service/use-datasource' import { produce } from 'immer' import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' import Loading from '@/app/components/base/loading' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' type DatasetUpdateFormProps = { datasetId?: string @@ -117,7 +118,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { {step === 1 && ( setShowAccountSettingModal({ payload: 'data-source' })} + onSetting={() => setShowAccountSettingModal({ payload: ACCOUNT_SETTING_TAB.DATA_SOURCE })} datasetId={datasetId} dataSourceType={dataSourceType} dataSourceTypeDisable={!!datasetDetail?.data_source_type} @@ -141,7 +142,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { {(step === 2 && (!datasetId || (datasetId && !!datasetDetail))) && ( setShowAccountSettingModal({ payload: 'provider' })} + onSetting={() => setShowAccountSettingModal({ payload: ACCOUNT_SETTING_TAB.PROVIDER })} indexingType={datasetDetail?.indexing_technique} datasetId={datasetId} dataSourceType={dataSourceType} diff --git a/web/app/components/datasets/create/website/firecrawl/index.tsx b/web/app/components/datasets/create/website/firecrawl/index.tsx index 8d207a0386..51c2c7d505 100644 --- a/web/app/components/datasets/create/website/firecrawl/index.tsx +++ b/web/app/components/datasets/create/website/firecrawl/index.tsx @@ -14,6 +14,7 @@ import Toast from '@/app/components/base/toast' import { checkFirecrawlTaskStatus, createFirecrawlTask } from '@/service/datasets' import { sleep } from '@/utils' import Header from '../base/header' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' const ERROR_I18N_PREFIX = 'common.errorMsg' const I18N_PREFIX = 'datasetCreation.stepOne.website' @@ -51,7 +52,7 @@ const FireCrawl: FC = ({ const setShowAccountSettingModal = useModalContextSelector(s => s.setShowAccountSettingModal) const handleSetting = useCallback(() => { setShowAccountSettingModal({ - payload: 'data-source', + payload: ACCOUNT_SETTING_TAB.DATA_SOURCE, }) }, [setShowAccountSettingModal]) diff --git a/web/app/components/datasets/create/website/index.tsx b/web/app/components/datasets/create/website/index.tsx index 7190ca3228..ee7ace6815 100644 --- a/web/app/components/datasets/create/website/index.tsx +++ b/web/app/components/datasets/create/website/index.tsx @@ -13,6 +13,7 @@ import type { CrawlOptions, CrawlResultItem } from '@/models/datasets' import { DataSourceProvider } from '@/models/common' import { ENABLE_WEBSITE_FIRECRAWL, ENABLE_WEBSITE_JINAREADER, ENABLE_WEBSITE_WATERCRAWL } from '@/config' import type { DataSourceAuth } from '@/app/components/header/account-setting/data-source-page-new/types' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' type Props = { onPreview: (payload: CrawlResultItem) => void @@ -48,7 +49,7 @@ const Website: FC = ({ const handleOnConfig = useCallback(() => { setShowAccountSettingModal({ - payload: 'data-source', + payload: ACCOUNT_SETTING_TAB.DATA_SOURCE, }) }, [setShowAccountSettingModal]) diff --git a/web/app/components/datasets/create/website/jina-reader/index.tsx b/web/app/components/datasets/create/website/jina-reader/index.tsx index 460c169fb4..b6e6177af2 100644 --- a/web/app/components/datasets/create/website/jina-reader/index.tsx +++ b/web/app/components/datasets/create/website/jina-reader/index.tsx @@ -14,6 +14,7 @@ import { checkJinaReaderTaskStatus, createJinaReaderTask } from '@/service/datas import { sleep } from '@/utils' import type { CrawlOptions, CrawlResultItem } from '@/models/datasets' import Header from '../base/header' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' const ERROR_I18N_PREFIX = 'common.errorMsg' const I18N_PREFIX = 'datasetCreation.stepOne.website' @@ -51,7 +52,7 @@ const JinaReader: FC = ({ const { setShowAccountSettingModal } = useModalContext() const handleSetting = useCallback(() => { setShowAccountSettingModal({ - payload: 'data-source', + payload: ACCOUNT_SETTING_TAB.DATA_SOURCE, }) }, [setShowAccountSettingModal]) diff --git a/web/app/components/datasets/create/website/watercrawl/index.tsx b/web/app/components/datasets/create/website/watercrawl/index.tsx index 640b1c2063..67a3e53feb 100644 --- a/web/app/components/datasets/create/website/watercrawl/index.tsx +++ b/web/app/components/datasets/create/website/watercrawl/index.tsx @@ -14,6 +14,7 @@ import Toast from '@/app/components/base/toast' import { checkWatercrawlTaskStatus, createWatercrawlTask } from '@/service/datasets' import { sleep } from '@/utils' import Header from '../base/header' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' const ERROR_I18N_PREFIX = 'common.errorMsg' const I18N_PREFIX = 'datasetCreation.stepOne.website' @@ -51,7 +52,7 @@ const WaterCrawl: FC = ({ const { setShowAccountSettingModal } = useModalContext() const handleSetting = useCallback(() => { setShowAccountSettingModal({ - payload: 'data-source', + payload: ACCOUNT_SETTING_TAB.DATA_SOURCE, }) }, [setShowAccountSettingModal]) diff --git a/web/app/components/datasets/documents/create-from-pipeline/data-source/local-file/index.tsx b/web/app/components/datasets/documents/create-from-pipeline/data-source/local-file/index.tsx index 47da96c2de..868621e1a3 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/data-source/local-file/index.tsx +++ b/web/app/components/datasets/documents/create-from-pipeline/data-source/local-file/index.tsx @@ -8,6 +8,7 @@ import cn from '@/utils/classnames' import type { CustomFile as File, FileItem } from '@/models/datasets' import { ToastContext } from '@/app/components/base/toast' import { upload } from '@/service/base' +import { getFileUploadErrorMessage } from '@/app/components/base/file-uploader/utils' import I18n from '@/context/i18n' import { LanguagesSupported } from '@/i18n-config/language' import { IS_CE_EDITION } from '@/config' @@ -121,6 +122,8 @@ const LocalFile = ({ return isValidType && isValidSize }, [fileUploadConfig, notify, t, ACCEPTS]) + type UploadResult = Awaited> + const fileUpload = useCallback(async (fileItem: FileItem): Promise => { const formData = new FormData() formData.append('file', fileItem.file) @@ -136,10 +139,14 @@ const LocalFile = ({ data: formData, onprogress: onProgress, }, false, undefined, '?source=datasets') - .then((res: File) => { - const completeFile = { + .then((res: UploadResult) => { + const updatedFile = Object.assign({}, fileItem.file, { + id: res.id, + ...(res as Partial), + }) as File + const completeFile: FileItem = { fileID: fileItem.fileID, - file: res, + file: updatedFile, progress: -1, } const index = fileListRef.current.findIndex(item => item.fileID === fileItem.fileID) @@ -148,7 +155,8 @@ const LocalFile = ({ return Promise.resolve({ ...completeFile }) }) .catch((e) => { - notify({ type: 'error', message: e?.response?.code === 'forbidden' ? e?.response?.message : t('datasetCreation.stepOne.uploader.failed') }) + const errorMessage = getFileUploadErrorMessage(e, t('datasetCreation.stepOne.uploader.failed'), t) + notify({ type: 'error', message: errorMessage }) updateFile(fileItem, -2, fileListRef.current) return Promise.resolve({ ...fileItem }) }) diff --git a/web/app/components/datasets/documents/create-from-pipeline/data-source/online-documents/index.tsx b/web/app/components/datasets/documents/create-from-pipeline/data-source/online-documents/index.tsx index f5cbac909d..97d6721e00 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/data-source/online-documents/index.tsx +++ b/web/app/components/datasets/documents/create-from-pipeline/data-source/online-documents/index.tsx @@ -16,6 +16,7 @@ import Title from './title' import { useGetDataSourceAuth } from '@/service/use-datasource' import Loading from '@/app/components/base/loading' import { useDocLink } from '@/context/i18n' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' type OnlineDocumentsProps = { isInPipeline?: boolean @@ -120,7 +121,7 @@ const OnlineDocuments = ({ const handleSetting = useCallback(() => { setShowAccountSettingModal({ - payload: 'data-source', + payload: ACCOUNT_SETTING_TAB.DATA_SOURCE, }) }, [setShowAccountSettingModal]) diff --git a/web/app/components/datasets/documents/create-from-pipeline/data-source/online-drive/index.tsx b/web/app/components/datasets/documents/create-from-pipeline/data-source/online-drive/index.tsx index ed2820675c..da8fd5dcc0 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/data-source/online-drive/index.tsx +++ b/web/app/components/datasets/documents/create-from-pipeline/data-source/online-drive/index.tsx @@ -15,6 +15,7 @@ import { useShallow } from 'zustand/react/shallow' import { useModalContextSelector } from '@/context/modal-context' import { useGetDataSourceAuth } from '@/service/use-datasource' import { useDocLink } from '@/context/i18n' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' type OnlineDriveProps = { nodeId: string @@ -180,7 +181,7 @@ const OnlineDrive = ({ const handleSetting = useCallback(() => { setShowAccountSettingModal({ - payload: 'data-source', + payload: ACCOUNT_SETTING_TAB.DATA_SOURCE, }) }, [setShowAccountSettingModal]) diff --git a/web/app/components/datasets/documents/create-from-pipeline/data-source/website-crawl/index.tsx b/web/app/components/datasets/documents/create-from-pipeline/data-source/website-crawl/index.tsx index c46cbdf0f1..648f6a5d93 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/data-source/website-crawl/index.tsx +++ b/web/app/components/datasets/documents/create-from-pipeline/data-source/website-crawl/index.tsx @@ -26,6 +26,7 @@ import { useShallow } from 'zustand/react/shallow' import { useModalContextSelector } from '@/context/modal-context' import { useGetDataSourceAuth } from '@/service/use-datasource' import { useDocLink } from '@/context/i18n' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' const I18N_PREFIX = 'datasetCreation.stepOne.website' @@ -139,7 +140,7 @@ const WebsiteCrawl = ({ const handleSetting = useCallback(() => { setShowAccountSettingModal({ - payload: 'data-source', + payload: ACCOUNT_SETTING_TAB.DATA_SOURCE, }) }, [setShowAccountSettingModal]) diff --git a/web/app/components/datasets/documents/detail/batch-modal/csv-uploader.tsx b/web/app/components/datasets/documents/detail/batch-modal/csv-uploader.tsx index 7e8749f0bf..317db84c43 100644 --- a/web/app/components/datasets/documents/detail/batch-modal/csv-uploader.tsx +++ b/web/app/components/datasets/documents/detail/batch-modal/csv-uploader.tsx @@ -12,6 +12,7 @@ import { ToastContext } from '@/app/components/base/toast' import Button from '@/app/components/base/button' import type { FileItem } from '@/models/datasets' import { upload } from '@/service/base' +import { getFileUploadErrorMessage } from '@/app/components/base/file-uploader/utils' import useSWR from 'swr' import { fetchFileUploadConfig } from '@/service/common' import SimplePieChart from '@/app/components/base/simple-pie-chart' @@ -38,6 +39,8 @@ const CSVUploader: FC = ({ file_size_limit: 15, }, [fileUploadConfigResponse]) + type UploadResult = Awaited> + const fileUpload = useCallback(async (fileItem: FileItem): Promise => { fileItem.progress = 0 @@ -58,17 +61,22 @@ const CSVUploader: FC = ({ data: formData, onprogress: onProgress, }, false, undefined, '?source=datasets') - .then((res: File) => { - const completeFile = { + .then((res: UploadResult) => { + const updatedFile = Object.assign({}, fileItem.file, { + id: res.id, + ...(res as Partial), + }) as File + const completeFile: FileItem = { fileID: fileItem.fileID, - file: res, + file: updatedFile, progress: 100, } updateFile(completeFile) return Promise.resolve({ ...completeFile }) }) .catch((e) => { - notify({ type: 'error', message: e?.response?.code === 'forbidden' ? e?.response?.message : t('datasetCreation.stepOne.uploader.failed') }) + const errorMessage = getFileUploadErrorMessage(e, t('datasetCreation.stepOne.uploader.failed'), t) + notify({ type: 'error', message: errorMessage }) const errorFile = { ...fileItem, progress: -2, diff --git a/web/app/components/datasets/documents/detail/completed/common/batch-action.tsx b/web/app/components/datasets/documents/detail/completed/common/batch-action.tsx index 900ab3fb5a..c152ec5400 100644 --- a/web/app/components/datasets/documents/detail/completed/common/batch-action.tsx +++ b/web/app/components/datasets/documents/detail/completed/common/batch-action.tsx @@ -44,8 +44,8 @@ const BatchAction: FC = ({ hideDeleteConfirm() } return ( -
-
+
+
{selectedIds.length} diff --git a/web/app/components/datasets/documents/detail/completed/common/regeneration-modal.tsx b/web/app/components/datasets/documents/detail/completed/common/regeneration-modal.tsx index 95bb339db9..f90fd7ac60 100644 --- a/web/app/components/datasets/documents/detail/completed/common/regeneration-modal.tsx +++ b/web/app/components/datasets/documents/detail/completed/common/regeneration-modal.tsx @@ -121,7 +121,7 @@ const RegenerationModal: FC = ({ }) return ( - + {!loading && !updateSucceeded && } {loading && !updateSucceeded && } {!loading && updateSucceeded && } diff --git a/web/app/components/datasets/documents/detail/completed/index.tsx b/web/app/components/datasets/documents/detail/completed/index.tsx index 8fa167f976..09c63d54a1 100644 --- a/web/app/components/datasets/documents/detail/completed/index.tsx +++ b/web/app/components/datasets/documents/detail/completed/index.tsx @@ -124,6 +124,7 @@ const Completed: FC = ({ const [limit, setLimit] = useState(DEFAULT_LIMIT) const [fullScreen, setFullScreen] = useState(false) const [showNewChildSegmentModal, setShowNewChildSegmentModal] = useState(false) + const [isRegenerationModalOpen, setIsRegenerationModalOpen] = useState(false) const segmentListRef = useRef(null) const childSegmentListRef = useRef(null) @@ -669,6 +670,7 @@ const Completed: FC = ({ onClose={onCloseSegmentDetail} showOverlay={false} needCheckChunks + modal={isRegenerationModalOpen} > = ({ isEditMode={currSegment.isEditMode} onUpdate={handleUpdateSegment} onCancel={onCloseSegmentDetail} + onModalStateChange={setIsRegenerationModalOpen} /> {/* Create New Segment */} diff --git a/web/app/components/datasets/documents/detail/completed/segment-detail.tsx b/web/app/components/datasets/documents/detail/completed/segment-detail.tsx index bbd9df1adc..5e5ae6b485 100644 --- a/web/app/components/datasets/documents/detail/completed/segment-detail.tsx +++ b/web/app/components/datasets/documents/detail/completed/segment-detail.tsx @@ -27,6 +27,7 @@ type ISegmentDetailProps = { onCancel: () => void isEditMode?: boolean docForm: ChunkingMode + onModalStateChange?: (isOpen: boolean) => void } /** @@ -38,6 +39,7 @@ const SegmentDetail: FC = ({ onCancel, isEditMode, docForm, + onModalStateChange, }) => { const { t } = useTranslation() const [question, setQuestion] = useState(isEditMode ? segInfo?.content || '' : segInfo?.sign_content || '') @@ -68,11 +70,19 @@ const SegmentDetail: FC = ({ const handleRegeneration = useCallback(() => { setShowRegenerationModal(true) - }, []) + onModalStateChange?.(true) + }, [onModalStateChange]) const onCancelRegeneration = useCallback(() => { setShowRegenerationModal(false) - }, []) + onModalStateChange?.(false) + }, [onModalStateChange]) + + const onCloseAfterRegeneration = useCallback(() => { + setShowRegenerationModal(false) + onModalStateChange?.(false) + onCancel() // Close the edit drawer + }, [onCancel, onModalStateChange]) const onConfirmRegeneration = useCallback(() => { onUpdate(segInfo?.id || '', question, answer, keywords, true) @@ -161,7 +171,7 @@ const SegmentDetail: FC = ({ isShow={showRegenerationModal} onConfirm={onConfirmRegeneration} onCancel={onCancelRegeneration} - onClose={onCancelRegeneration} + onClose={onCloseAfterRegeneration} /> ) } diff --git a/web/app/components/datasets/documents/detail/index.tsx b/web/app/components/datasets/documents/detail/index.tsx index b4f47253fb..ddec9b6dbe 100644 --- a/web/app/components/datasets/documents/detail/index.tsx +++ b/web/app/components/datasets/documents/detail/index.tsx @@ -17,7 +17,7 @@ import Divider from '@/app/components/base/divider' import Loading from '@/app/components/base/loading' import Toast from '@/app/components/base/toast' import { ChunkingMode } from '@/models/datasets' -import type { FileItem } from '@/models/datasets' +import type { DataSourceInfo, FileItem, LegacyDataSourceInfo } from '@/models/datasets' import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' import FloatRightContainer from '@/app/components/base/float-right-container' import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' @@ -109,6 +109,18 @@ const DocumentDetail: FC = ({ datasetId, documentId }) => { const embedding = ['queuing', 'indexing', 'paused'].includes((documentDetail?.display_status || '').toLowerCase()) + const isLegacyDataSourceInfo = (info?: DataSourceInfo): info is LegacyDataSourceInfo => { + return !!info && 'upload_file' in info + } + + const documentUploadFile = useMemo(() => { + if (!documentDetail?.data_source_info) + return undefined + if (isLegacyDataSourceInfo(documentDetail.data_source_info)) + return documentDetail.data_source_info.upload_file + return undefined + }, [documentDetail?.data_source_info]) + const invalidChunkList = useInvalid(useSegmentListKey) const invalidChildChunkList = useInvalid(useChildSegmentListKey) const invalidDocumentList = useInvalidDocumentList(datasetId) @@ -153,7 +165,7 @@ const DocumentDetail: FC = ({ datasetId, documentId }) => {
void } +type MetadataState = { + documentType?: DocType | '' + metadata: Record +} + const Metadata: FC = ({ docDetail, loading, onUpdate }) => { const { doc_metadata = {} } = docDetail || {} - const doc_type = docDetail?.doc_type || '' + const rawDocType = docDetail?.doc_type ?? '' + const doc_type = rawDocType === 'others' ? '' : rawDocType const { t } = useTranslation() const metadataMap = useMetadataMap() @@ -143,18 +149,16 @@ const Metadata: FC = ({ docDetail, loading, onUpdate }) => { const businessDocCategoryMap = useBusinessDocCategories() const [editStatus, setEditStatus] = useState(!doc_type) // if no documentType, in editing status by default // the initial values are according to the documentType - const [metadataParams, setMetadataParams] = useState<{ - documentType?: DocType | '' - metadata: { [key: string]: string } - }>( + const [metadataParams, setMetadataParams] = useState( doc_type ? { - documentType: doc_type, - metadata: doc_metadata || {}, + documentType: doc_type as DocType, + metadata: (doc_metadata || {}) as Record, } - : { metadata: {} }) + : { metadata: {} }, + ) const [showDocTypes, setShowDocTypes] = useState(!doc_type) // whether show doc types - const [tempDocType, setTempDocType] = useState('') // for remember icon click + const [tempDocType, setTempDocType] = useState('') // for remember icon click const [saveLoading, setSaveLoading] = useState(false) const { notify } = useContext(ToastContext) @@ -165,13 +169,13 @@ const Metadata: FC = ({ docDetail, loading, onUpdate }) => { if (docDetail?.doc_type) { setEditStatus(false) setShowDocTypes(false) - setTempDocType(docDetail?.doc_type) + setTempDocType(doc_type as DocType | '') setMetadataParams({ - documentType: docDetail?.doc_type, - metadata: docDetail?.doc_metadata || {}, + documentType: doc_type as DocType | '', + metadata: (docDetail?.doc_metadata || {}) as Record, }) } - }, [docDetail?.doc_type]) + }, [docDetail?.doc_type, docDetail?.doc_metadata, doc_type]) // confirm doc type const confirmDocType = () => { @@ -179,7 +183,7 @@ const Metadata: FC = ({ docDetail, loading, onUpdate }) => { return setMetadataParams({ documentType: tempDocType, - metadata: tempDocType === metadataParams.documentType ? metadataParams.metadata : {}, // change doc type, clear metadata + metadata: tempDocType === metadataParams.documentType ? metadataParams.metadata : {} as Record, // change doc type, clear metadata }) setEditStatus(true) setShowDocTypes(false) @@ -187,7 +191,7 @@ const Metadata: FC = ({ docDetail, loading, onUpdate }) => { // cancel doc type const cancelDocType = () => { - setTempDocType(metadataParams.documentType) + setTempDocType(metadataParams.documentType ?? '') setEditStatus(true) setShowDocTypes(false) } @@ -209,7 +213,7 @@ const Metadata: FC = ({ docDetail, loading, onUpdate }) => { {t('datasetDocuments.metadata.docTypeChangeTitle')} {t('datasetDocuments.metadata.docTypeSelectWarning')} } - + {CUSTOMIZABLE_DOC_TYPES.map((type, index) => { const currValue = tempDocType ?? documentType return diff --git a/web/app/components/datasets/documents/detail/settings/document-settings.tsx b/web/app/components/datasets/documents/detail/settings/document-settings.tsx index 048645c9cf..3bcb8ef3aa 100644 --- a/web/app/components/datasets/documents/detail/settings/document-settings.tsx +++ b/web/app/components/datasets/documents/detail/settings/document-settings.tsx @@ -4,7 +4,17 @@ import { useBoolean } from 'ahooks' import { useContext } from 'use-context-selector' import { useRouter } from 'next/navigation' import DatasetDetailContext from '@/context/dataset-detail' -import type { CrawlOptions, CustomFile, DataSourceType } from '@/models/datasets' +import type { + CrawlOptions, + CustomFile, + DataSourceInfo, + DataSourceType, + LegacyDataSourceInfo, + LocalFileInfo, + OnlineDocumentInfo, + WebsiteCrawlInfo, +} from '@/models/datasets' +import type { DataSourceProvider } from '@/models/common' import Loading from '@/app/components/base/loading' import StepTwo from '@/app/components/datasets/create/step-two' import AccountSetting from '@/app/components/header/account-setting' @@ -42,15 +52,78 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => { params: { metadata: 'without' }, }) + const dataSourceInfo = documentDetail?.data_source_info + + const isLegacyDataSourceInfo = (info: DataSourceInfo | undefined): info is LegacyDataSourceInfo => { + return !!info && 'upload_file' in info + } + const isWebsiteCrawlInfo = (info: DataSourceInfo | undefined): info is WebsiteCrawlInfo => { + return !!info && 'source_url' in info && 'title' in info + } + const isOnlineDocumentInfo = (info: DataSourceInfo | undefined): info is OnlineDocumentInfo => { + return !!info && 'page' in info + } + const isLocalFileInfo = (info: DataSourceInfo | undefined): info is LocalFileInfo => { + return !!info && 'related_id' in info && 'transfer_method' in info + } + const legacyInfo = isLegacyDataSourceInfo(dataSourceInfo) ? dataSourceInfo : undefined + const websiteInfo = isWebsiteCrawlInfo(dataSourceInfo) ? dataSourceInfo : undefined + const onlineDocumentInfo = isOnlineDocumentInfo(dataSourceInfo) ? dataSourceInfo : undefined + const localFileInfo = isLocalFileInfo(dataSourceInfo) ? dataSourceInfo : undefined + const currentPage = useMemo(() => { - return { - workspace_id: documentDetail?.data_source_info.notion_workspace_id, - page_id: documentDetail?.data_source_info.notion_page_id, - page_name: documentDetail?.name, - page_icon: documentDetail?.data_source_info.notion_page_icon, - type: documentDetail?.data_source_type, + if (legacyInfo) { + return { + workspace_id: legacyInfo.notion_workspace_id ?? '', + page_id: legacyInfo.notion_page_id ?? '', + page_name: documentDetail?.name, + page_icon: legacyInfo.notion_page_icon, + type: documentDetail?.data_source_type, + } } - }, [documentDetail]) + if (onlineDocumentInfo) { + return { + workspace_id: onlineDocumentInfo.workspace_id, + page_id: onlineDocumentInfo.page.page_id, + page_name: onlineDocumentInfo.page.page_name, + page_icon: onlineDocumentInfo.page.page_icon, + type: onlineDocumentInfo.page.type, + } + } + return undefined + }, [documentDetail?.data_source_type, documentDetail?.name, legacyInfo, onlineDocumentInfo]) + + const files = useMemo(() => { + if (legacyInfo?.upload_file) + return [legacyInfo.upload_file as CustomFile] + if (localFileInfo) { + const { related_id, name, extension } = localFileInfo + return [{ + id: related_id, + name, + extension, + } as unknown as CustomFile] + } + return [] + }, [legacyInfo?.upload_file, localFileInfo]) + + const websitePages = useMemo(() => { + if (!websiteInfo) + return [] + return [{ + title: websiteInfo.title, + source_url: websiteInfo.source_url, + content: websiteInfo.content, + description: websiteInfo.description, + }] + }, [websiteInfo]) + + const crawlOptions = (dataSourceInfo && typeof dataSourceInfo === 'object' && 'includes' in dataSourceInfo && 'excludes' in dataSourceInfo) + ? dataSourceInfo as unknown as CrawlOptions + : undefined + + const websiteCrawlProvider = (websiteInfo?.provider ?? legacyInfo?.provider) as DataSourceProvider | undefined + const websiteCrawlJobId = websiteInfo?.job_id ?? legacyInfo?.job_id if (error) return @@ -65,22 +138,16 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => { onSetting={showSetAPIKey} datasetId={datasetId} dataSourceType={documentDetail.data_source_type as DataSourceType} - notionPages={[currentPage as unknown as NotionPage]} - websitePages={[ - { - title: documentDetail.name, - source_url: documentDetail.data_source_info?.url, - content: '', - description: '', - }, - ]} - websiteCrawlProvider={documentDetail.data_source_info?.provider} - websiteCrawlJobId={documentDetail.data_source_info?.job_id} - crawlOptions={documentDetail.data_source_info as unknown as CrawlOptions} + notionPages={currentPage ? [currentPage as unknown as NotionPage] : []} + notionCredentialId={legacyInfo?.credential_id || onlineDocumentInfo?.credential_id || ''} + websitePages={websitePages} + websiteCrawlProvider={websiteCrawlProvider} + websiteCrawlJobId={websiteCrawlJobId || ''} + crawlOptions={crawlOptions} indexingType={indexingTechnique} isSetting documentDetail={documentDetail} - files={[documentDetail.data_source_info.upload_file as CustomFile]} + files={files} onSave={saveHandler} onCancel={cancelHandler} /> diff --git a/web/app/components/datasets/documents/hooks/use-document-list-query-state.ts b/web/app/components/datasets/documents/hooks/use-document-list-query-state.ts index 4531b7e658..f2a251d99d 100644 --- a/web/app/components/datasets/documents/hooks/use-document-list-query-state.ts +++ b/web/app/components/datasets/documents/hooks/use-document-list-query-state.ts @@ -1,16 +1,31 @@ import { type ReadonlyURLSearchParams, usePathname, useRouter, useSearchParams } from 'next/navigation' import { useCallback, useMemo } from 'react' +import { sanitizeStatusValue } from '../status-filter' +import type { SortType } from '@/service/datasets' + +const ALLOWED_SORT_VALUES: SortType[] = ['-created_at', 'created_at', '-hit_count', 'hit_count'] + +const sanitizeSortValue = (value?: string | null): SortType => { + if (!value) + return '-created_at' + + return (ALLOWED_SORT_VALUES.includes(value as SortType) ? value : '-created_at') as SortType +} export type DocumentListQuery = { page: number limit: number keyword: string + status: string + sort: SortType } const DEFAULT_QUERY: DocumentListQuery = { page: 1, limit: 10, keyword: '', + status: 'all', + sort: '-created_at', } // Parse the query parameters from the URL search string. @@ -18,17 +33,21 @@ function parseParams(params: ReadonlyURLSearchParams): DocumentListQuery { const page = Number.parseInt(params.get('page') || '1', 10) const limit = Number.parseInt(params.get('limit') || '10', 10) const keyword = params.get('keyword') || '' + const status = sanitizeStatusValue(params.get('status')) + const sort = sanitizeSortValue(params.get('sort')) return { page: page > 0 ? page : 1, limit: (limit > 0 && limit <= 100) ? limit : 10, keyword: keyword ? decodeURIComponent(keyword) : '', + status, + sort, } } // Update the URL search string with the given query parameters. function updateSearchParams(query: DocumentListQuery, searchParams: URLSearchParams) { - const { page, limit, keyword } = query || {} + const { page, limit, keyword, status, sort } = query || {} const hasNonDefaultParams = (page && page > 1) || (limit && limit !== 10) || (keyword && keyword.trim()) @@ -45,6 +64,18 @@ function updateSearchParams(query: DocumentListQuery, searchParams: URLSearchPar searchParams.set('keyword', encodeURIComponent(keyword)) else searchParams.delete('keyword') + + const sanitizedStatus = sanitizeStatusValue(status) + if (sanitizedStatus && sanitizedStatus !== 'all') + searchParams.set('status', sanitizedStatus) + else + searchParams.delete('status') + + const sanitizedSort = sanitizeSortValue(sort) + if (sanitizedSort !== '-created_at') + searchParams.set('sort', sanitizedSort) + else + searchParams.delete('sort') } function useDocumentListQueryState() { @@ -57,6 +88,8 @@ function useDocumentListQueryState() { // Helper function to update specific query parameters const updateQuery = useCallback((updates: Partial) => { const newQuery = { ...query, ...updates } + newQuery.status = sanitizeStatusValue(newQuery.status) + newQuery.sort = sanitizeSortValue(newQuery.sort) const params = new URLSearchParams() updateSearchParams(newQuery, params) const search = params.toString() diff --git a/web/app/components/datasets/documents/index.tsx b/web/app/components/datasets/documents/index.tsx index 613257efee..e09ab44701 100644 --- a/web/app/components/datasets/documents/index.tsx +++ b/web/app/components/datasets/documents/index.tsx @@ -25,10 +25,12 @@ import useEditDocumentMetadata from '../metadata/hooks/use-edit-dataset-metadata import DatasetMetadataDrawer from '../metadata/metadata-dataset/dataset-metadata-drawer' import StatusWithAction from '../common/document-status-with-action/status-with-action' import { useDocLink } from '@/context/i18n' -import { SimpleSelect } from '../../base/select' -import StatusItem from './detail/completed/status-item' +import Chip from '../../base/chip' +import Sort from '../../base/sort' +import type { SortType } from '@/service/datasets' import type { Item } from '@/app/components/base/select' import { useIndexStatus } from './status-item/hooks' +import { normalizeStatusForQuery, sanitizeStatusValue } from './status-filter' const FolderPlusIcon = ({ className }: React.SVGProps) => { return @@ -84,13 +86,12 @@ const Documents: FC = ({ datasetId }) => { const docLink = useDocLink() const { plan } = useProviderContext() const isFreePlan = plan.type === 'sandbox' + const { query, updateQuery } = useDocumentListQueryState() const [inputValue, setInputValue] = useState('') // the input value const [searchValue, setSearchValue] = useState('') - const [statusFilter, setStatusFilter] = useState({ value: 'all', name: 'All Status' }) + const [statusFilterValue, setStatusFilterValue] = useState(() => sanitizeStatusValue(query.status)) + const [sortValue, setSortValue] = useState(query.sort) const DOC_INDEX_STATUS_MAP = useIndexStatus() - - // Use the new hook for URL state management - const { query, updateQuery } = useDocumentListQueryState() const [currPage, setCurrPage] = React.useState(query.page - 1) // Convert to 0-based index const [limit, setLimit] = useState(query.limit) @@ -104,7 +105,7 @@ const Documents: FC = ({ datasetId }) => { const debouncedSearchValue = useDebounce(searchValue, { wait: 500 }) const statusFilterItems: Item[] = useMemo(() => [ - { value: 'all', name: 'All Status' }, + { value: 'all', name: t('datasetDocuments.list.index.all') as string }, { value: 'queuing', name: DOC_INDEX_STATUS_MAP.queuing.text }, { value: 'indexing', name: DOC_INDEX_STATUS_MAP.indexing.text }, { value: 'paused', name: DOC_INDEX_STATUS_MAP.paused.text }, @@ -114,6 +115,11 @@ const Documents: FC = ({ datasetId }) => { { value: 'disabled', name: DOC_INDEX_STATUS_MAP.disabled.text }, { value: 'archived', name: DOC_INDEX_STATUS_MAP.archived.text }, ], [DOC_INDEX_STATUS_MAP, t]) + const normalizedStatusFilterValue = useMemo(() => normalizeStatusForQuery(statusFilterValue), [statusFilterValue]) + const sortItems: Item[] = useMemo(() => [ + { value: 'created_at', name: t('datasetDocuments.list.sort.uploadTime') as string }, + { value: 'hit_count', name: t('datasetDocuments.list.sort.hitCount') as string }, + ], [t]) // Initialize search value from URL on mount useEffect(() => { @@ -131,12 +137,17 @@ const Documents: FC = ({ datasetId }) => { setInputValue(query.keyword) setSearchValue(query.keyword) } + setStatusFilterValue((prev) => { + const nextValue = sanitizeStatusValue(query.status) + return prev === nextValue ? prev : nextValue + }) + setSortValue(query.sort) }, [query]) // Update URL when pagination changes const handlePageChange = (newPage: number) => { setCurrPage(newPage) - updateQuery({ page: newPage + 1 }) // Convert to 1-based index + updateQuery({ page: newPage + 1 }) // Pagination emits 0-based page, convert to 1-based for URL } // Update URL when limit changes @@ -160,6 +171,8 @@ const Documents: FC = ({ datasetId }) => { page: currPage + 1, limit, keyword: debouncedSearchValue, + status: normalizedStatusFilterValue, + sort: sortValue, }, refetchInterval: timerCanRun ? 2500 : 0, }) @@ -211,8 +224,14 @@ const Documents: FC = ({ datasetId }) => { percent, } }) - setTimerCanRun(completedNum !== documentsRes?.data?.length) - }, [documentsRes]) + + const hasIncompleteDocuments = completedNum !== documentsRes?.data?.length + const transientStatuses = ['queuing', 'indexing', 'paused'] + const shouldForcePolling = normalizedStatusFilterValue === 'all' + ? false + : transientStatuses.includes(normalizedStatusFilterValue) + setTimerCanRun(shouldForcePolling || hasIncompleteDocuments) + }, [documentsRes, normalizedStatusFilterValue]) const total = documentsRes?.total || 0 const routeToDocCreate = () => { @@ -233,6 +252,10 @@ const Documents: FC = ({ datasetId }) => { setSelectedIds([]) }, [searchValue, query.keyword]) + useEffect(() => { + setSelectedIds([]) + }, [normalizedStatusFilterValue]) + const { run: handleSearch } = useDebounceFn(() => { setSearchValue(inputValue) }, { wait: 500 }) @@ -260,7 +283,7 @@ const Documents: FC = ({ datasetId }) => { }) return ( -
+

{t('datasetDocuments.list.title')}

@@ -275,20 +298,27 @@ const Documents: FC = ({ datasetId }) => {
-
+
- { - setStatusFilter(item) - }} + } - optionClassName='p-0' - notClearable + onSelect={(item) => { + const selectedValue = sanitizeStatusValue(item?.value ? String(item.value) : '') + setStatusFilterValue(selectedValue) + setCurrPage(0) + updateQuery({ status: selectedValue, page: 1 }) + }} + onClear={() => { + if (statusFilterValue === 'all') + return + setStatusFilterValue('all') + setCurrPage(0) + updateQuery({ status: 'all', page: 1 }) + }} /> = ({ datasetId }) => { onChange={e => handleInputChange(e.target.value)} onClear={() => handleInputChange('')} /> +
+ { + const next = String(value) as SortType + if (next === sortValue) + return + setSortValue(next) + setCurrPage(0) + updateQuery({ sort: next, page: 1 }) + }} + />
{!isFreePlan && } @@ -343,7 +387,8 @@ const Documents: FC = ({ datasetId }) => { onUpdate={handleUpdate} selectedIds={selectedIds} onSelectedIdChange={setSelectedIds} - statusFilter={statusFilter} + statusFilterValue={normalizedStatusFilterValue} + remoteSortValue={sortValue} pagination={{ total, limit, diff --git a/web/app/components/datasets/documents/list.tsx b/web/app/components/datasets/documents/list.tsx index 9659925b3a..6f95d3cecb 100644 --- a/web/app/components/datasets/documents/list.tsx +++ b/web/app/components/datasets/documents/list.tsx @@ -1,6 +1,6 @@ 'use client' import type { FC } from 'react' -import React, { useCallback, useMemo, useState } from 'react' +import React, { useCallback, useEffect, useMemo, useState } from 'react' import { useBoolean } from 'ahooks' import { ArrowDownIcon } from '@heroicons/react/24/outline' import { pick, uniq } from 'lodash-es' @@ -18,7 +18,6 @@ import BatchAction from './detail/completed/common/batch-action' import cn from '@/utils/classnames' import Tooltip from '@/app/components/base/tooltip' import Toast from '@/app/components/base/toast' -import type { Item } from '@/app/components/base/select' import { asyncRunSafe } from '@/utils' import { formatNumber } from '@/utils/format' import NotionIcon from '@/app/components/base/notion-icon' @@ -37,6 +36,7 @@ import EditMetadataBatchModal from '@/app/components/datasets/metadata/edit-meta import StatusItem from './status-item' import Operations from './operations' import { DatasourceType } from '@/models/pipeline' +import { normalizeStatusForQuery } from '@/app/components/datasets/documents/status-filter' export const renderTdValue = (value: string | number | null, isEmptyStyle = false) => { return ( @@ -66,7 +66,8 @@ type IDocumentListProps = { pagination: PaginationProps onUpdate: () => void onManageMetadata: () => void - statusFilter: Item + statusFilterValue: string + remoteSortValue: string } /** @@ -81,7 +82,8 @@ const DocumentList: FC = ({ pagination, onUpdate, onManageMetadata, - statusFilter, + statusFilterValue, + remoteSortValue, }) => { const { t } = useTranslation() const { formatTime } = useTimestamp() @@ -90,9 +92,14 @@ const DocumentList: FC = ({ const chunkingMode = datasetConfig?.doc_form const isGeneralMode = chunkingMode !== ChunkingMode.parentChild const isQAMode = chunkingMode === ChunkingMode.qa - const [sortField, setSortField] = useState<'name' | 'word_count' | 'hit_count' | 'created_at' | null>('created_at') + const [sortField, setSortField] = useState<'name' | 'word_count' | 'hit_count' | 'created_at' | null>(null) const [sortOrder, setSortOrder] = useState<'asc' | 'desc'>('desc') + useEffect(() => { + setSortField(null) + setSortOrder('desc') + }, [remoteSortValue]) + const { isShowEditModal, showEditModal, @@ -109,11 +116,10 @@ const DocumentList: FC = ({ const localDocs = useMemo(() => { let filteredDocs = documents - if (statusFilter.value !== 'all') { + if (statusFilterValue && statusFilterValue !== 'all') { filteredDocs = filteredDocs.filter(doc => typeof doc.display_status === 'string' - && typeof statusFilter.value === 'string' - && doc.display_status.toLowerCase() === statusFilter.value.toLowerCase(), + && normalizeStatusForQuery(doc.display_status) === statusFilterValue, ) } @@ -156,7 +162,7 @@ const DocumentList: FC = ({ }) return sortedDocs - }, [documents, sortField, sortOrder, statusFilter]) + }, [documents, sortField, sortOrder, statusFilterValue]) const handleSort = (field: 'name' | 'word_count' | 'hit_count' | 'created_at') => { if (sortField === field) { @@ -279,9 +285,9 @@ const DocumentList: FC = ({ }, []) return ( -
-
-
{!log.read_at && (
diff --git a/web/app/components/app/overview/apikey-info-panel/index.tsx b/web/app/components/app/overview/apikey-info-panel/index.tsx index 7654d49e99..b50b0077cb 100644 --- a/web/app/components/app/overview/apikey-info-panel/index.tsx +++ b/web/app/components/app/overview/apikey-info-panel/index.tsx @@ -9,6 +9,7 @@ import { LinkExternal02 } from '@/app/components/base/icons/src/vender/line/gene import { IS_CE_EDITION } from '@/config' import { useProviderContext } from '@/context/provider-context' import { useModalContext } from '@/context/modal-context' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' const APIKeyInfoPanel: FC = () => { const isCloud = !IS_CE_EDITION @@ -47,7 +48,7 @@ const APIKeyInfoPanel: FC = () => {
- +
+
+
+
@@ -449,7 +455,7 @@ const DocumentList: FC = ({ {pagination.total && ( )} diff --git a/web/app/components/datasets/documents/status-filter.ts b/web/app/components/datasets/documents/status-filter.ts new file mode 100644 index 0000000000..d345774351 --- /dev/null +++ b/web/app/components/datasets/documents/status-filter.ts @@ -0,0 +1,33 @@ +import { DisplayStatusList } from '@/models/datasets' + +const KNOWN_STATUS_VALUES = new Set([ + 'all', + ...DisplayStatusList.map(item => item.toLowerCase()), +]) + +const URL_STATUS_ALIASES: Record = { + active: 'available', +} + +const QUERY_STATUS_ALIASES: Record = { + enabled: 'available', +} + +export const sanitizeStatusValue = (value?: string | null) => { + if (!value) + return 'all' + + const normalized = value.toLowerCase() + if (URL_STATUS_ALIASES[normalized]) + return URL_STATUS_ALIASES[normalized] + + return KNOWN_STATUS_VALUES.has(normalized) ? normalized : 'all' +} + +export const normalizeStatusForQuery = (value?: string | null) => { + const sanitized = sanitizeStatusValue(value) + if (sanitized === 'all') + return 'all' + + return QUERY_STATUS_ALIASES[sanitized] || sanitized +} diff --git a/web/app/components/datasets/list/dataset-card/index.tsx b/web/app/components/datasets/list/dataset-card/index.tsx index b1304e578e..ef6650a75d 100644 --- a/web/app/components/datasets/list/dataset-card/index.tsx +++ b/web/app/components/datasets/list/dataset-card/index.tsx @@ -85,6 +85,9 @@ const DatasetCard = ({ }, [t, dataset.document_count, dataset.total_available_documents]) const { formatTimeFromNow } = useFormatTimeFromNow() + const editTimeText = useMemo(() => { + return `${t('datasetDocuments.segment.editedAt')} ${formatTimeFromNow(dataset.updated_at * 1000)}` + }, [t, dataset.updated_at, formatTimeFromNow]) const openRenameModal = useCallback(() => { setShowRenameModal(true) @@ -193,6 +196,11 @@ const DatasetCard = ({ > {dataset.name} +
+
{dataset.author_name}
+
·
+
{editTimeText}
+
{isExternalProvider && {t('dataset.externalKnowledgeBase')}} {!isExternalProvider && isShowDocModeInfo && ( diff --git a/web/app/components/datasets/metadata/edit-metadata-batch/modal.tsx b/web/app/components/datasets/metadata/edit-metadata-batch/modal.tsx index 6681e4b67b..69eb969ebf 100644 --- a/web/app/components/datasets/metadata/edit-metadata-batch/modal.tsx +++ b/web/app/components/datasets/metadata/edit-metadata-batch/modal.tsx @@ -119,7 +119,7 @@ const EditMetadataBatchModal: FC = ({ className='!max-w-[640px]' >
{t(`${i18nPrefix}.editDocumentsNum`, { num: documentNum })}
-
+
{templeList.map(item => ( { return } if (retrievalConfig.weights) { - retrievalConfig.weights.vector_setting.embedding_provider_name = currentDataset?.embedding_model_provider || '' - retrievalConfig.weights.vector_setting.embedding_model_name = currentDataset?.embedding_model || '' + retrievalConfig.weights.vector_setting.embedding_provider_name = embeddingModel.provider || '' + retrievalConfig.weights.vector_setting.embedding_model_name = embeddingModel.model || '' } try { setLoading(true) diff --git a/web/app/components/develop/template/template_chat.en.mdx b/web/app/components/develop/template/template_chat.en.mdx index df31177127..1e4e767d6d 100644 --- a/web/app/components/develop/template/template_chat.en.mdx +++ b/web/app/components/develop/template/template_chat.en.mdx @@ -74,7 +74,8 @@ Chat applications support session persistence, allowing previous chat history to If set to `false`, can achieve async title generation by calling the conversation rename API and setting `auto_generate` to `true`. - (Optional) Workflow ID to specify a specific version, if not provided, uses the default published version. + (Optional) Workflow ID to specify a specific version, if not provided, uses the default published version.
+ How to obtain: In the version history interface, click the copy icon on the right side of each version entry to copy the complete workflow ID.
(Optional) Trace ID. Used for integration with existing business trace components to achieve end-to-end distributed tracing. If not provided, the system will automatically generate a trace_id. Supports the following three ways to pass, in order of priority:
diff --git a/web/app/components/develop/template/template_chat.ja.mdx b/web/app/components/develop/template/template_chat.ja.mdx index eafa653cad..6ba80d8890 100644 --- a/web/app/components/develop/template/template_chat.ja.mdx +++ b/web/app/components/develop/template/template_chat.ja.mdx @@ -74,7 +74,8 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from `false`に設定すると、会話のリネームAPIを呼び出し、`auto_generate`を`true`に設定することで非同期タイトル生成を実現できます。
- (オプション)ワークフローID、特定のバージョンを指定するために使用、提供されない場合はデフォルトの公開バージョンを使用。 + (オプション)ワークフローID、特定のバージョンを指定するために使用、提供されない場合はデフォルトの公開バージョンを使用。
+ 取得方法:バージョン履歴インターフェースで、各バージョンエントリの右側にあるコピーアイコンをクリックすると、完全なワークフローIDをコピーできます。
(オプション)トレースID。既存の業務システムのトレースコンポーネントと連携し、エンドツーエンドの分散トレーシングを実現するために使用します。指定がない場合、システムが自動的に trace_id を生成します。以下の3つの方法で渡すことができ、優先順位は次のとおりです:
diff --git a/web/app/components/develop/template/template_chat.zh.mdx b/web/app/components/develop/template/template_chat.zh.mdx index fc3fd6d0d2..bf69be2c28 100644 --- a/web/app/components/develop/template/template_chat.zh.mdx +++ b/web/app/components/develop/template/template_chat.zh.mdx @@ -72,7 +72,8 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx' (选填)自动生成标题,默认 `true`。 若设置为 `false`,则可通过调用会话重命名接口并设置 `auto_generate` 为 `true` 实现异步生成标题。
- (选填)工作流ID,用于指定特定版本,如果不提供则使用默认的已发布版本。 + (选填)工作流ID,用于指定特定版本,如果不提供则使用默认的已发布版本。
+ 获取方式:在版本历史界面,点击每个版本条目右侧的复制图标即可复制完整的工作流 ID。
(选填)链路追踪ID。适用于与业务系统已有的trace组件打通,实现端到端分布式追踪等场景。如果未指定,系统会自动生成trace_id。支持以下三种方式传递,具体优先级依次为:
diff --git a/web/app/components/develop/template/template_workflow.ja.mdx b/web/app/components/develop/template/template_workflow.ja.mdx index ff809b8a9c..688aaae9be 100644 --- a/web/app/components/develop/template/template_workflow.ja.mdx +++ b/web/app/components/develop/template/template_workflow.ja.mdx @@ -344,7 +344,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ### パス - `workflow_id` (string) 必須 特定バージョンのワークフローを指定するためのワークフローID - 取得方法:バージョン履歴で特定バージョンのワークフローIDを照会できます。 + 取得方法:バージョン履歴インターフェースで、各バージョンエントリの右側にあるコピーアイコンをクリックすると、完全なワークフローIDをコピーできます。 ### リクエストボディ - `inputs` (object) 必須 diff --git a/web/app/components/develop/template/template_workflow.zh.mdx b/web/app/components/develop/template/template_workflow.zh.mdx index 0e2b19df83..32ad342c71 100644 --- a/web/app/components/develop/template/template_workflow.zh.mdx +++ b/web/app/components/develop/template/template_workflow.zh.mdx @@ -334,7 +334,7 @@ Workflow 应用无会话支持,适合用于翻译/文章写作/总结 AI 等 ### Path - `workflow_id` (string) Required 工作流ID,用于指定特定版本的工作流 - 获取方式:可以在版本历史中查询特定版本的工作流ID。 + 获取方式:在版本历史界面,点击每个版本条目右侧的复制图标即可复制完整的工作流 ID。 ### Request Body - `inputs` (object) Required diff --git a/web/app/components/goto-anything/actions/commands/feedback.tsx b/web/app/components/goto-anything/actions/commands/forum.tsx similarity index 54% rename from web/app/components/goto-anything/actions/commands/feedback.tsx rename to web/app/components/goto-anything/actions/commands/forum.tsx index cce0aeb5f4..66237cb348 100644 --- a/web/app/components/goto-anything/actions/commands/feedback.tsx +++ b/web/app/components/goto-anything/actions/commands/forum.tsx @@ -4,27 +4,27 @@ import { RiFeedbackLine } from '@remixicon/react' import i18n from '@/i18n-config/i18next-config' import { registerCommands, unregisterCommands } from './command-bus' -// Feedback command dependency types -type FeedbackDeps = Record +// Forum command dependency types +type ForumDeps = Record /** - * Feedback command - Opens GitHub feedback discussions + * Forum command - Opens Dify community forum */ -export const feedbackCommand: SlashCommandHandler = { - name: 'feedback', - description: 'Open feedback discussions', +export const forumCommand: SlashCommandHandler = { + name: 'forum', + description: 'Open Dify community forum', mode: 'direct', // Direct execution function execute: () => { - const url = 'https://github.com/langgenius/dify/discussions/categories/feedbacks' + const url = 'https://forum.dify.ai' window.open(url, '_blank', 'noopener,noreferrer') }, async search(args: string, locale: string = 'en') { return [{ - id: 'feedback', - title: i18n.t('common.userProfile.communityFeedback', { lng: locale }), + id: 'forum', + title: i18n.t('common.userProfile.forum', { lng: locale }), description: i18n.t('app.gotoAnything.actions.feedbackDesc', { lng: locale }) || 'Open community feedback discussions', type: 'command' as const, icon: ( @@ -32,20 +32,20 @@ export const feedbackCommand: SlashCommandHandler = {
), - data: { command: 'navigation.feedback', args: { url: 'https://github.com/langgenius/dify/discussions/categories/feedbacks' } }, + data: { command: 'navigation.forum', args: { url: 'https://forum.dify.ai' } }, }] }, - register(_deps: FeedbackDeps) { + register(_deps: ForumDeps) { registerCommands({ - 'navigation.feedback': async (args) => { - const url = args?.url || 'https://github.com/langgenius/dify/discussions/categories/feedbacks' + 'navigation.forum': async (args) => { + const url = args?.url || 'https://forum.dify.ai' window.open(url, '_blank', 'noopener,noreferrer') }, }) }, unregister() { - unregisterCommands(['navigation.feedback']) + unregisterCommands(['navigation.forum']) }, } diff --git a/web/app/components/goto-anything/actions/commands/slash.tsx b/web/app/components/goto-anything/actions/commands/slash.tsx index e0d03d5019..b99215255f 100644 --- a/web/app/components/goto-anything/actions/commands/slash.tsx +++ b/web/app/components/goto-anything/actions/commands/slash.tsx @@ -7,7 +7,7 @@ import { useTheme } from 'next-themes' import { setLocaleOnClient } from '@/i18n-config' import { themeCommand } from './theme' import { languageCommand } from './language' -import { feedbackCommand } from './feedback' +import { forumCommand } from './forum' import { docsCommand } from './docs' import { communityCommand } from './community' import { accountCommand } from './account' @@ -34,7 +34,7 @@ export const registerSlashCommands = (deps: Record) => { // Register command handlers to the registry system with their respective dependencies slashCommandRegistry.register(themeCommand, { setTheme: deps.setTheme }) slashCommandRegistry.register(languageCommand, { setLocale: deps.setLocale }) - slashCommandRegistry.register(feedbackCommand, {}) + slashCommandRegistry.register(forumCommand, {}) slashCommandRegistry.register(docsCommand, {}) slashCommandRegistry.register(communityCommand, {}) slashCommandRegistry.register(accountCommand, {}) @@ -44,7 +44,7 @@ export const unregisterSlashCommands = () => { // Remove command handlers from registry system (automatically calls each command's unregister method) slashCommandRegistry.unregister('theme') slashCommandRegistry.unregister('language') - slashCommandRegistry.unregister('feedback') + slashCommandRegistry.unregister('forum') slashCommandRegistry.unregister('docs') slashCommandRegistry.unregister('community') slashCommandRegistry.unregister('account') diff --git a/web/app/components/header/account-dropdown/compliance.tsx b/web/app/components/header/account-dropdown/compliance.tsx index b5849682e9..8dc4aeec32 100644 --- a/web/app/components/header/account-dropdown/compliance.tsx +++ b/web/app/components/header/account-dropdown/compliance.tsx @@ -16,6 +16,7 @@ import cn from '@/utils/classnames' import { useProviderContext } from '@/context/provider-context' import { Plan } from '@/app/components/billing/type' import { useModalContext } from '@/context/modal-context' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' import { getDocDownloadUrl } from '@/service/common' enum DocName { @@ -38,7 +39,7 @@ const UpgradeOrDownload: FC = ({ doc_name }) => { if (isFreePlan) setShowPricingModal() else - setShowAccountSettingModal({ payload: 'billing' }) + setShowAccountSettingModal({ payload: ACCOUNT_SETTING_TAB.BILLING }) }, [isFreePlan, setShowAccountSettingModal, setShowPricingModal]) const { isPending, mutate: downloadCompliance } = useMutation({ diff --git a/web/app/components/header/account-dropdown/index.tsx b/web/app/components/header/account-dropdown/index.tsx index 30b2bfdf6f..d00cddc693 100644 --- a/web/app/components/header/account-dropdown/index.tsx +++ b/web/app/components/header/account-dropdown/index.tsx @@ -33,6 +33,7 @@ import cn from '@/utils/classnames' import { useGlobalPublicStore } from '@/context/global-public-context' import { useDocLink } from '@/context/i18n' import { useLogout } from '@/service/use-common' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' export default function AppSelector() { const itemClassName = ` @@ -122,7 +123,7 @@ export default function AppSelector() {
setShowAccountSettingModal({ payload: 'members' })}> + )} onClick={() => setShowAccountSettingModal({ payload: ACCOUNT_SETTING_TAB.MEMBERS })}>
{t('common.userProfile.settings')}
diff --git a/web/app/components/header/account-dropdown/support.tsx b/web/app/components/header/account-dropdown/support.tsx index b165c5fcca..f354cc4ab0 100644 --- a/web/app/components/header/account-dropdown/support.tsx +++ b/web/app/components/header/account-dropdown/support.tsx @@ -1,5 +1,5 @@ import { Menu, MenuButton, MenuItem, MenuItems, Transition } from '@headlessui/react' -import { RiArrowRightSLine, RiArrowRightUpLine, RiChatSmile2Line, RiDiscordLine, RiFeedbackLine, RiMailSendLine, RiQuestionLine } from '@remixicon/react' +import { RiArrowRightSLine, RiArrowRightUpLine, RiChatSmile2Line, RiDiscordLine, RiDiscussLine, RiMailSendLine, RiQuestionLine } from '@remixicon/react' import { Fragment } from 'react' import Link from 'next/link' import { useTranslation } from 'react-i18next' @@ -86,10 +86,10 @@ export default function Support({ closeAccountDropdown }: SupportProps) { className={cn(itemClassName, 'group justify-between', 'data-[active]:bg-state-base-hover', )} - href='https://github.com/langgenius/dify/discussions/categories/feedbacks' + href='https://forum.dify.ai/' target='_blank' rel='noopener noreferrer'> - -
{t('common.userProfile.communityFeedback')}
+ +
{t('common.userProfile.forum')}
diff --git a/web/app/components/header/account-setting/api-based-extension-page/selector.tsx b/web/app/components/header/account-setting/api-based-extension-page/selector.tsx index ce218540ee..549b5e7910 100644 --- a/web/app/components/header/account-setting/api-based-extension-page/selector.tsx +++ b/web/app/components/header/account-setting/api-based-extension-page/selector.tsx @@ -16,6 +16,7 @@ import { } from '@/app/components/base/icons/src/vender/line/arrows' import { useModalContext } from '@/context/modal-context' import { fetchApiBasedExtensionList } from '@/service/common' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' type ApiBasedExtensionSelectorProps = { value: string @@ -83,7 +84,7 @@ const ApiBasedExtensionSelector: FC = ({ className='flex cursor-pointer items-center text-xs text-text-accent' onClick={() => { setOpen(false) - setShowAccountSettingModal({ payload: 'api-based-extension' }) + setShowAccountSettingModal({ payload: ACCOUNT_SETTING_TAB.API_BASED_EXTENSION }) }} > {t('common.apiBasedExtension.selector.manage')} diff --git a/web/app/components/header/account-setting/constants.ts b/web/app/components/header/account-setting/constants.ts new file mode 100644 index 0000000000..2bf2f2eff5 --- /dev/null +++ b/web/app/components/header/account-setting/constants.ts @@ -0,0 +1,21 @@ +export const ACCOUNT_SETTING_MODAL_ACTION = 'showSettings' + +export const ACCOUNT_SETTING_TAB = { + PROVIDER: 'provider', + MEMBERS: 'members', + BILLING: 'billing', + DATA_SOURCE: 'data-source', + API_BASED_EXTENSION: 'api-based-extension', + CUSTOM: 'custom', + LANGUAGE: 'language', +} as const + +export type AccountSettingTab = typeof ACCOUNT_SETTING_TAB[keyof typeof ACCOUNT_SETTING_TAB] + +export const DEFAULT_ACCOUNT_SETTING_TAB = ACCOUNT_SETTING_TAB.MEMBERS + +export const isValidAccountSettingTab = (tab: string | null): tab is AccountSettingTab => { + if (!tab) + return false + return Object.values(ACCOUNT_SETTING_TAB).includes(tab as AccountSettingTab) +} diff --git a/web/app/components/header/account-setting/data-source-page-new/card.tsx b/web/app/components/header/account-setting/data-source-page-new/card.tsx index 7a8790e76d..1e2e60bb7a 100644 --- a/web/app/components/header/account-setting/data-source-page-new/card.tsx +++ b/web/app/components/header/account-setting/data-source-page-new/card.tsx @@ -20,6 +20,7 @@ import { useDataSourceAuthUpdate } from './hooks' import Confirm from '@/app/components/base/confirm' import { useGetDataSourceOAuthUrl } from '@/service/use-datasource' import { openOAuthPopup } from '@/hooks/use-oauth' +import { CollectionType } from '@/app/components/tools/types' type CardProps = { item: DataSourceAuth @@ -42,6 +43,7 @@ const Card = ({ const pluginPayload = { category: AuthCategory.datasource, provider: `${item.plugin_id}/${item.name}`, + providerType: CollectionType.datasource, } const { handleAuthUpdate } = useDataSourceAuthUpdate({ pluginId: item.plugin_id, diff --git a/web/app/components/header/account-setting/index.tsx b/web/app/components/header/account-setting/index.tsx index 8e71597e9c..49f6f62a08 100644 --- a/web/app/components/header/account-setting/index.tsx +++ b/web/app/components/header/account-setting/index.tsx @@ -31,6 +31,10 @@ import { useProviderContext } from '@/context/provider-context' import { useAppContext } from '@/context/app-context' import MenuDialog from '@/app/components/header/account-setting/menu-dialog' import Input from '@/app/components/base/input' +import { + ACCOUNT_SETTING_TAB, + type AccountSettingTab, +} from '@/app/components/header/account-setting/constants' const iconClassName = ` w-5 h-5 mr-2 @@ -38,11 +42,12 @@ const iconClassName = ` type IAccountSettingProps = { onCancel: () => void - activeTab?: string + activeTab?: AccountSettingTab + onTabChange?: (tab: AccountSettingTab) => void } type GroupItem = { - key: string + key: AccountSettingTab name: string description?: string icon: React.JSX.Element @@ -51,56 +56,71 @@ type GroupItem = { export default function AccountSetting({ onCancel, - activeTab = 'members', + activeTab = ACCOUNT_SETTING_TAB.MEMBERS, + onTabChange, }: IAccountSettingProps) { - const [activeMenu, setActiveMenu] = useState(activeTab) + const [activeMenu, setActiveMenu] = useState(activeTab) + useEffect(() => { + setActiveMenu(activeTab) + }, [activeTab]) const { t } = useTranslation() const { enableBilling, enableReplaceWebAppLogo } = useProviderContext() const { isCurrentWorkspaceDatasetOperator } = useAppContext() - const workplaceGroupItems = (() => { + const workplaceGroupItems: GroupItem[] = (() => { if (isCurrentWorkspaceDatasetOperator) return [] - return [ + + const items: GroupItem[] = [ { - key: 'provider', + key: ACCOUNT_SETTING_TAB.PROVIDER, name: t('common.settings.provider'), icon: , activeIcon: , }, { - key: 'members', + key: ACCOUNT_SETTING_TAB.MEMBERS, name: t('common.settings.members'), icon: , activeIcon: , }, - { - // Use key false to hide this item - key: enableBilling ? 'billing' : false, + ] + + if (enableBilling) { + items.push({ + key: ACCOUNT_SETTING_TAB.BILLING, name: t('common.settings.billing'), description: t('billing.plansCommon.receiptInfo'), icon: , activeIcon: , - }, + }) + } + + items.push( { - key: 'data-source', + key: ACCOUNT_SETTING_TAB.DATA_SOURCE, name: t('common.settings.dataSource'), icon: , activeIcon: , }, { - key: 'api-based-extension', + key: ACCOUNT_SETTING_TAB.API_BASED_EXTENSION, name: t('common.settings.apiBasedExtension'), icon: , activeIcon: , }, - { - key: (enableReplaceWebAppLogo || enableBilling) ? 'custom' : false, + ) + + if (enableReplaceWebAppLogo || enableBilling) { + items.push({ + key: ACCOUNT_SETTING_TAB.CUSTOM, name: t('custom.custom'), icon: , activeIcon: , - }, - ].filter(item => !!item.key) as GroupItem[] + }) + } + + return items })() const media = useBreakpoints() @@ -117,7 +137,7 @@ export default function AccountSetting({ name: t('common.settings.generalGroup'), items: [ { - key: 'language', + key: ACCOUNT_SETTING_TAB.LANGUAGE, name: t('common.settings.language'), icon: , activeIcon: , @@ -167,7 +187,10 @@ export default function AccountSetting({ 'mb-0.5 flex h-[37px] cursor-pointer items-center rounded-lg p-1 pl-3 text-sm', activeMenu === item.key ? 'system-sm-semibold bg-state-base-active text-components-menu-item-text-active' : 'system-sm-medium text-components-menu-item-text')} title={item.name} - onClick={() => setActiveMenu(item.key)} + onClick={() => { + setActiveMenu(item.key) + onTabChange?.(item.key) + }} > {activeMenu === item.key ? item.activeIcon : item.icon} {!isMobile &&
{item.name}
} diff --git a/web/app/components/header/account-setting/members-page/invite-modal/index.tsx b/web/app/components/header/account-setting/members-page/invite-modal/index.tsx index 264665805d..a432b8a4f0 100644 --- a/web/app/components/header/account-setting/members-page/invite-modal/index.tsx +++ b/web/app/components/header/account-setting/members-page/invite-modal/index.tsx @@ -17,8 +17,9 @@ import type { InvitationResult } from '@/models/common' import I18n from '@/context/i18n' import 'react-multi-email/dist/style.css' import { noop } from 'lodash-es' - import { useProviderContextSelector } from '@/context/provider-context' +import { useBoolean } from 'ahooks' + type IInviteModalProps = { isEmailSetup: boolean onCancel: () => void @@ -49,9 +50,15 @@ const InviteModal = ({ const { locale } = useContext(I18n) const [role, setRole] = useState('normal') + const [isSubmitting, { + setTrue: setIsSubmitting, + setFalse: setIsSubmitted, + }] = useBoolean(false) + const handleSend = useCallback(async () => { - if (isLimitExceeded) + if (isLimitExceeded || isSubmitting) return + setIsSubmitting() if (emails.map((email: string) => emailRegex.test(email)).every(Boolean)) { try { const { result, invitation_results } = await inviteMember({ @@ -70,7 +77,8 @@ const InviteModal = ({ else { notify({ type: 'error', message: t('common.members.emailInvalid') }) } - }, [isLimitExceeded, emails, role, locale, onCancel, onSend, notify, t]) + setIsSubmitted() + }, [isLimitExceeded, emails, role, locale, onCancel, onSend, notify, t, isSubmitting]) return (
@@ -133,7 +141,7 @@ const InviteModal = ({ tabIndex={0} className='w-full' onClick={handleSend} - disabled={!emails.length || isLimitExceeded} + disabled={!emails.length || isLimitExceeded || isSubmitting} variant='primary' > {t('common.members.sendInvite')} diff --git a/web/app/components/header/account-setting/model-provider-page/index.tsx b/web/app/components/header/account-setting/model-provider-page/index.tsx index 35de29185f..239c462ffe 100644 --- a/web/app/components/header/account-setting/model-provider-page/index.tsx +++ b/web/app/components/header/account-setting/model-provider-page/index.tsx @@ -93,7 +93,7 @@ const ModelProviderPage = ({ searchText }: Props) => { {defaultModelNotConfigured && (
- {t('common.modelProvider.notConfigured')} + {t('common.modelProvider.notConfigured')}
)} { + const text = hasCredential ? t('common.operation.config') : t('common.operation.setup') const Item = ( ) if (notAllowCustomCredential && !hasCredential) { diff --git a/web/app/components/header/account-setting/model-provider-page/model-parameter-modal/index.tsx b/web/app/components/header/account-setting/model-provider-page/model-parameter-modal/index.tsx index 5272846d03..e56def4113 100644 --- a/web/app/components/header/account-setting/model-provider-page/model-parameter-modal/index.tsx +++ b/web/app/components/header/account-setting/model-provider-page/model-parameter-modal/index.tsx @@ -31,13 +31,11 @@ import Loading from '@/app/components/base/loading' import { useProviderContext } from '@/context/provider-context' import { PROVIDER_WITH_PRESET_TONE, STOP_PARAMETER_RULE, TONE_LIST } from '@/config' import { ArrowNarrowLeft } from '@/app/components/base/icons/src/vender/line/arrows' -import type { ModelModeType } from '@/types/app' export type ModelParameterModalProps = { popupClassName?: string portalToFollowElemContentClassName?: string isAdvancedMode: boolean - mode: ModelModeType modelId: string provider: string setModel: (model: { modelId: string; provider: string; mode?: string; features?: string[] }) => void diff --git a/web/app/components/header/account-setting/model-provider-page/model-selector/index.tsx b/web/app/components/header/account-setting/model-provider-page/model-selector/index.tsx index d28959a509..58e96fde69 100644 --- a/web/app/components/header/account-setting/model-provider-page/model-selector/index.tsx +++ b/web/app/components/header/account-setting/model-provider-page/model-selector/index.tsx @@ -5,6 +5,7 @@ import type { Model, ModelItem, } from '../declarations' +import type { ModelFeatureEnum } from '../declarations' import { useCurrentProviderAndModel } from '../hooks' import ModelTrigger from './model-trigger' import EmptyTrigger from './empty-trigger' @@ -24,7 +25,7 @@ type ModelSelectorProps = { popupClassName?: string onSelect?: (model: DefaultModel) => void readonly?: boolean - scopeFeatures?: string[] + scopeFeatures?: ModelFeatureEnum[] deprecatedClassName?: string showDeprecatedWarnIcon?: boolean } diff --git a/web/app/components/header/account-setting/model-provider-page/model-selector/popup.tsx b/web/app/components/header/account-setting/model-provider-page/model-selector/popup.tsx index ff32b438ed..ae7d863d91 100644 --- a/web/app/components/header/account-setting/model-provider-page/model-selector/popup.tsx +++ b/web/app/components/header/account-setting/model-provider-page/model-selector/popup.tsx @@ -15,6 +15,7 @@ import { useLanguage } from '../hooks' import PopupItem from './popup-item' import { XCircle } from '@/app/components/base/icons/src/vender/solid/general' import { useModalContext } from '@/context/modal-context' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' import { supportFunctionCall } from '@/utils/tool-call' import { tooltipManager } from '@/app/components/base/tooltip/TooltipManager' @@ -22,7 +23,7 @@ type PopupProps = { defaultModel?: DefaultModel modelList: Model[] onSelect: (provider: string, model: ModelItem) => void - scopeFeatures?: string[] + scopeFeatures?: ModelFeatureEnum[] onHide: () => void } const Popup: FC = ({ @@ -129,7 +130,7 @@ const Popup: FC = ({
{ onHide() - setShowAccountSettingModal({ payload: 'provider' }) + setShowAccountSettingModal({ payload: ACCOUNT_SETTING_TAB.PROVIDER }) }}> {t('common.model.settingsLink')} diff --git a/web/app/components/header/account-setting/model-provider-page/provider-added-card/model-list.tsx b/web/app/components/header/account-setting/model-provider-page/provider-added-card/model-list.tsx index 9e26d233c9..2e008a0b35 100644 --- a/web/app/components/header/account-setting/model-provider-page/provider-added-card/model-list.tsx +++ b/web/app/components/header/account-setting/model-provider-page/provider-added-card/model-list.tsx @@ -86,7 +86,7 @@ const ModelList: FC = ({ { models.map(model => ( { if (isFreePlan) setShowPricingModal() else - setShowAccountSettingModal({ payload: 'billing' }) + setShowAccountSettingModal({ payload: ACCOUNT_SETTING_TAB.BILLING }) }, [isFreePlan, setShowAccountSettingModal, setShowPricingModal]) if (isMobile) { diff --git a/web/app/components/plugins/install-plugin/hooks/use-refresh-plugin-list.tsx b/web/app/components/plugins/install-plugin/hooks/use-refresh-plugin-list.tsx index d25beff59c..264c4782cd 100644 --- a/web/app/components/plugins/install-plugin/hooks/use-refresh-plugin-list.tsx +++ b/web/app/components/plugins/install-plugin/hooks/use-refresh-plugin-list.tsx @@ -2,12 +2,13 @@ import { useModelList } from '@/app/components/header/account-setting/model-prov import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' import { useProviderContext } from '@/context/provider-context' import { useInvalidateInstalledPluginList } from '@/service/use-plugins' -import { useInvalidateAllBuiltInTools, useInvalidateAllToolProviders } from '@/service/use-tools' +import { useInvalidateAllBuiltInTools, useInvalidateAllToolProviders, useInvalidateRAGRecommendedPlugins } from '@/service/use-tools' import { useInvalidateStrategyProviders } from '@/service/use-strategy' import type { Plugin, PluginDeclaration, PluginManifestInMarket } from '../../types' import { PluginCategoryEnum } from '../../types' import { useInvalidDataSourceList } from '@/service/use-pipeline' import { useInvalidDataSourceListAuth } from '@/service/use-datasource' +import { useInvalidateAllTriggerPlugins } from '@/service/use-triggers' const useRefreshPluginList = () => { const invalidateInstalledPluginList = useInvalidateInstalledPluginList() @@ -23,6 +24,10 @@ const useRefreshPluginList = () => { const invalidateDataSourceListAuth = useInvalidDataSourceListAuth() const invalidateStrategyProviders = useInvalidateStrategyProviders() + + const invalidateAllTriggerPlugins = useInvalidateAllTriggerPlugins() + + const invalidateRAGRecommendedPlugins = useInvalidateRAGRecommendedPlugins() return { refreshPluginList: (manifest?: PluginManifestInMarket | Plugin | PluginDeclaration | null, refreshAllType?: boolean) => { // installed list @@ -32,9 +37,13 @@ const useRefreshPluginList = () => { if ((manifest && PluginCategoryEnum.tool.includes(manifest.category)) || refreshAllType) { invalidateAllToolProviders() invalidateAllBuiltInTools() + invalidateRAGRecommendedPlugins() // TODO: update suggested tools. It's a function in hook useMarketplacePlugins,handleUpdatePlugins } + if ((manifest && PluginCategoryEnum.trigger.includes(manifest.category)) || refreshAllType) + invalidateAllTriggerPlugins() + if ((manifest && PluginCategoryEnum.datasource.includes(manifest.category)) || refreshAllType) { invalidateAllDataSources() invalidateDataSourceListAuth() diff --git a/web/app/components/plugins/install-plugin/install-bundle/ready-to-install.tsx b/web/app/components/plugins/install-plugin/install-bundle/ready-to-install.tsx index 63c0b5b07e..b2b0aefb9b 100644 --- a/web/app/components/plugins/install-plugin/install-bundle/ready-to-install.tsx +++ b/web/app/components/plugins/install-plugin/install-bundle/ready-to-install.tsx @@ -4,7 +4,7 @@ import React, { useCallback, useState } from 'react' import { InstallStep } from '../../types' import Install from './steps/install' import Installed from './steps/installed' -import type { Dependency, InstallStatusResponse, Plugin } from '../../types' +import type { Dependency, InstallStatus, Plugin } from '../../types' type Props = { step: InstallStep @@ -26,8 +26,8 @@ const ReadyToInstall: FC = ({ isFromMarketPlace, }) => { const [installedPlugins, setInstalledPlugins] = useState([]) - const [installStatus, setInstallStatus] = useState([]) - const handleInstalled = useCallback((plugins: Plugin[], installStatus: InstallStatusResponse[]) => { + const [installStatus, setInstallStatus] = useState([]) + const handleInstalled = useCallback((plugins: Plugin[], installStatus: InstallStatus[]) => { setInstallStatus(installStatus) setInstalledPlugins(plugins) onStepChange(InstallStep.installed) diff --git a/web/app/components/plugins/install-plugin/install-bundle/steps/install.tsx b/web/app/components/plugins/install-plugin/install-bundle/steps/install.tsx index 758daafca0..a717e0a24a 100644 --- a/web/app/components/plugins/install-plugin/install-bundle/steps/install.tsx +++ b/web/app/components/plugins/install-plugin/install-bundle/steps/install.tsx @@ -2,23 +2,31 @@ import type { FC } from 'react' import { useRef } from 'react' import React, { useCallback, useState } from 'react' -import type { Dependency, InstallStatusResponse, Plugin, VersionInfo } from '../../../types' +import { + type Dependency, + type InstallStatus, + type InstallStatusResponse, + type Plugin, + TaskStatus, + type VersionInfo, +} from '../../../types' import Button from '@/app/components/base/button' import { RiLoader2Line } from '@remixicon/react' import { useTranslation } from 'react-i18next' import type { ExposeRefs } from './install-multi' import InstallMulti from './install-multi' -import { useInstallOrUpdate } from '@/service/use-plugins' +import { useInstallOrUpdate, usePluginTaskList } from '@/service/use-plugins' import useRefreshPluginList from '../../hooks/use-refresh-plugin-list' import { useCanInstallPluginFromMarketplace } from '@/app/components/plugins/plugin-page/use-reference-setting' import { useMittContextSelector } from '@/context/mitt-context' import Checkbox from '@/app/components/base/checkbox' +import checkTaskStatus from '../../base/check-task-status' const i18nPrefix = 'plugin.installModal' type Props = { allPlugins: Dependency[] onStartToInstall?: () => void - onInstalled: (plugins: Plugin[], installStatus: InstallStatusResponse[]) => void + onInstalled: (plugins: Plugin[], installStatus: InstallStatus[]) => void onCancel: () => void isFromMarketPlace?: boolean isHideButton?: boolean @@ -55,18 +63,60 @@ const Install: FC = ({ setCanInstall(true) }, []) + const { + check, + stop, + } = checkTaskStatus() + + const handleCancel = useCallback(() => { + stop() + onCancel() + }, [onCancel, stop]) + + const { handleRefetch } = usePluginTaskList() + // Install from marketplace and github const { mutate: installOrUpdate, isPending: isInstalling } = useInstallOrUpdate({ - onSuccess: (res: InstallStatusResponse[]) => { - onInstalled(selectedPlugins, res.map((r, i) => { - return ({ - ...r, - isFromMarketPlace: allPlugins[selectedIndexes[i]].type === 'marketplace', + onSuccess: async (res: InstallStatusResponse[]) => { + const isAllSettled = res.every(r => r.status === TaskStatus.success || r.status === TaskStatus.failed) + // if all settled, return the install status + if (isAllSettled) { + onInstalled(selectedPlugins, res.map((r, i) => { + return ({ + success: r.status === TaskStatus.success, + isFromMarketPlace: allPlugins[selectedIndexes[i]].type === 'marketplace', + }) + })) + const hasInstallSuccess = res.some(r => r.status === TaskStatus.success) + if (hasInstallSuccess) { + refreshPluginList(undefined, true) + emit('plugin:install:success', selectedPlugins.map((p) => { + return `${p.plugin_id}/${p.name}` + })) + } + return + } + // if not all settled, keep checking the status of the plugins + handleRefetch() + const installStatus = await Promise.all(res.map(async (item, index) => { + if (item.status !== TaskStatus.running) { + return { + success: item.status === TaskStatus.success, + isFromMarketPlace: allPlugins[selectedIndexes[index]].type === 'marketplace', + } + } + const { status } = await check({ + taskId: item.taskId, + pluginUniqueIdentifier: item.uniqueIdentifier, }) + return { + success: status === TaskStatus.success, + isFromMarketPlace: allPlugins[selectedIndexes[index]].type === 'marketplace', + } })) - const hasInstallSuccess = res.some(r => r.success) + onInstalled(selectedPlugins, installStatus) + const hasInstallSuccess = installStatus.some(r => r.success) if (hasInstallSuccess) { - refreshPluginList(undefined, true) emit('plugin:install:success', selectedPlugins.map((p) => { return `${p.plugin_id}/${p.name}` })) @@ -150,7 +200,7 @@ const Install: FC = ({
{!canInstall && ( - )} diff --git a/web/app/components/plugins/install-plugin/install-bundle/steps/installed.tsx b/web/app/components/plugins/install-plugin/install-bundle/steps/installed.tsx index 4e16d200e7..f787882211 100644 --- a/web/app/components/plugins/install-plugin/install-bundle/steps/installed.tsx +++ b/web/app/components/plugins/install-plugin/install-bundle/steps/installed.tsx @@ -1,7 +1,7 @@ 'use client' import type { FC } from 'react' import React from 'react' -import type { InstallStatusResponse, Plugin } from '../../../types' +import type { InstallStatus, Plugin } from '../../../types' import Card from '@/app/components/plugins/card' import Button from '@/app/components/base/button' import { useTranslation } from 'react-i18next' @@ -11,7 +11,7 @@ import { MARKETPLACE_API_PREFIX } from '@/config' type Props = { list: Plugin[] - installStatus: InstallStatusResponse[] + installStatus: InstallStatus[] onCancel: () => void isHideButton?: boolean } diff --git a/web/app/components/plugins/install-plugin/utils.ts b/web/app/components/plugins/install-plugin/utils.ts index f19a7fd287..79c6d7b031 100644 --- a/web/app/components/plugins/install-plugin/utils.ts +++ b/web/app/components/plugins/install-plugin/utils.ts @@ -5,15 +5,17 @@ import { isEmpty } from 'lodash-es' export const pluginManifestToCardPluginProps = (pluginManifest: PluginDeclaration): Plugin => { return { plugin_id: pluginManifest.plugin_unique_identifier, - type: pluginManifest.category, + type: pluginManifest.category as Plugin['type'], category: pluginManifest.category, name: pluginManifest.name, version: pluginManifest.version, latest_version: '', latest_package_identifier: '', org: pluginManifest.author, + author: pluginManifest.author, label: pluginManifest.label, brief: pluginManifest.description, + description: pluginManifest.description, icon: pluginManifest.icon, verified: pluginManifest.verified, introduction: '', @@ -22,14 +24,17 @@ export const pluginManifestToCardPluginProps = (pluginManifest: PluginDeclaratio endpoint: { settings: [], }, - tags: [], + tags: pluginManifest.tags.map(tag => ({ name: tag })), + badges: [], + verification: { authorized_category: 'langgenius' }, + from: 'package', } } export const pluginManifestInMarketToPluginProps = (pluginManifest: PluginManifestInMarket): Plugin => { return { plugin_id: pluginManifest.plugin_unique_identifier, - type: pluginManifest.category, + type: pluginManifest.category as Plugin['type'], category: pluginManifest.category, name: pluginManifest.name, version: pluginManifest.latest_version, @@ -38,6 +43,7 @@ export const pluginManifestInMarketToPluginProps = (pluginManifest: PluginManife org: pluginManifest.org, label: pluginManifest.label, brief: pluginManifest.brief, + description: pluginManifest.brief, icon: pluginManifest.icon, verified: true, introduction: pluginManifest.introduction, @@ -49,6 +55,7 @@ export const pluginManifestInMarketToPluginProps = (pluginManifest: PluginManife tags: [], badges: pluginManifest.badges, verification: isEmpty(pluginManifest.verification) ? { authorized_category: 'langgenius' } : pluginManifest.verification, + from: pluginManifest.from, } } diff --git a/web/app/components/plugins/marketplace/hooks.ts b/web/app/components/plugins/marketplace/hooks.ts index 10aead17c4..5bc9263aaa 100644 --- a/web/app/components/plugins/marketplace/hooks.ts +++ b/web/app/components/plugins/marketplace/hooks.ts @@ -65,10 +65,12 @@ export const useMarketplacePlugins = () => { } = useMutationPluginsFromMarketplace() const [prevPlugins, setPrevPlugins] = useState() + const resetPlugins = useCallback(() => { reset() setPrevPlugins(undefined) }, [reset]) + const handleUpdatePlugins = useCallback((pluginsSearchParams: PluginsSearchParams) => { mutateAsync(pluginsSearchParams).then((res) => { const currentPage = pluginsSearchParams.page || 1 @@ -85,9 +87,6 @@ export const useMarketplacePlugins = () => { } }) }, [mutateAsync]) - const queryPlugins = useCallback((pluginsSearchParams: PluginsSearchParams) => { - handleUpdatePlugins(pluginsSearchParams) - }, [handleUpdatePlugins]) const { run: queryPluginsWithDebounced, cancel: cancelQueryPluginsWithDebounced } = useDebounceFn((pluginsSearchParams: PluginsSearchParams) => { handleUpdatePlugins(pluginsSearchParams) @@ -99,7 +98,7 @@ export const useMarketplacePlugins = () => { plugins: prevPlugins, total: data?.data?.total, resetPlugins, - queryPlugins, + queryPlugins: handleUpdatePlugins, queryPluginsWithDebounced, cancelQueryPluginsWithDebounced, isLoading: isPending, diff --git a/web/app/components/plugins/marketplace/utils.ts b/web/app/components/plugins/marketplace/utils.ts index df943b79b8..f424811537 100644 --- a/web/app/components/plugins/marketplace/utils.ts +++ b/web/app/components/plugins/marketplace/utils.ts @@ -8,6 +8,7 @@ import type { } from '@/app/components/plugins/marketplace/types' import { APP_VERSION, + IS_MARKETPLACE, MARKETPLACE_API_PREFIX, } from '@/config' import { getMarketplaceUrl } from '@/utils/var' @@ -51,7 +52,7 @@ export const getMarketplacePluginsByCollectionId = async (collectionId: string, try { const url = `${MARKETPLACE_API_PREFIX}/collections/${collectionId}/plugins` const headers = new Headers({ - 'X-Dify-Version': APP_VERSION, + 'X-Dify-Version': !IS_MARKETPLACE ? APP_VERSION : '999.0.0', }) const marketplaceCollectionPluginsData = await globalThis.fetch( url, @@ -89,7 +90,7 @@ export const getMarketplaceCollectionsAndPlugins = async (query?: CollectionsAnd if (query?.type) marketplaceUrl += `&type=${query.type}` const headers = new Headers({ - 'X-Dify-Version': APP_VERSION, + 'X-Dify-Version': !IS_MARKETPLACE ? APP_VERSION : '999.0.0', }) const marketplaceCollectionsData = await globalThis.fetch(marketplaceUrl, { headers, cache: 'no-store' }) const marketplaceCollectionsDataJson = await marketplaceCollectionsData.json() @@ -113,21 +114,12 @@ export const getMarketplaceCollectionsAndPlugins = async (query?: CollectionsAnd } export const getMarketplaceListCondition = (pluginType: string) => { - if (pluginType === PluginCategoryEnum.tool) - return 'category=tool' - - if (pluginType === PluginCategoryEnum.agent) - return 'category=agent-strategy' - - if (pluginType === PluginCategoryEnum.model) - return 'category=model' + if ([PluginCategoryEnum.tool, PluginCategoryEnum.agent, PluginCategoryEnum.model, PluginCategoryEnum.datasource, PluginCategoryEnum.trigger].includes(pluginType as PluginCategoryEnum)) + return `category=${pluginType}` if (pluginType === PluginCategoryEnum.extension) return 'category=endpoint' - if (pluginType === PluginCategoryEnum.datasource) - return 'category=datasource' - if (pluginType === 'bundle') return 'type=bundle' diff --git a/web/app/components/plugins/plugin-auth/authorize/api-key-modal.tsx b/web/app/components/plugins/plugin-auth/authorize/api-key-modal.tsx index 51c4f65604..cb90b075b0 100644 --- a/web/app/components/plugins/plugin-auth/authorize/api-key-modal.tsx +++ b/web/app/components/plugins/plugin-auth/authorize/api-key-modal.tsx @@ -144,7 +144,9 @@ const ApiKeyModal = ({ clickOutsideNotClose={true} wrapperClassName='!z-[101]' > - + {pluginPayload.detail && ( + + )} { isLoading && (
diff --git a/web/app/components/plugins/plugin-auth/authorize/oauth-client-settings.tsx b/web/app/components/plugins/plugin-auth/authorize/oauth-client-settings.tsx index c1c44928b3..256f6d0f4b 100644 --- a/web/app/components/plugins/plugin-auth/authorize/oauth-client-settings.tsx +++ b/web/app/components/plugins/plugin-auth/authorize/oauth-client-settings.tsx @@ -160,7 +160,9 @@ const OAuthClientSettings = ({ wrapperClassName='!z-[101]' clickOutsideNotClose={true} > - + {pluginPayload.detail && ( + + )} { const apiMap = useGetApi(pluginPayload) @@ -29,8 +30,14 @@ export const useDeletePluginCredentialHook = (pluginPayload: PluginPayload) => { export const useInvalidPluginCredentialInfoHook = (pluginPayload: PluginPayload) => { const apiMap = useGetApi(pluginPayload) + const invalidPluginCredentialInfo = useInvalidPluginCredentialInfo(apiMap.getCredentialInfo) + const providerType = pluginPayload.providerType + const invalidToolsByType = useInvalidToolsByType(providerType) - return useInvalidPluginCredentialInfo(apiMap.getCredentialInfo) + return () => { + invalidPluginCredentialInfo() + invalidToolsByType() + } } export const useSetPluginDefaultCredentialHook = (pluginPayload: PluginPayload) => { diff --git a/web/app/components/plugins/plugin-auth/types.ts b/web/app/components/plugins/plugin-auth/types.ts index aab29f8cec..9974586302 100644 --- a/web/app/components/plugins/plugin-auth/types.ts +++ b/web/app/components/plugins/plugin-auth/types.ts @@ -1,3 +1,4 @@ +import type { CollectionType } from '../../tools/types' import type { PluginDetail } from '../types' export type { AddApiKeyButtonProps } from './authorize/add-api-key-button' @@ -13,7 +14,8 @@ export enum AuthCategory { export type PluginPayload = { category: AuthCategory provider: string - detail: PluginDetail + providerType?: CollectionType | string + detail?: PluginDetail } export enum CredentialTypeEnum { diff --git a/web/app/components/plugins/plugin-detail-panel/detail-header.tsx b/web/app/components/plugins/plugin-detail-panel/detail-header.tsx index ab979a79a5..44ddb8360e 100644 --- a/web/app/components/plugins/plugin-detail-panel/detail-header.tsx +++ b/web/app/components/plugins/plugin-detail-panel/detail-header.tsx @@ -73,7 +73,7 @@ const DetailHeader = ({ const { enable_marketplace } = useGlobalPublicStore(s => s.systemFeatures) const { - installation_id, + id, source, tenant_id, version, @@ -198,7 +198,7 @@ const DetailHeader = ({ const handleDelete = useCallback(async () => { showDeleting() - const res = await uninstallPlugin(installation_id) + const res = await uninstallPlugin(id) hideDeleting() if (res.success) { hideDeleteConfirm() @@ -208,7 +208,7 @@ const DetailHeader = ({ if (PluginCategoryEnum.tool.includes(category)) invalidateAllToolProviders() } - }, [showDeleting, installation_id, hideDeleting, hideDeleteConfirm, onUpdate, category, refreshModelProviders, invalidateAllToolProviders]) + }, [showDeleting, id, hideDeleting, hideDeleteConfirm, onUpdate, category, refreshModelProviders, invalidateAllToolProviders]) return (
@@ -335,6 +335,7 @@ const DetailHeader = ({ pluginPayload={{ provider: provider?.name || '', category: AuthCategory.tool, + providerType: provider?.type || '', detail, }} /> @@ -355,7 +356,6 @@ const DetailHeader = ({ content={
{t(`${i18nPrefix}.deleteContentLeft`)}{label[locale]}{t(`${i18nPrefix}.deleteContentRight`)}
- {/* {usedInApps > 0 && t(`${i18nPrefix}.usedInApps`, { num: usedInApps })} */}
} onCancel={hideDeleteConfirm} diff --git a/web/app/components/plugins/plugin-detail-panel/endpoint-modal.tsx b/web/app/components/plugins/plugin-detail-panel/endpoint-modal.tsx index ce0df8123b..48aeecf1b2 100644 --- a/web/app/components/plugins/plugin-detail-panel/endpoint-modal.tsx +++ b/web/app/components/plugins/plugin-detail-panel/endpoint-modal.tsx @@ -55,7 +55,7 @@ const EndpointModal: FC = ({ // Fix: Process boolean fields to ensure they are sent as proper boolean values const processedCredential = { ...tempCredential } - formSchemas.forEach((field) => { + formSchemas.forEach((field: any) => { if (field.type === 'boolean' && processedCredential[field.name] !== undefined) { const value = processedCredential[field.name] if (typeof value === 'string') diff --git a/web/app/components/plugins/plugin-detail-panel/model-selector/index.tsx b/web/app/components/plugins/plugin-detail-panel/model-selector/index.tsx index 873f187e8f..1393a1844f 100644 --- a/web/app/components/plugins/plugin-detail-panel/model-selector/index.tsx +++ b/web/app/components/plugins/plugin-detail-panel/model-selector/index.tsx @@ -7,6 +7,7 @@ import { useTranslation } from 'react-i18next' import type { DefaultModel, FormValue, + ModelFeatureEnum, } from '@/app/components/header/account-setting/model-provider-page/declarations' import { ModelStatusEnum, ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector' @@ -57,7 +58,7 @@ const ModelParameterModal: FC = ({ const { isAPIKeySet } = useProviderContext() const [open, setOpen] = useState(false) const scopeArray = scope.split('&') - const scopeFeatures = useMemo(() => { + const scopeFeatures = useMemo((): ModelFeatureEnum[] => { if (scopeArray.includes('all')) return [] return scopeArray.filter(item => ![ @@ -67,7 +68,7 @@ const ModelParameterModal: FC = ({ ModelTypeEnum.moderation, ModelTypeEnum.speech2text, ModelTypeEnum.tts, - ].includes(item as ModelTypeEnum)) + ].includes(item as ModelTypeEnum)).map(item => item as ModelFeatureEnum) }, [scopeArray]) const { data: textGenerationList } = useModelList(ModelTypeEnum.textGeneration) diff --git a/web/app/components/plugins/plugin-detail-panel/store.ts b/web/app/components/plugins/plugin-detail-panel/store.ts index 26b778f288..931b08215d 100644 --- a/web/app/components/plugins/plugin-detail-panel/store.ts +++ b/web/app/components/plugins/plugin-detail-panel/store.ts @@ -1,7 +1,22 @@ import { create } from 'zustand' -import type { PluginDetail } from '../types' +import type { + ParametersSchema, + PluginDeclaration, + PluginDetail, + PluginTriggerSubscriptionConstructor, +} from '../types' -export type SimpleDetail = Pick & { provider: string } +type TriggerDeclarationSummary = { + subscription_schema?: ParametersSchema[] + subscription_constructor?: PluginTriggerSubscriptionConstructor | null +} + +export type SimpleDetail = Pick & { + provider: string + declaration: Partial> & { + trigger?: TriggerDeclarationSummary + } +} type Shape = { detail: SimpleDetail | undefined diff --git a/web/app/components/plugins/plugin-detail-panel/subscription-list/create/common-modal.tsx b/web/app/components/plugins/plugin-detail-panel/subscription-list/create/common-modal.tsx index 9eb7b90f88..3bd82d59c1 100644 --- a/web/app/components/plugins/plugin-detail-panel/subscription-list/create/common-modal.tsx +++ b/web/app/components/plugins/plugin-detail-panel/subscription-list/create/common-modal.tsx @@ -18,13 +18,14 @@ import { useVerifyTriggerSubscriptionBuilder, } from '@/service/use-triggers' import { parsePluginErrorMessage } from '@/utils/error-parser' +import { isPrivateOrLocalAddress } from '@/utils/urlValidation' import { RiLoader2Line } from '@remixicon/react' import { debounce } from 'lodash-es' import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import LogViewer from '../log-viewer' -import { usePluginSubscriptionStore } from '../store' import { usePluginStore } from '../../store' +import { useSubscriptionList } from '../use-subscription-list' type Props = { onClose: () => void @@ -66,43 +67,6 @@ const normalizeFormType = (type: FormTypeEnum | string): FormTypeEnum => { } } -// Check if URL is a private/local network address -const isPrivateOrLocalAddress = (url: string): boolean => { - try { - const urlObj = new URL(url) - const hostname = urlObj.hostname.toLowerCase() - - // Check for localhost - if (hostname === 'localhost' || hostname === '127.0.0.1' || hostname === '::1') - return true - - // Check for private IP ranges - const ipv4Regex = /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/ - const ipv4Match = hostname.match(ipv4Regex) - if (ipv4Match) { - const [, a, b] = ipv4Match.map(Number) - // 10.0.0.0/8 - if (a === 10) - return true - // 172.16.0.0/12 - if (a === 172 && b >= 16 && b <= 31) - return true - // 192.168.0.0/16 - if (a === 192 && b === 168) - return true - // 169.254.0.0/16 (link-local) - if (a === 169 && b === 254) - return true - } - - // Check for .local domains - return hostname.endsWith('.local') - } - catch { - return false - } -} - const StatusStep = ({ isActive, text }: { isActive: boolean, text: string }) => { return
{ export const CommonCreateModal = ({ onClose, createType, builder }: Props) => { const { t } = useTranslation() const detail = usePluginStore(state => state.detail) - const { refresh } = usePluginSubscriptionStore() + const { refetch } = useSubscriptionList() const [currentStep, setCurrentStep] = useState(createType === SupportedCreationMethods.APIKEY ? ApiKeyStep.Verify : ApiKeyStep.Configuration) @@ -139,7 +103,7 @@ export const CommonCreateModal = ({ onClose, createType, builder }: Props) => { const { mutate: buildSubscription, isPending: isBuilding } = useBuildTriggerSubscription() const { mutate: updateBuilder } = useUpdateTriggerSubscriptionBuilder() - const manualPropertiesSchema = detail?.declaration.trigger.subscription_schema || [] // manual + const manualPropertiesSchema = detail?.declaration?.trigger?.subscription_schema || [] // manual const manualPropertiesFormRef = React.useRef(null) const subscriptionFormRef = React.useRef(null) @@ -193,6 +157,7 @@ export const CommonCreateModal = ({ onClose, createType, builder }: Props) => { if (form) form.setFieldValue('callback_url', subscriptionBuilder.endpoint) if (isPrivateOrLocalAddress(subscriptionBuilder.endpoint)) { + console.log('isPrivateOrLocalAddress', isPrivateOrLocalAddress(subscriptionBuilder.endpoint)) subscriptionFormRef.current?.setFields([{ name: 'callback_url', warnings: [t('pluginTrigger.modal.form.callbackUrl.privateAddressWarning')], @@ -330,7 +295,7 @@ export const CommonCreateModal = ({ onClose, createType, builder }: Props) => { message: t('pluginTrigger.subscription.createSuccess'), }) onClose() - refresh?.() + refetch?.() }, onError: async (error: any) => { const errorMessage = await parsePluginErrorMessage(error) || t('pluginTrigger.subscription.createFailed') @@ -426,6 +391,7 @@ export const CommonCreateModal = ({ onClose, createType, builder }: Props) => { const normalizedType = normalizeFormType(schema.type as FormTypeEnum | string) return { ...schema, + tooltip: schema.description, type: normalizedType, dynamicSelectParams: normalizedType === FormTypeEnum.dynamicSelect ? { plugin_id: detail?.plugin_id || '', @@ -439,7 +405,7 @@ export const CommonCreateModal = ({ onClose, createType, builder }: Props) => { } })} ref={autoCommonParametersFormRef} - labelClassName='system-sm-medium mb-2 block text-text-primary' + labelClassName='system-sm-medium mb-2 flex items-center gap-1 text-text-primary' formClassName='space-y-4' /> )} @@ -447,9 +413,12 @@ export const CommonCreateModal = ({ onClose, createType, builder }: Props) => { {manualPropertiesSchema.length > 0 && (
({ + ...schema, + tooltip: schema.description, + }))} ref={manualPropertiesFormRef} - labelClassName='system-sm-medium mb-2 block text-text-primary' + labelClassName='system-sm-medium mb-2 flex items-center gap-1 text-text-primary' formClassName='space-y-4' onChange={handleManualPropertiesChange} /> diff --git a/web/app/components/plugins/plugin-detail-panel/subscription-list/delete-confirm.tsx b/web/app/components/plugins/plugin-detail-panel/subscription-list/delete-confirm.tsx index 178983c6b1..5f4e8a2cbf 100644 --- a/web/app/components/plugins/plugin-detail-panel/subscription-list/delete-confirm.tsx +++ b/web/app/components/plugins/plugin-detail-panel/subscription-list/delete-confirm.tsx @@ -4,7 +4,7 @@ import Toast from '@/app/components/base/toast' import { useDeleteTriggerSubscription } from '@/service/use-triggers' import { useState } from 'react' import { useTranslation } from 'react-i18next' -import { usePluginSubscriptionStore } from './store' +import { useSubscriptionList } from './use-subscription-list' type Props = { onClose: (deleted: boolean) => void @@ -18,7 +18,7 @@ const tPrefix = 'pluginTrigger.subscription.list.item.actions.deleteConfirm' export const DeleteConfirm = (props: Props) => { const { onClose, isShow, currentId, currentName, workflowsInUse } = props - const { refresh } = usePluginSubscriptionStore() + const { refetch } = useSubscriptionList() const { mutate: deleteSubscription, isPending: isDeleting } = useDeleteTriggerSubscription() const { t } = useTranslation() const [inputName, setInputName] = useState('') @@ -40,7 +40,7 @@ export const DeleteConfirm = (props: Props) => { message: t(`${tPrefix}.success`, { name: currentName }), className: 'z-[10000001]', }) - refresh?.() + refetch?.() onClose(true) }, onError: (error: any) => { diff --git a/web/app/components/plugins/plugin-detail-panel/subscription-list/selector-entry.tsx b/web/app/components/plugins/plugin-detail-panel/subscription-list/selector-entry.tsx index ffaa4ab3a7..c23e022ac5 100644 --- a/web/app/components/plugins/plugin-detail-panel/subscription-list/selector-entry.tsx +++ b/web/app/components/plugins/plugin-detail-panel/subscription-list/selector-entry.tsx @@ -17,13 +17,11 @@ type SubscriptionTriggerButtonProps = { onClick?: () => void isOpen?: boolean className?: string - onSelect: (v: SimpleSubscription, callback?: () => void) => void } const SubscriptionTriggerButton: React.FC = ({ selectedId, onClick, - onSelect, isOpen = false, className, }) => { @@ -109,7 +107,6 @@ export const SubscriptionSelectorEntry = ({ selectedId, onSelect }: { selectedId={selectedId} onClick={() => setIsOpen(!isOpen)} isOpen={isOpen} - onSelect={onSelect} />
diff --git a/web/app/components/plugins/plugin-detail-panel/subscription-list/store.ts b/web/app/components/plugins/plugin-detail-panel/subscription-list/store.ts deleted file mode 100644 index 24840e9971..0000000000 --- a/web/app/components/plugins/plugin-detail-panel/subscription-list/store.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { create } from 'zustand' - -type ShapeSubscription = { - refresh?: () => void - setRefresh: (refresh: () => void) => void -} - -export const usePluginSubscriptionStore = create(set => ({ - refresh: undefined, - setRefresh: (refresh: () => void) => set({ refresh }), -})) diff --git a/web/app/components/plugins/plugin-detail-panel/subscription-list/subscription-card.tsx b/web/app/components/plugins/plugin-detail-panel/subscription-list/subscription-card.tsx index f4766803a4..b2a86b5c76 100644 --- a/web/app/components/plugins/plugin-detail-panel/subscription-list/subscription-card.tsx +++ b/web/app/components/plugins/plugin-detail-panel/subscription-list/subscription-card.tsx @@ -1,5 +1,6 @@ 'use client' import ActionButton from '@/app/components/base/action-button' +import Tooltip from '@/app/components/base/tooltip' import type { TriggerSubscription } from '@/app/components/workflow/block-selector/types' import cn from '@/utils/classnames' import { @@ -48,9 +49,19 @@ const SubscriptionCard = ({ data }: Props) => {
-
- {data.endpoint} -
+ + {data.endpoint} +
+ )} + position='left' + > +
+ {data.endpoint} +
+
·
{data.workflows_in_use > 0 ? t('pluginTrigger.subscription.list.item.usedByNum', { num: data.workflows_in_use }) : t('pluginTrigger.subscription.list.item.noUsed')} diff --git a/web/app/components/plugins/plugin-detail-panel/subscription-list/use-subscription-list.ts b/web/app/components/plugins/plugin-detail-panel/subscription-list/use-subscription-list.ts index ff3e903a31..9f95ff05a0 100644 --- a/web/app/components/plugins/plugin-detail-panel/subscription-list/use-subscription-list.ts +++ b/web/app/components/plugins/plugin-detail-panel/subscription-list/use-subscription-list.ts @@ -1,19 +1,11 @@ -import { useEffect } from 'react' import { useTriggerSubscriptions } from '@/service/use-triggers' import { usePluginStore } from '../store' -import { usePluginSubscriptionStore } from './store' export const useSubscriptionList = () => { const detail = usePluginStore(state => state.detail) - const { setRefresh } = usePluginSubscriptionStore() const { data: subscriptions, isLoading, refetch } = useTriggerSubscriptions(detail?.provider || '') - useEffect(() => { - if (refetch) - setRefresh(refetch) - }, [refetch, setRefresh]) - return { detail, subscriptions, diff --git a/web/app/components/plugins/plugin-detail-panel/tool-selector/index.tsx b/web/app/components/plugins/plugin-detail-panel/tool-selector/index.tsx index a41f44c3d5..ea7892be32 100644 --- a/web/app/components/plugins/plugin-detail-panel/tool-selector/index.tsx +++ b/web/app/components/plugins/plugin-detail-panel/tool-selector/index.tsx @@ -318,6 +318,7 @@ const ToolSelector: FC = ({ pluginPayload={{ provider: currentProvider.name, category: AuthCategory.tool, + providerType: currentProvider.type, detail: currentProvider as any, }} credentialId={value?.credential_id} diff --git a/web/app/components/plugins/plugin-detail-panel/trigger/event-detail-drawer.tsx b/web/app/components/plugins/plugin-detail-panel/trigger/event-detail-drawer.tsx index e241fadb8a..2083f34263 100644 --- a/web/app/components/plugins/plugin-detail-panel/trigger/event-detail-drawer.tsx +++ b/web/app/components/plugins/plugin-detail-panel/trigger/event-detail-drawer.tsx @@ -17,7 +17,7 @@ import { import type { TFunction } from 'i18next' import type { FC } from 'react' import { useTranslation } from 'react-i18next' -import type { TriggerEvent } from '../../types' +import type { TriggerEvent } from '@/app/components/plugins/types' type EventDetailDrawerProps = { eventInfo: TriggerEvent diff --git a/web/app/components/plugins/plugin-detail-panel/trigger/event-list.tsx b/web/app/components/plugins/plugin-detail-panel/trigger/event-list.tsx index 1eef047771..93f2fcc9c7 100644 --- a/web/app/components/plugins/plugin-detail-panel/trigger/event-list.tsx +++ b/web/app/components/plugins/plugin-detail-panel/trigger/event-list.tsx @@ -14,17 +14,19 @@ type TriggerEventCardProps = { } const TriggerEventCard = ({ eventInfo, providerInfo }: TriggerEventCardProps) => { - const { identity, description = {} } = eventInfo + const { identity, description } = eventInfo const language = useLanguage() const [showDetail, setShowDetail] = useState(false) + const title = identity.label?.[language] ?? identity.label?.en_US ?? '' + const descriptionText = description?.[language] ?? description?.en_US ?? '' return ( <>
setShowDetail(true)} > -
{identity.label[language]}
-
{description[language]}
+
{title}
+
{descriptionText}
{showDetail && ( (null) + const [dependencies, setDependencies] = useState([]) const bundleInfo = useMemo(() => { const info = searchParams.get(BUNDLE_INFO_KEY) @@ -99,6 +101,7 @@ const PluginPage = ({ useEffect(() => { (async () => { + setUniqueIdentifier(null) await sleep(100) if (packageId) { const { data } = await fetchManifestFromMarketPlace(encodeURIComponent(packageId)) @@ -108,6 +111,7 @@ const PluginPage = ({ version: version.version, icon: `${MARKETPLACE_API_PREFIX}/plugins/${plugin.org}/${plugin.name}/icon`, }) + setUniqueIdentifier(packageId) showInstallFromMarketplace() return } @@ -283,10 +287,10 @@ const PluginPage = ({ )} { - isShowInstallFromMarketplace && ( + isShowInstallFromMarketplace && uniqueIdentifier && ( { return (
{ const children = (
-
+
@@ -71,7 +69,7 @@ const ReadmePanel: FC = () => { return ( ) } @@ -86,38 +84,36 @@ const ReadmePanel: FC = () => {
) - return ( - showType === ReadmeShowType.drawer ? ( - - {children} - - ) : ( - - {children} - + const portalContent = showType === ReadmeShowType.drawer + ? ( +
+
+ {children} +
+
) + : ( +
+
{ + event.stopPropagation() + }} + > + {children} +
+
+ ) + + return createPortal( + portalContent, + document.body, ) } diff --git a/web/app/components/plugins/reference-setting-modal/auto-update-setting/index.tsx b/web/app/components/plugins/reference-setting-modal/auto-update-setting/index.tsx index 2d00788142..dfbeaad9cb 100644 --- a/web/app/components/plugins/reference-setting-modal/auto-update-setting/index.tsx +++ b/web/app/components/plugins/reference-setting-modal/auto-update-setting/index.tsx @@ -15,6 +15,7 @@ import { RiTimeLine } from '@remixicon/react' import cn from '@/utils/classnames' import { convertTimezoneToOffsetStr } from '@/app/components/base/date-and-time-picker/utils/dayjs' import { useModalContextSelector } from '@/context/modal-context' +import { ACCOUNT_SETTING_TAB } from '@/app/components/header/account-setting/constants' const i18nPrefix = 'plugin.autoUpdate' @@ -30,7 +31,7 @@ const SettingTimeZone: FC<{ }) => { const setShowAccountSettingModal = useModalContextSelector(s => s.setShowAccountSettingModal) return ( - setShowAccountSettingModal({ payload: 'language' })} >{children} + setShowAccountSettingModal({ payload: ACCOUNT_SETTING_TAB.LANGUAGE })} >{children} ) } const AutoUpdateSetting: FC = ({ @@ -143,6 +144,7 @@ const AutoUpdateSetting: FC = ({ title={t(`${i18nPrefix}.updateTime`)} minuteFilter={minuteFilter} renderTrigger={renderTimePickerTrigger} + placement='bottom-end' />
+ label: TypeWithI18N type: string auto_generate: any template: any @@ -173,15 +174,21 @@ export type TriggerEventParameter = { precision: any options?: Array<{ value: string - label: Record + label: TypeWithI18N icon?: string }> - description?: Record + description?: TypeWithI18N } export type TriggerEvent = { - identity: Identity - description: Record + name: string + identity: { + author: string + name: string + label: TypeWithI18N + provider?: string + } + description: TypeWithI18N parameters: TriggerEventParameter[] output_schema: Record } @@ -388,6 +395,12 @@ export type InstallPackageResponse = { } export type InstallStatusResponse = { + status: TaskStatus, + taskId: string, + uniqueIdentifier: string, +} + +export type InstallStatus = { success: boolean, isFromMarketPlace?: boolean } diff --git a/web/app/components/share/text-generation/index.tsx b/web/app/components/share/text-generation/index.tsx index 98804c7311..f5cb7005b8 100644 --- a/web/app/components/share/text-generation/index.tsx +++ b/web/app/components/share/text-generation/index.tsx @@ -125,6 +125,12 @@ const TextGeneration: FC = ({ transfer_methods: [TransferMethod.local_file], }) const [completionFiles, setCompletionFiles] = useState([]) + const [runControl, setRunControl] = useState<{ onStop: () => Promise | void; isStopping: boolean } | null>(null) + + useEffect(() => { + if (isCallBatchAPI) + setRunControl(null) + }, [isCallBatchAPI]) const handleSend = () => { setIsCallBatchAPI(false) @@ -417,6 +423,7 @@ const TextGeneration: FC = ({ isPC={isPC} isMobile={!isPC} isInstalledApp={isInstalledApp} + appId={appId} installedAppInfo={installedAppInfo} isError={task?.status === TaskStatus.failed} promptConfig={promptConfig} @@ -434,6 +441,8 @@ const TextGeneration: FC = ({ isShowTextToSpeech={!!textToSpeechConfig?.enabled} siteInfo={siteInfo} onRunStart={() => setResultExisted(true)} + onRunControlChange={!isCallBatchAPI ? setRunControl : undefined} + hideInlineStopButton={!isCallBatchAPI} />) const renderBatchRes = () => { @@ -565,6 +574,7 @@ const TextGeneration: FC = ({ onSend={handleSend} visionConfig={visionConfig} onVisionFilesChange={setCompletionFiles} + runControl={runControl} />
diff --git a/web/app/components/share/text-generation/result/index.tsx b/web/app/components/share/text-generation/result/index.tsx index 7d21df448d..8cf5494bc9 100644 --- a/web/app/components/share/text-generation/result/index.tsx +++ b/web/app/components/share/text-generation/result/index.tsx @@ -1,13 +1,16 @@ 'use client' import type { FC } from 'react' -import React, { useEffect, useRef, useState } from 'react' +import React, { useCallback, useEffect, useRef, useState } from 'react' import { useBoolean } from 'ahooks' import { t } from 'i18next' import { produce } from 'immer' import TextGenerationRes from '@/app/components/app/text-generate/item' import NoData from '@/app/components/share/text-generation/no-data' import Toast from '@/app/components/base/toast' -import { sendCompletionMessage, sendWorkflowMessage, updateFeedback } from '@/service/share' +import Button from '@/app/components/base/button' +import { StopCircle } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices' +import { RiLoader2Line } from '@remixicon/react' +import { sendCompletionMessage, sendWorkflowMessage, stopChatMessageResponding, stopWorkflowMessage, updateFeedback } from '@/service/share' import type { FeedbackType } from '@/app/components/base/chat/chat/type' import Loading from '@/app/components/base/loading' import type { PromptConfig } from '@/models/debug' @@ -31,6 +34,7 @@ export type IResultProps = { isPC: boolean isMobile: boolean isInstalledApp: boolean + appId: string installedAppInfo?: InstalledApp isError: boolean isShowTextToSpeech: boolean @@ -48,6 +52,8 @@ export type IResultProps = { completionFiles: VisionFile[] siteInfo: SiteInfo | null onRunStart: () => void + onRunControlChange?: (control: { onStop: () => Promise | void; isStopping: boolean } | null) => void + hideInlineStopButton?: boolean } const Result: FC = ({ @@ -56,6 +62,7 @@ const Result: FC = ({ isPC, isMobile, isInstalledApp, + appId, installedAppInfo, isError, isShowTextToSpeech, @@ -73,13 +80,10 @@ const Result: FC = ({ completionFiles, siteInfo, onRunStart, + onRunControlChange, + hideInlineStopButton = false, }) => { const [isResponding, { setTrue: setRespondingTrue, setFalse: setRespondingFalse }] = useBoolean(false) - useEffect(() => { - if (controlStopResponding) - setRespondingFalse() - }, [controlStopResponding]) - const [completionRes, doSetCompletionRes] = useState('') const completionResRef = useRef('') const setCompletionRes = (res: string) => { @@ -94,6 +98,29 @@ const Result: FC = ({ doSetWorkflowProcessData(data) } const getWorkflowProcessData = () => workflowProcessDataRef.current + const [currentTaskId, setCurrentTaskId] = useState(null) + const [isStopping, setIsStopping] = useState(false) + const abortControllerRef = useRef(null) + const resetRunState = useCallback(() => { + setCurrentTaskId(null) + setIsStopping(false) + abortControllerRef.current = null + onRunControlChange?.(null) + }, [onRunControlChange]) + + useEffect(() => { + const abortCurrentRequest = () => { + abortControllerRef.current?.abort() + } + + if (controlStopResponding) { + abortCurrentRequest() + setRespondingFalse() + resetRunState() + } + + return abortCurrentRequest + }, [controlStopResponding, resetRunState, setRespondingFalse]) const { notify } = Toast const isNoData = !completionRes @@ -112,6 +139,40 @@ const Result: FC = ({ notify({ type: 'error', message }) } + const handleStop = useCallback(async () => { + if (!currentTaskId || isStopping) + return + setIsStopping(true) + try { + if (isWorkflow) + await stopWorkflowMessage(appId, currentTaskId, isInstalledApp, installedAppInfo?.id || '') + else + await stopChatMessageResponding(appId, currentTaskId, isInstalledApp, installedAppInfo?.id || '') + abortControllerRef.current?.abort() + } + catch (error) { + const message = error instanceof Error ? error.message : String(error) + notify({ type: 'error', message }) + } + finally { + setIsStopping(false) + } + }, [appId, currentTaskId, installedAppInfo?.id, isInstalledApp, isStopping, isWorkflow, notify]) + + useEffect(() => { + if (!onRunControlChange) + return + if (isResponding && currentTaskId) { + onRunControlChange({ + onStop: handleStop, + isStopping, + }) + } + else { + onRunControlChange(null) + } + }, [currentTaskId, handleStop, isResponding, isStopping, onRunControlChange]) + const checkCanSend = () => { // batch will check outer if (isCallBatchAPI) @@ -196,6 +257,7 @@ const Result: FC = ({ rating: null, }) setCompletionRes('') + resetRunState() let res: string[] = [] let tempMessageId = '' @@ -213,6 +275,7 @@ const Result: FC = ({ if (!isEnd) { setRespondingFalse() onCompleted(getCompletionRes(), taskId, false) + resetRunState() isTimeout = true } })() @@ -221,8 +284,10 @@ const Result: FC = ({ sendWorkflowMessage( data, { - onWorkflowStarted: ({ workflow_run_id }) => { + onWorkflowStarted: ({ workflow_run_id, task_id }) => { tempMessageId = workflow_run_id + setCurrentTaskId(task_id || null) + setIsStopping(false) setWorkflowProcessData({ status: WorkflowRunningStatus.Running, tracing: [], @@ -330,12 +395,38 @@ const Result: FC = ({ notify({ type: 'warning', message: t('appDebug.warningMessage.timeoutExceeded') }) return } + const workflowStatus = data.status as WorkflowRunningStatus | undefined + const markNodesStopped = (traces?: WorkflowProcess['tracing']) => { + if (!traces) + return + const markTrace = (trace: WorkflowProcess['tracing'][number]) => { + if ([NodeRunningStatus.Running, NodeRunningStatus.Waiting].includes(trace.status as NodeRunningStatus)) + trace.status = NodeRunningStatus.Stopped + trace.details?.forEach(detailGroup => detailGroup.forEach(markTrace)) + trace.retryDetail?.forEach(markTrace) + trace.parallelDetail?.children?.forEach(markTrace) + } + traces.forEach(markTrace) + } + if (workflowStatus === WorkflowRunningStatus.Stopped) { + setWorkflowProcessData(produce(getWorkflowProcessData()!, (draft) => { + draft.status = WorkflowRunningStatus.Stopped + markNodesStopped(draft.tracing) + })) + setRespondingFalse() + resetRunState() + onCompleted(getCompletionRes(), taskId, false) + isEnd = true + return + } if (data.error) { notify({ type: 'error', message: data.error }) setWorkflowProcessData(produce(getWorkflowProcessData()!, (draft) => { draft.status = WorkflowRunningStatus.Failed + markNodesStopped(draft.tracing) })) setRespondingFalse() + resetRunState() onCompleted(getCompletionRes(), taskId, false) isEnd = true return @@ -357,6 +448,7 @@ const Result: FC = ({ } } setRespondingFalse() + resetRunState() setMessageId(tempMessageId) onCompleted(getCompletionRes(), taskId, true) isEnd = true @@ -376,12 +468,19 @@ const Result: FC = ({ }, isInstalledApp, installedAppInfo?.id, - ) + ).catch((error) => { + setRespondingFalse() + resetRunState() + const message = error instanceof Error ? error.message : String(error) + notify({ type: 'error', message }) + }) } else { sendCompletionMessage(data, { - onData: (data: string, _isFirstMessage: boolean, { messageId }) => { + onData: (data: string, _isFirstMessage: boolean, { messageId, taskId }) => { tempMessageId = messageId + if (taskId && typeof taskId === 'string' && taskId.trim() !== '') + setCurrentTaskId(prev => prev ?? taskId) res.push(data) setCompletionRes(res.join('')) }, @@ -391,6 +490,7 @@ const Result: FC = ({ return } setRespondingFalse() + resetRunState() setMessageId(tempMessageId) onCompleted(getCompletionRes(), taskId, true) isEnd = true @@ -405,9 +505,13 @@ const Result: FC = ({ return } setRespondingFalse() + resetRunState() onCompleted(getCompletionRes(), taskId, false) isEnd = true }, + getAbortController: (abortController) => { + abortControllerRef.current = abortController + }, }, isInstalledApp, installedAppInfo?.id) } } @@ -426,28 +530,46 @@ const Result: FC = ({ }, [controlRetry]) const renderTextGenerationRes = () => ( - + <> + {!hideInlineStopButton && isResponding && currentTaskId && ( +
+ +
+ )} + + ) return ( diff --git a/web/app/components/share/text-generation/run-once/index.tsx b/web/app/components/share/text-generation/run-once/index.tsx index d24428f32a..379d885ff1 100644 --- a/web/app/components/share/text-generation/run-once/index.tsx +++ b/web/app/components/share/text-generation/run-once/index.tsx @@ -3,6 +3,7 @@ import { useEffect, useState } from 'react' import React, { useCallback } from 'react' import { useTranslation } from 'react-i18next' import { + RiLoader2Line, RiPlayLargeLine, } from '@remixicon/react' import Select from '@/app/components/base/select' @@ -20,6 +21,7 @@ import cn from '@/utils/classnames' import BoolInput from '@/app/components/workflow/nodes/_base/components/before-run-form/bool-input' import CodeEditor from '@/app/components/workflow/nodes/_base/components/editor/code-editor' import { CodeLanguage } from '@/app/components/workflow/nodes/code/types' +import { StopCircle } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices' export type IRunOnceProps = { siteInfo: SiteInfo @@ -30,6 +32,10 @@ export type IRunOnceProps = { onSend: () => void visionConfig: VisionSettings onVisionFilesChange: (files: VisionFile[]) => void + runControl?: { + onStop: () => Promise | void + isStopping: boolean + } | null } const RunOnce: FC = ({ promptConfig, @@ -39,6 +45,7 @@ const RunOnce: FC = ({ onSend, visionConfig, onVisionFilesChange, + runControl, }) => { const { t } = useTranslation() const media = useBreakpoints() @@ -62,6 +69,14 @@ const RunOnce: FC = ({ e.preventDefault() onSend() } + const isRunning = !!runControl + const stopLabel = t('share.generation.stopRun', { defaultValue: 'Stop Run' }) + const handlePrimaryClick = useCallback((e: React.MouseEvent) => { + if (!isRunning) + return + e.preventDefault() + runControl?.onStop?.() + }, [isRunning, runControl]) const handleInputsChange = useCallback((newInputs: Record) => { onInputsChange(newInputs) @@ -100,7 +115,10 @@ const RunOnce: FC = ({ : promptConfig.prompt_variables.map(item => (
{item.type !== 'checkbox' && ( - +
+
{item.name}
+ {!item.required && {t('workflow.panel.optional')}} +
)}
{item.type === 'select' && ( @@ -115,7 +133,7 @@ const RunOnce: FC = ({ {item.type === 'string' && ( ) => { handleInputsChange({ ...inputsRef.current, [item.key]: e.target.value }) }} maxLength={item.max_length || DEFAULT_VALUE_MAX_LEN} @@ -124,7 +142,7 @@ const RunOnce: FC = ({ {item.type === 'paragraph' && (